Mercurial > hg > dml-open-backendtools
diff pyspark/transforms/tonicHistogram.py @ 0:e34cf1b6fe09 tip
commit
author | Daniel Wolff |
---|---|
date | Sat, 20 Feb 2016 18:14:24 +0100 |
parents | |
children |
line wrap: on
line diff
# Part of DML (Digital Music Laboratory)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

# -*- coding: utf-8 -*-
__author__="hargreavess"

from rdflib import Graph, Namespace, BNode, RDF, Literal
from n3Parser import get_rdf_graph_from_n3
from csvParser import get_dict_from_csv, get_array_from_csv

dml_ns = Namespace("http://dml.org/dml/cla#")


def add_tonic_histogram_to_graph(tonic_histogram, output_rdf_graph, transform, sample_count, input_f_files):
    """Add triples representing a 'tonic histogram' result to an RDF graph.

    A blank node typed ``dml:TonicHistogram`` is attached to ``transform``
    via ``dml:output``; every entry of ``tonic_histogram`` becomes a further
    blank node linked through ``dml:bin`` and carrying ``dml:bin_number``
    and ``dml:bin_value`` literals.  Each element of ``input_f_files`` is
    recorded as a ``dml:input`` of ``transform``.

    :param tonic_histogram: mapping of bin number to bin value
    :param output_rdf_graph: graph the triples are added to (mutated in place)
    :param transform: RDF node the output and inputs are attached to
    :param sample_count: value stored as the ``dml:sample_count`` literal
    :param input_f_files: iterable of nodes recorded as ``dml:input``
    :return: ``output_rdf_graph``, for convenience
    """
    output_bnode = BNode()
    output_rdf_graph.add((transform, dml_ns.output, output_bnode))

    for input_f_file in input_f_files:
        output_rdf_graph.add((transform, dml_ns.input, input_f_file))

    output_rdf_graph.add((output_bnode, RDF.type, dml_ns.TonicHistogram))
    output_rdf_graph.add((output_bnode, dml_ns.sample_count, Literal(sample_count)))

    for tonic, bin_value in tonic_histogram.items():
        bin_bnode = BNode()
        output_rdf_graph.add((output_bnode, dml_ns.bin, bin_bnode))
        output_rdf_graph.add((bin_bnode, dml_ns.bin_number, Literal(tonic)))
        output_rdf_graph.add((bin_bnode, dml_ns.bin_value, Literal(bin_value)))

    return output_rdf_graph


def find_cla_tonic_histogram(input_f_files):
    """Build a histogram of the most common key of each input file.

    Bins 1..12 are always present (initialised to 0).  Each file
    contributes one count to the bin returned by
    ``find_most_common_key_in_piece``.

    :param input_f_files: sequence of feature file paths (.csv or n3)
    :return: tuple ``(tonic_hist, num_f_files)``
    """
    num_f_files = len(input_f_files)
    tonic_hist = dict.fromkeys(range(1, 13), 0)

    for input_f_file in input_f_files:
        tonic = find_most_common_key_in_piece(input_f_file)
        # Default of 0 so that a key outside 1..12 is counted in its own
        # bin instead of failing with "None + 1".
        tonic_hist[tonic] = tonic_hist.get(tonic, 0) + 1

    return (tonic_hist, num_f_files)


def find_most_common_key_in_piece(input_f_file):
    """Return the key with the largest value in the file's tonic histogram.

    When several bins share the maximum, the one visited last while
    iterating the histogram wins.

    NOTE(review): if the histogram is all zeros (no durations could be
    accumulated) every bin ties at 0 and the last bin is returned rather
    than a "no key" marker -- confirm whether that is intended.

    :param input_f_file: path of a feature file (.csv or n3)
    :return: the winning bin number
    """
    tonic_hist = find_tonic_histogram(input_f_file)
    duration_of_tonic = max(tonic_hist.values())
    result = -1

    for tonic, duration in tonic_hist.items():
        if duration == duration_of_tonic:
            result = tonic

    return result


def find_tonic_histogram(input_f_file):
    """Build the tonic histogram of a single feature file.

    For ``.csv`` input every row but the last contributes the time elapsed
    until the following row to the bin of its own key.  For any other input
    the n3 histogram is not implemented yet: the last key in the piece
    receives a single count.

    NOTE(review): the final csv row never receives a duration because no
    end time is available here, so a single-row file yields an all-zero
    histogram.

    :param input_f_file: path of a feature file (.csv or n3)
    :return: dict mapping bin number to accumulated value; bins 1..12 are
        always present
    """
    tonic_hist = dict.fromkeys(range(1, 13), 0)

    if input_f_file.endswith('.csv'):
        # Column layout, as noted by the original author:
        # ['time', 'keynr', 'label']
        csv_array = get_array_from_csv(input_f_file)

        for idx in range(1, len(csv_array)):
            previous_row = csv_array[idx - 1]
            tonic_duration = csv_array[idx][0] - previous_row[0]
            tonic = int(previous_row[1])
            # Default of 0: tolerate key numbers outside 1..12.
            tonic_hist[tonic] = tonic_hist.get(tonic, 0) + tonic_duration

    else:
        # TODO - n3 version of tonic histogram
        # for now use last key in piece
        tonic = find_last_key_in_piece(input_f_file)
        tonic_hist[tonic] = tonic_hist.get(tonic, 0) + 1

    return tonic_hist


def find_last_key_in_piece(input_f_file):
    """Determine the temporally last key in a feature file.

    ``.csv`` files are read through ``get_dict_from_csv``; anything else is
    parsed as n3 and queried with SPARQL for events carrying a timeline
    time and an ``af:feature`` value.

    :param input_f_file: path of a feature file (.csv or n3)
    :return: the key of the event with the greatest time as ``int``;
        0 when the file contains no events
    """
    # Start below every possible time so that an event at time 0 is
    # considered too; the strict ">" keeps the first of equally-timed events.
    max_time = float("-inf")
    last_key = 0

    if input_f_file.endswith('.csv'):

        csv_dict = get_dict_from_csv(input_f_file, columtype = ['time','keynr','label'])

        for row in csv_dict:
            tl_time = float(row['time'])

            if tl_time > max_time:
                max_time = tl_time
                last_key = row['keynr']

    else:

        key_feature_graph = get_rdf_graph_from_n3(input_f_file)

        qres = key_feature_graph.query(
            """prefix dml:     <http://dml.org/dml/cla#>
               prefix event:   <http://purl.org/NET/c4dm/event.owl#>
               prefix tl:      <http://purl.org/NET/c4dm/timeline.owl#>
               prefix af:      <http://purl.org/ontology/af/>
               SELECT ?event ?key ?tl_time
               WHERE {
                   ?event event:time ?event_time .
                   ?event_time tl:at ?tl_time .
                   ?event af:feature ?key .
               }""")

        for row in qres:
            # Drop the first two characters and the last one before parsing
            # (presumably an xsd:duration such as "PT1.5S" -- TODO confirm).
            tl_time_str_len = len(row.tl_time)
            tl_time = float(row.tl_time[2:tl_time_str_len - 1])

            if tl_time > max_time:
                max_time = tl_time
                last_key = row.key

    return int(last_key)