Mercurial > hg > dml-open-backendtools
view pyspark/transforms/tonicHistogram.py @ 0:e34cf1b6fe09 tip
commit
author | Daniel Wolff |
---|---|
date | Sat, 20 Feb 2016 18:14:24 +0100 |
parents | |
children |
line wrap: on
line source
# Part of DML (Digital Music Laboratory)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

# -*- coding: utf-8 -*-
__author__="hargreavess"

from rdflib import Graph, Namespace, BNode, RDF, Literal
from n3Parser import get_rdf_graph_from_n3
from csvParser import get_dict_from_csv, get_array_from_csv

dml_ns = Namespace("http://dml.org/dml/cla#")


def _empty_tonic_histogram():
    """Return a new histogram dict whose bins 1..12 are all zero."""
    return dict.fromkeys(range(1, 13), 0)


# Add triples representing a 'tonic histogram' result to
# an RDF graph
def add_tonic_histogram_to_graph(tonic_histogram, output_rdf_graph, transform,
                                 sample_count, input_f_files):
    """Describe a tonic histogram result as triples in output_rdf_graph.

    A blank node typed dml:TonicHistogram is attached to `transform` via
    dml:output, every entry of `input_f_files` is attached to `transform`
    via dml:input, and each histogram entry becomes its own blank node
    carrying dml:bin_number and dml:bin_value literals.

    tonic_histogram  -- dict mapping bin number to bin value
    output_rdf_graph -- graph object that receives the triples (via .add)
    transform        -- node the output and the inputs are attached to
    sample_count     -- stored as the dml:sample_count literal
    input_f_files    -- iterable of nodes recorded as dml:input

    Returns the same output_rdf_graph object that was passed in.
    """
    output_bnode = BNode()
    output_rdf_graph.add((transform, dml_ns.output, output_bnode))

    for input_f_file in input_f_files:
        output_rdf_graph.add((transform, dml_ns.input, input_f_file))

    output_rdf_graph.add((output_bnode, RDF.type, dml_ns.TonicHistogram))
    output_rdf_graph.add((output_bnode, dml_ns.sample_count,
                          Literal(sample_count)))

    for bin_number, bin_value in tonic_histogram.items():
        bin_bnode = BNode()
        output_rdf_graph.add((output_bnode, dml_ns.bin, bin_bnode))
        output_rdf_graph.add((bin_bnode, dml_ns.bin_number,
                              Literal(bin_number)))
        output_rdf_graph.add((bin_bnode, dml_ns.bin_value,
                              Literal(bin_value)))

    return output_rdf_graph


# Parse the input_f_files n3 files, and generate
# a tonic histogram
def find_cla_tonic_histogram(input_f_files):
    """Count, over all input files, how often each key is the dominant one.

    Each file contributes exactly one count, to the bin returned by
    find_most_common_key_in_piece (find_last_key_in_piece would be the
    alternative per-piece strategy).

    Returns a tuple (histogram dict, number of input files).  The dict
    always contains bins 1..12; a key outside that range gets a bin of
    its own instead of raising TypeError on `None + 1`.
    """
    num_f_files = len(input_f_files)
    tonic_hist = _empty_tonic_histogram()

    for input_f_file in input_f_files:
        tonic = find_most_common_key_in_piece(input_f_file)
        tonic_hist[tonic] = tonic_hist.get(tonic, 0) + 1

    return (tonic_hist, num_f_files)


def find_most_common_key_in_piece(input_f_file):
    """Return the bin of the per-piece histogram holding the largest value.

    When several bins share the maximum, the one visited last while
    iterating over the histogram wins.
    """
    tonic_hist = find_tonic_histogram(input_f_file)
    duration_of_tonic = max(tonic_hist.values())

    # NOTE(review): if every bin is zero (e.g. a CSV with fewer than two
    # rows, where no duration can be measured) all bins tie and the last
    # one is returned -- confirm whether that is the intended fallback.
    result = -1
    for tonic, duration in tonic_hist.items():
        if duration == duration_of_tonic:
            result = tonic

    return result


# Parse the input_f_files n3 file, and generate
# a tonic histogram
def find_tonic_histogram(input_f_file):
    """Build the histogram of keys for a single piece.

    For '.csv' input, each row's key is weighted by the time elapsed
    until the next row, so the final row contributes no duration.
    For any other input the last key of the piece receives a count of 1.

    Returns a dict that always contains bins 1..12; a key outside that
    range gets a bin of its own instead of raising TypeError.
    """
    tonic_hist = _empty_tonic_histogram()

    if input_f_file.endswith('.csv'):
        # ['time','keynr','label']
        # assumes get_array_from_csv yields numeric times in column 0
        # (they are subtracted directly) -- TODO confirm
        csv_array = get_array_from_csv(input_f_file)

        for idx in range(1, len(csv_array)):
            previous_row = csv_array[idx - 1]
            tonic_duration = csv_array[idx][0] - previous_row[0]
            tonic = int(previous_row[1])
            tonic_hist[tonic] = tonic_hist.get(tonic, 0) + tonic_duration
    else:
        # TODO - n3 version of tonic histogram
        # for now use last key in piece
        tonic = find_last_key_in_piece(input_f_file)
        tonic_hist[tonic] = tonic_hist.get(tonic, 0) + 1

    return tonic_hist


# Determine the last (temporally) key in the
# input_f_file n3 file
def find_last_key_in_piece(input_f_file):
    """Return, as an int, the key of the event with the greatest time.

    '.csv' input is read through get_dict_from_csv; anything else is
    parsed as n3 and queried with SPARQL.  When several events share the
    greatest time, the first one encountered wins.  Returns 0 when the
    input holds no events at all.
    """
    # Start below every possible time stamp so that an event located at
    # time 0 is still picked up (a piece whose only key event sits at
    # t=0 previously came back as key 0).
    max_time = float('-inf')
    last_key = 0

    if input_f_file.endswith('.csv'):
        csv_dict = get_dict_from_csv(input_f_file,
                                     columtype = ['time','keynr','label'])

        for row in csv_dict:
            tl_time = float(row['time'])

            if tl_time > max_time:
                max_time = tl_time
                last_key = row['keynr']
    else:
        key_feature_graph = get_rdf_graph_from_n3(input_f_file)

        qres = key_feature_graph.query(
            """prefix dml:     <http://dml.org/dml/cla#>
                prefix event:   <http://purl.org/NET/c4dm/event.owl#>
                prefix tl:      <http://purl.org/NET/c4dm/timeline.owl#>
                prefix af:      <http://purl.org/ontology/af/>
                SELECT ?event ?key ?tl_time
                WHERE {
                    ?event event:time ?event_time .
                    ?event_time tl:at ?tl_time .
                    ?event af:feature ?key .
                }""")

        for row in qres:
            # Drop the first two characters and the last one before
            # converting; presumably the literal looks like "PT12.5S"
            # -- TODO confirm against the n3 feature files.
            tl_time = float(row.tl_time[2:-1])

            if tl_time > max_time:
                max_time = tl_time
                last_key = row.key

    return int(last_key)