Mercurial > hg > dml-open-backendtools
comparison pyspark/transforms/tonicHistogram.py @ 0:e34cf1b6fe09 tip
commit
author | Daniel Wolff |
---|---|
date | Sat, 20 Feb 2016 18:14:24 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e34cf1b6fe09 |
---|---|
1 # Part of DML (Digital Music Laboratory) | |
2 # | |
3 # This program is free software; you can redistribute it and/or | |
4 # modify it under the terms of the GNU General Public License | |
5 # as published by the Free Software Foundation; either version 2 | |
6 # of the License, or (at your option) any later version. | |
7 # | |
8 # This program is distributed in the hope that it will be useful, | |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 # GNU General Public License for more details. | |
12 # | |
13 # You should have received a copy of the GNU General Public | |
14 # License along with this library; if not, write to the Free Software | |
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
16 | |
17 # -*- coding: utf-8 -*- | |
18 __author__="hargreavess" | |
19 | |
20 from rdflib import Graph, Namespace, BNode, RDF, Literal | |
21 from n3Parser import get_rdf_graph_from_n3 | |
22 from csvParser import get_dict_from_csv, get_array_from_csv | |
23 | |
24 dml_ns = Namespace("http://dml.org/dml/cla#") | |
25 | |
26 # Add triples representing a 'tonic histogram' result to | |
27 # an RDF graph | |
def add_tonic_histogram_to_graph(tonic_histogram, output_rdf_graph, transform, sample_count, input_f_files):
    """Record a tonic histogram result as triples in an RDF graph.

    A fresh blank node is linked from ``transform`` via ``dml:output`` and
    typed as ``dml:TonicHistogram``; it also carries ``dml:sample_count``.
    Every entry of ``input_f_files`` is linked from ``transform`` via
    ``dml:input``.  Each histogram entry gets its own blank node holding
    ``dml:bin_number`` (the entry's key) and ``dml:bin_value`` (its value).

    Returns the ``output_rdf_graph`` object that was passed in.
    """
    add_triple = output_rdf_graph.add

    # Node standing for the histogram as a whole.
    histogram_node = BNode()
    add_triple((transform, dml_ns.output, histogram_node))

    for source_file in input_f_files:
        add_triple((transform, dml_ns.input, source_file))

    add_triple((histogram_node, RDF.type, dml_ns.TonicHistogram))
    add_triple((histogram_node, dml_ns.sample_count, Literal(sample_count)))

    # One blank node per histogram bin.
    for bin_number in tonic_histogram:
        bin_node = BNode()
        bin_value = tonic_histogram.get(bin_number)
        add_triple((histogram_node, dml_ns.bin, bin_node))
        add_triple((bin_node, dml_ns.bin_number, Literal(bin_number)))
        add_triple((bin_node, dml_ns.bin_value, Literal(bin_value)))

    return output_rdf_graph
45 | |
46 # Parse the input_f_files n3 files, and generate | |
47 # a tonic histogram | |
def find_cla_tonic_histogram(input_f_files):
    """Build a histogram of per-piece tonics over a collection of files.

    Bins 1..12 start at zero.  Each file adds one count to the bin named
    by ``find_most_common_key_in_piece`` for that file.

    Returns a tuple ``(histogram, file_count)`` where ``histogram`` is a
    dict and ``file_count`` is ``len(input_f_files)``.
    """
    file_count = len(input_f_files)
    votes = dict.fromkeys(range(1, 13), 0)

    for feature_file in input_f_files:
        # find_last_key_in_piece is the alternative per-piece estimate.
        winner = find_most_common_key_in_piece(feature_file)
        votes[winner] = votes.get(winner) + 1

    return (votes, file_count)
64 | |
def find_most_common_key_in_piece(input_f_file):
    """Return the key with the largest weight in one piece's histogram.

    The histogram comes from ``find_tonic_histogram``.  When several keys
    share the maximum weight, the one reached last while iterating the
    histogram is returned.  -1 is returned if no entry compares equal to
    the maximum.
    """
    histogram = find_tonic_histogram(input_f_file)
    peak = max(histogram.values())

    # Keep every key at the peak; the last one wins, as in a plain
    # overwrite-on-match loop.
    winners = [key for key in histogram if histogram[key] == peak]
    return winners[-1] if winners else -1
77 | |
78 # Parse the input_f_files n3 file, and generate | |
79 # a tonic histogram | |
def find_tonic_histogram(input_f_file):
    """Build a histogram over keys 1..12 for a single feature file.

    For a ``.csv`` file, each row's key (column 1) is weighted by the
    difference between the next row's column 0 and its own column 0, so
    the final row contributes nothing.  For any other file the key
    returned by ``find_last_key_in_piece`` is counted once.

    Returns the histogram dict.
    """
    histogram = dict.fromkeys(range(1, 13), 0)

    if not input_f_file.endswith('.csv'):
        # TODO - n3 version of tonic histogram
        # for now use last key in piece
        key = find_last_key_in_piece(input_f_file)
        histogram[key] = histogram.get(key) + 1
        return histogram

    # columns: ['time','keynr','label']
    rows = get_array_from_csv(input_f_file)

    for following_idx in range(1, len(rows)):
        following, current = rows[following_idx], rows[following_idx - 1]
        span = following[0] - current[0]
        key = int(current[1])
        histogram[key] = histogram.get(key) + span

    return histogram
107 | |
108 # Determine the last (temporally) key in the | |
109 # input_f_file n3 file | |
def find_last_key_in_piece(input_f_file):
    """Return the key attached to the latest timestamp in a feature file.

    ``.csv`` files are read with ``get_dict_from_csv`` using the columns
    ``time``, ``keynr`` and ``label``; any other file is parsed as n3 and
    queried for events that have both a timeline time and a feature value.

    When several events share the latest timestamp, the first one
    encountered is kept.  Returns 0 when the file yields no events.
    """
    if input_f_file.endswith('.csv'):
        csv_rows = get_dict_from_csv(input_f_file, columtype = ['time','keynr','label'])
        timed_keys = ((float(row['time']), row['keynr']) for row in csv_rows)
    else:
        key_feature_graph = get_rdf_graph_from_n3(input_f_file)

        qres = key_feature_graph.query(
            """prefix dml: <http://dml.org/dml/cla#>
            prefix event: <http://purl.org/NET/c4dm/event.owl#>
            prefix tl: <http://purl.org/NET/c4dm/timeline.owl#>
            prefix af: <http://purl.org/ontology/af/>
            SELECT ?event ?key ?tl_time
            WHERE {
                ?event event:time ?event_time .
                ?event_time tl:at ?tl_time .
                ?event af:feature ?key .
            }""")

        # The slice drops the first two characters and the last one before
        # parsing the number.
        # NOTE(review): presumably the literal looks like "PT<seconds>S" -
        # confirm against the feature files.
        timed_keys = ((float(row.tl_time[2:-1]), row.key) for row in qres)

    # Start below every real timestamp.  Starting at 0 with a strict ">"
    # would skip an event stamped exactly 0, so a piece whose only key
    # event sits at time 0 would report key 0 instead of its real key.
    max_time = float('-inf')
    last_key = 0

    for tl_time, key in timed_keys:
        if tl_time > max_time:
            max_time = tl_time
            last_key = key

    return int(last_key)