Mercurial > hg > dml-open-backendtools
comparison pyspark/dml-analyser.py @ 0:e34cf1b6fe09 tip
commit
author | Daniel Wolff |
---|---|
date | Sat, 20 Feb 2016 18:14:24 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e34cf1b6fe09 |
---|---|
1 # Part of DML (Digital Music Laboratory) | |
2 # | |
3 # This program is free software; you can redistribute it and/or | |
4 # modify it under the terms of the GNU General Public License | |
5 # as published by the Free Software Foundation; either version 2 | |
6 # of the License, or (at your option) any later version. | |
7 # | |
8 # This program is distributed in the hope that it will be useful, | |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 # GNU General Public License for more details. | |
12 # | |
13 # You should have received a copy of the GNU General Public | |
14 # License along with this library; if not, write to the Free Software | |
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
16 | |
17 #!/usr/bin/env python | |
18 # -*- coding: utf-8 -*- | |
19 __author__="hargreavess" | |
20 | |
21 import ConfigParser | |
22 import logging | |
23 import os | |
24 import time | |
25 import shutil | |
26 import argparse | |
27 from os import walk | |
28 import rdflib | |
29 from rdflib import Graph | |
30 from RDFClosure import DeductiveClosure, OWLRL_Semantics | |
31 from transforms.tonicHistogram import find_cla_tonic_histogram, add_tonic_histogram_to_graph | |
32 from transforms.tuningFrequencyStatistics import find_cla_tf_statistics, add_tf_statistics_to_graph | |
33 from transforms.semitoneHistogram import find_cla_semitone_histogram, add_semitone_histogram_to_graph | |
34 from transforms.tonicNormSemitoneHistogram import find_cla_tonic_norm_semitone_histogram, add_tonic_norm_semitone_histogram_to_graph | |
35 | |
# Module-level graph shared by main() (which fills it with the ontology and
# the transform description) and execute_transforms() (which passes it to
# the tonic-normalised semitone histogram transform).
input_rdf_graph = rdflib.Graph()
37 | |
def main():
    """Run the analysis described by the transforms file and print the result.

    Reads 'dml-analyser.cfg' from the current working directory, loads the
    dmlcla ontology named there and then the n3 file given by the
    module-level ``args.transforms`` into the module-level
    ``input_rdf_graph``, expanding the OWL-RL deductive closure after each
    load. The transforms found in that graph are executed and the resulting
    graph is written to stdout serialised as n3.
    """
    settings = ConfigParser.ConfigParser()
    settings.read('dml-analyser.cfg')

    def load_with_inference(source):
        # Parse one n3 source into the shared graph, then materialise the
        # triples inferred from everything loaded so far.
        input_rdf_graph.parse(source, format="n3")
        DeductiveClosure(OWLRL_Semantics).expand(input_rdf_graph)

    # The ontology goes in first, followed by the transform description.
    load_with_inference(settings.get('Ontology', 'dmlclaOntology_URI'))
    load_with_inference(args.transforms)

    # Work out which transforms were requested and on which input files,
    # then run them, collecting the results in a fresh graph.
    requested = find_transforms_in_n3(input_rdf_graph)
    results = execute_transforms(requested, Graph())

    # The serialised result graph is the program's output.
    print(results.serialize(format='n3'))
65 | |
66 # Loop through all transforms, process the corresponding | |
67 # input files appropriately and add the (RDF) result to output_rdf_graph | |
def execute_transforms(transforms, output_rdf_graph):
    """Apply every requested transform and collect the results as RDF.

    Args:
        transforms: dict mapping ``(transform, transform_type)`` tuples to
            the list of input files for that transform, as built by
            find_transforms_in_n3().
        output_rdf_graph: graph that the results are added to.

    Returns:
        The graph returned by the last ``add_*_to_graph`` call, or
        ``output_rdf_graph`` itself when no transform type matched.

    Transform types without a matching branch below are skipped silently.
    The tonic-normalised semitone transform additionally reads the
    module-level ``input_rdf_graph``.
    """
    for (transform, transform_type), input_f_files in transforms.items():

        # Add additional clauses to this if statement
        # for each transform type
        if transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTonic'):

            (tonic_histogram, sample_count) = find_cla_tonic_histogram(input_f_files)
            output_rdf_graph = add_tonic_histogram_to_graph(
                tonic_histogram, output_rdf_graph, transform,
                sample_count, input_f_files)

        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTuningFrequencyStatistics'):

            statistics, sample_count = find_cla_tf_statistics(input_f_files)
            output_rdf_graph = add_tf_statistics_to_graph(
                statistics, output_rdf_graph, transform,
                sample_count, input_f_files)

        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelSemitone'):

            (semitone_histogram, sample_count) = find_cla_semitone_histogram(input_f_files)
            output_rdf_graph = add_semitone_histogram_to_graph(
                semitone_histogram, output_rdf_graph, transform,
                sample_count, input_f_files)

        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTonicNormSemitone'):

            (tonic_norm_semitone_histogram, sample_count) = find_cla_tonic_norm_semitone_histogram(
                input_f_files, input_rdf_graph)
            output_rdf_graph = add_tonic_norm_semitone_histogram_to_graph(
                tonic_norm_semitone_histogram, output_rdf_graph, transform,
                sample_count, input_f_files, input_rdf_graph)

    return output_rdf_graph
100 | |
101 # Find all transforms, and their associated input files, | |
102 # from rdf_graph | |
def find_transforms_in_n3(rdf_graph):
    """Collect the transforms described in rdf_graph and their input files.

    Args:
        rdf_graph: graph to query; it must offer a ``query`` method whose
            result rows expose ``transform``, ``dml_input`` and
            ``transform_type`` attributes.

    Returns:
        A dict mapping each ``(transform, transform_type)`` pair to the list
        of ``dml_input`` values found for it, in query result order. The
        dict is empty when the query yields no rows.
    """
    qres = rdf_graph.query(
        """prefix dml: <http://dml.org/dml/cla#>
        SELECT ?transform ?dml_input ?transform_type
        WHERE {
            ?transform a dml:Transform .
            ?transform dml:input ?dml_input .
            ?transform dml:type ?transform_type .
        }""")

    transforms = dict()

    for row in qres:
        # One row per (transform, input) combination: group the inputs
        # under their transform, creating the list on first sight.
        key = (row.transform, row.transform_type)
        transforms.setdefault(key, []).append(row.dml_input)

    return transforms
132 | |
133 # Determine the mapping between feature file URIs and | |
134 # their source audio file URIs | |
def map_audio_to_feature_files():
    """Map audio file URIs to the n3 feature files that describe them.

    Reads the module-level ``args``. For every line of ``args.audio_files``
    (when that option exists) the corresponding sonic-annotator command line
    is printed; the command itself is not executed here. Then every ``.n3``
    file below ``args.basedir`` is parsed and each subject typed
    ``mo:AudioFile`` is recorded against the path of the file it was found
    in. Progress and the final mapping are printed to stdout.

    Returns:
        A dict mapping the n3 form of each audio file URI to the path of the
        feature file (joined onto the current working directory). If the
        same audio file appears in several feature files, the last one
        visited wins.
    """
    # The argument parser in this file defines no audio_files option, so
    # tolerate its absence instead of failing with AttributeError.
    audio_files = getattr(args, "audio_files", None) or []

    for audio_file in (line.strip() for line in audio_files):

        print("sonic-annotator -T " + args.transforms + " --rdf-basedir " + args.basedir + " <" + audio_file + ">")

    audio_to_feature_file_dict = dict()

    for (dirpath, dirnames, filenames) in walk(args.basedir):
        for filename in filenames:

            print("found file: " + filename)

            if not filename.endswith(".n3"):
                continue

            print("found n3 file: " + filename)

            feature_file = os.path.join(os.getcwd(), dirpath, filename)

            # open and parse n3 file
            rdf_graph = Graph()
            rdf_graph.parse(os.path.join(dirpath, filename), format="n3")

            # find subject in ?subject a mo:AudioFile
            # NOTE(review): the query declares no "mo:" prefix itself, so it
            # presumably relies on the parsed file binding it - confirm.
            qres = rdf_graph.query(
                """SELECT ?audio_file
                WHERE {
                    ?audio_file a mo:AudioFile .
                }""")

            print(len(qres))

            for row in qres:

                audio_uri = row.audio_file.n3()
                print("audio file URI is %s" % audio_uri)
                print("feature file URI is %s" % feature_file)

                # add full file URI, subject to dict
                audio_to_feature_file_dict[audio_uri] = feature_file

    print(audio_to_feature_file_dict)

    return audio_to_feature_file_dict
177 | |
if __name__ == "__main__":

    # Command-line interface. The parsed options are stored in the
    # module-level name `args`, which main() and its helpers read directly.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-T", "--transforms",
        help="the URI of an n3 (RDF) file describing one or more transforms, and the files to which they should be applied",
    )
    parser.add_argument(
        "-b", "--basedir",
        help="the URI of the base output directory",
    )
    args = parser.parse_args()

    main()
188 |