comparison pyspark/dml-analyser.py @ 0:e34cf1b6fe09 tip

commit
author Daniel Wolff
date Sat, 20 Feb 2016 18:14:24 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e34cf1b6fe09
1 # Part of DML (Digital Music Laboratory)
2 #
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public
14 # License along with this library; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
17 #!/usr/bin/env python
18 # -*- coding: utf-8 -*-
19 __author__="hargreavess"
20
21 import ConfigParser
22 import logging
23 import os
24 import time
25 import shutil
26 import argparse
27 from os import walk
28 import rdflib
29 from rdflib import Graph
30 from RDFClosure import DeductiveClosure, OWLRL_Semantics
31 from transforms.tonicHistogram import find_cla_tonic_histogram, add_tonic_histogram_to_graph
32 from transforms.tuningFrequencyStatistics import find_cla_tf_statistics, add_tf_statistics_to_graph
33 from transforms.semitoneHistogram import find_cla_semitone_histogram, add_semitone_histogram_to_graph
34 from transforms.tonicNormSemitoneHistogram import find_cla_tonic_norm_semitone_histogram, add_tonic_norm_semitone_histogram_to_graph
35
# Module-level graph shared across functions: main() parses the dmlcla
# ontology and the transform description into it, and execute_transforms()
# hands it to the tonic-normalised semitone histogram helpers.
36 input_rdf_graph = Graph()
37
def main():
    """Load the inputs, run the requested transforms and print the result.

    Reads 'dml-analyser.cfg' to locate the dmlcla ontology, parses that
    ontology and the transform description named by the module-level
    ``args.transforms`` into the module-level ``input_rdf_graph``, applies
    every transform found there and writes the resulting graph to stdout
    serialised as n3.
    """

    def load_and_expand(source):
        # Parse one n3 source into the shared graph, then expand the graph
        # with DeductiveClosure(OWLRL_Semantics).
        input_rdf_graph.parse(source, format="n3")
        DeductiveClosure(OWLRL_Semantics).expand(input_rdf_graph)

    # get config
    settings = ConfigParser.ConfigParser()
    settings.read('dml-analyser.cfg')

    # parse the dmlcla ontology first, then the input rdf
    load_and_expand(settings.get('Ontology', 'dmlclaOntology_URI'))
    load_and_expand(args.transforms)

    # Determine which transforms are to be applied, and
    # the associated input files
    requested = find_transforms_in_n3(input_rdf_graph)

    # Apply the transform(s) to each file, collecting the
    # results in a freshly initialised output graph
    results = execute_transforms(requested, Graph())

    # Write output rdf to stdout
    print(results.serialize(format='n3'))
65
66 # Loop through all transforms, process the corresponding
67 # input files appropriately and add the (RDF) result to output_rdf_graph
def execute_transforms(transforms, output_rdf_graph):
    """Apply every requested transform and accumulate the RDF results.

    transforms       -- mapping of (transform, transform_type) keys to the
                        list of input files for that transform, as built by
                        find_transforms_in_n3()
    output_rdf_graph -- graph the results are added to

    Each entry is dispatched on its transform_type URI to the matching
    find_cla_* / add_*_to_graph pair. Entries whose type matches none of
    the known URIs are skipped without error.

    Returns the graph handed back by the last add_*_to_graph call, or
    output_rdf_graph unchanged when no transform was applied.
    """

    for (transform, transform_type), input_f_files in transforms.items():

        # Add additional clauses to this if statement
        # for each transform type
        if transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTonic'):

            (tonic_histogram, sample_count) = find_cla_tonic_histogram(input_f_files)
            output_rdf_graph = add_tonic_histogram_to_graph(tonic_histogram, output_rdf_graph, transform, sample_count, input_f_files)

        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTuningFrequencyStatistics'):

            statistics, sample_count = find_cla_tf_statistics(input_f_files)
            output_rdf_graph = add_tf_statistics_to_graph(statistics, output_rdf_graph, transform, sample_count, input_f_files)

        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelSemitone'):

            (semitone_histogram, sample_count) = find_cla_semitone_histogram(input_f_files)
            output_rdf_graph = add_semitone_histogram_to_graph(semitone_histogram, output_rdf_graph, transform, sample_count, input_f_files)

        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTonicNormSemitone'):

            # This transform additionally needs the module-level input graph
            (tonic_norm_semitone_histogram, sample_count) = find_cla_tonic_norm_semitone_histogram(input_f_files, input_rdf_graph)
            output_rdf_graph = add_tonic_norm_semitone_histogram_to_graph(tonic_norm_semitone_histogram, output_rdf_graph, transform, sample_count, input_f_files, input_rdf_graph)

    return output_rdf_graph
100
101 # Find all transforms, and their associated input files,
102 # from rdf_graph
def find_transforms_in_n3(rdf_graph):
    """Collect the transforms described in rdf_graph and their input files.

    rdf_graph -- object whose query() method accepts a SPARQL string and
                 yields rows exposing .transform, .dml_input and
                 .transform_type

    Returns a dict mapping each (transform, transform_type) pair to the
    list of dml:input values found for it, in the order the query yields
    them.
    """

    qres = rdf_graph.query(
        """prefix dml: <http://dml.org/dml/cla#>
        SELECT ?transform ?dml_input ?transform_type
        WHERE {
            ?transform a dml:Transform .
            ?transform dml:input ?dml_input .
            ?transform dml:type ?transform_type .
        }""")

    transforms = dict()

    for row in qres:
        # One row per (transform, input) combination: group the inputs
        # under their transform. setdefault creates the list on first sight
        # of a key (dict.has_key no longer exists in Python 3).
        transforms.setdefault((row.transform, row.transform_type), []).append(row.dml_input)

    return transforms
132
133 # Determine the mapping between feature file URIs and
134 # their source audio file URIs
def map_audio_to_feature_files():
    """Map audio file URIs to the feature (.n3) files that describe them.

    Reads the module-level ``args``. For every entry in args.audio_files
    the sonic-annotator command line is printed (it is not executed). Then
    every .n3 file below args.basedir is parsed and each subject typed
    mo:AudioFile is recorded against the path of the file it was found in.
    Progress is printed along the way.

    Returns the resulting dict, keyed by the n3 form of the audio file URI
    and valued by the feature file path joined onto the current working
    directory. The dict is also printed.

    NOTE(review): the argument parser visible in this file defines no
    audio_files option, so args.audio_files looks unset - confirm where it
    is meant to come from.
    NOTE(review): the query uses the mo: prefix without declaring it;
    presumably it relies on the prefix being bound by the parsed file -
    confirm.
    """

    # Loop through audio files
    lines = [line.strip() for line in args.audio_files]

    for audio_file in lines:
        print("sonic-annotator -T " + args.transforms + " --rdf-basedir " + args.basedir + " <" + audio_file + ">")

    audio_to_feature_file_dict = dict()

    for (dirpath, dirnames, filenames) in walk(args.basedir):
        for filename in filenames:

            print("found file: " + filename)

            if not filename.endswith(".n3"):
                continue

            print("found n3 file: " + filename)

            # open and parse n3 file
            rdf_graph = Graph()
            rdf_graph.parse(os.path.join(dirpath, filename), format="n3")

            # find subject in ?subject a mo:AudioFile
            qres = rdf_graph.query(
                """SELECT ?audio_file
                WHERE {
                    ?audio_file a mo:AudioFile .
                }""")

            print(len(qres))

            # full location of the feature file, shared by every match in it
            feature_file = os.path.join(os.getcwd(), dirpath, filename)

            for row in qres:

                audio_uri = row.audio_file.n3()
                print("audio file URI is %s" % audio_uri)
                print("feature file URI is %s" % feature_file)
                audio_to_feature_file_dict[audio_uri] = feature_file

    print(audio_to_feature_file_dict)

    return audio_to_feature_file_dict
177
if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    # main() passes args.transforms straight to the graph parser, so the
    # option is mandatory: a missing -T now yields a usage error instead of
    # handing None to the parser.
    parser.add_argument("-T", "--transforms", required=True, help="the URI of an n3 (RDF) file describing one or more transforms, and the files to which they should be applied")
    parser.add_argument("-b", "--basedir", help="the URI of the base output directory")

    # args stays a module-level name: main() and
    # map_audio_to_feature_files() read it as a global.
    args = parser.parse_args()

    main()
188