dml-open-backendtools: pyspark/dml-analyser.py annotate

annotate pyspark/dml-analyser.py @ 0:e34cf1b6fe09 tip

commit

author	Daniel Wolff
date	Sat, 20 Feb 2016 18:14:24 +0100
parents
children

rev	line source
Daniel@0	1 # Part of DML (Digital Music Laboratory)
Daniel@0	2 #
Daniel@0	3 # This program is free software; you can redistribute it and/or
Daniel@0	4 # modify it under the terms of the GNU General Public License
Daniel@0	5 # as published by the Free Software Foundation; either version 2
Daniel@0	6 # of the License, or (at your option) any later version.
Daniel@0	7 #
Daniel@0	8 # This program is distributed in the hope that it will be useful,
Daniel@0	9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
Daniel@0	10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Daniel@0	11 # GNU General Public License for more details.
Daniel@0	12 #
Daniel@0	13 # You should have received a copy of the GNU General Public
Daniel@0	14 # License along with this library; if not, write to the Free Software
Daniel@0	15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Daniel@0	16
Daniel@0	17 #!/usr/bin/env python
Daniel@0	18 # -- coding: utf-8 --
Daniel@0	19 __author__="hargreavess"
Daniel@0	20
Daniel@0	21 import ConfigParser
Daniel@0	22 import logging
Daniel@0	23 import os
Daniel@0	24 import time
Daniel@0	25 import shutil
Daniel@0	26 import argparse
Daniel@0	27 from os import walk
Daniel@0	28 import rdflib
Daniel@0	29 from rdflib import Graph
Daniel@0	30 from RDFClosure import DeductiveClosure, OWLRL_Semantics
Daniel@0	31 from transforms.tonicHistogram import find_cla_tonic_histogram, add_tonic_histogram_to_graph
Daniel@0	32 from transforms.tuningFrequencyStatistics import find_cla_tf_statistics, add_tf_statistics_to_graph
Daniel@0	33 from transforms.semitoneHistogram import find_cla_semitone_histogram, add_semitone_histogram_to_graph
Daniel@0	34 from transforms.tonicNormSemitoneHistogram import find_cla_tonic_norm_semitone_histogram, add_tonic_norm_semitone_histogram_to_graph
Daniel@0	35
Daniel@0	36 input_rdf_graph = Graph()
Daniel@0	37
def main():
    """Run every transform described in the transforms file and print the result.

    Reads ``dml-analyser.cfg`` (relative to the current working directory),
    loads the dmlcla ontology it names and then the n3 file given by
    ``args.transforms`` into the module-level ``input_rdf_graph``, applying
    the OWL-RL deductive closure after each load.  The transforms found in
    that graph are executed and the resulting graph is printed to stdout,
    serialised as n3.

    Relies on the module-level ``args`` namespace created by the script
    entry point.
    """
    # Analyser settings; only the ontology location is read here.
    settings = ConfigParser.ConfigParser()
    settings.read('dml-analyser.cfg')

    # Load the dmlcla ontology, then expand the graph with the OWL-RL closure.
    ontology_uri = settings.get('Ontology', 'dmlclaOntology_URI')
    input_rdf_graph.parse(ontology_uri, format="n3")
    DeductiveClosure(OWLRL_Semantics).expand(input_rdf_graph)

    # Load the transform descriptions on top of the ontology and expand again.
    input_rdf_graph.parse(args.transforms, format="n3")
    DeductiveClosure(OWLRL_Semantics).expand(input_rdf_graph)

    # Determine which transforms to apply (and to which input files), then
    # run them, collecting the results in a fresh output graph.
    pending_transforms = find_transforms_in_n3(input_rdf_graph)
    results_graph = execute_transforms(pending_transforms, Graph())

    # Write the output RDF to stdout.
    print(results_graph.serialize(format='n3'))
Daniel@0	65
def execute_transforms(transforms, output_rdf_graph):
    """Apply each requested transform and add its (RDF) result to the output graph.

    Args:
        transforms: dict mapping ``(transform, transform_type)`` tuples to the
            list of input files for that transform, in the shape built by
            ``find_transforms_in_n3``.
        output_rdf_graph: graph that accumulates the result of each transform.

    Returns:
        The graph handed back by the last ``add_*_to_graph`` helper that ran,
        or ``output_rdf_graph`` itself when no transform was executed.

    Transforms whose type is not one of the collection-level types handled
    below are skipped without any output.
    """
    for (transform, transform_type), input_f_files in transforms.items():

        # Add additional clauses to this if statement
        # for each transform type
        if transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTonic'):

            (tonic_histogram, sample_count) = find_cla_tonic_histogram(input_f_files)
            output_rdf_graph = add_tonic_histogram_to_graph(
                tonic_histogram, output_rdf_graph, transform, sample_count, input_f_files)

        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTuningFrequencyStatistics'):

            statistics, sample_count = find_cla_tf_statistics(input_f_files)
            output_rdf_graph = add_tf_statistics_to_graph(
                statistics, output_rdf_graph, transform, sample_count, input_f_files)

        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelSemitone'):

            (semitone_histogram, sample_count) = find_cla_semitone_histogram(input_f_files)
            output_rdf_graph = add_semitone_histogram_to_graph(
                semitone_histogram, output_rdf_graph, transform, sample_count, input_f_files)

        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTonicNormSemitone'):

            # This transform additionally consults the module-level input
            # graph, both when computing and when recording the histogram.
            (tonic_norm_semitone_histogram, sample_count) = find_cla_tonic_norm_semitone_histogram(
                input_f_files, input_rdf_graph)
            output_rdf_graph = add_tonic_norm_semitone_histogram_to_graph(
                tonic_norm_semitone_histogram, output_rdf_graph, transform,
                sample_count, input_f_files, input_rdf_graph)

    return output_rdf_graph
Daniel@0	100
def find_transforms_in_n3(rdf_graph):
    """Find all transforms, and their associated input files, in ``rdf_graph``.

    Args:
        rdf_graph: graph object whose ``query`` method accepts a SPARQL string
            and yields rows exposing ``transform``, ``dml_input`` and
            ``transform_type`` attributes.

    Returns:
        dict mapping each ``(transform, transform_type)`` tuple to the list of
        ``dml_input`` values found for it, in the order the query returned
        them.  Empty when the graph describes no transforms.
    """
    qres = rdf_graph.query(
        """prefix dml: <http://dml.org/dml/cla#>
        SELECT ?transform ?dml_input ?transform_type
        WHERE {
            ?transform a dml:Transform .
            ?transform dml:input ?dml_input .
            ?transform dml:type ?transform_type .
        }""")

    transforms = dict()

    for row in qres:
        # One result row per (transform, input) pair: group the inputs under
        # their transform, creating the list on first sight of the key.
        key = (row.transform, row.transform_type)
        transforms.setdefault(key, []).append(row.dml_input)

    return transforms
Daniel@0	132
def map_audio_to_feature_files():
    """Determine the mapping between source audio file URIs and feature files.

    Prints the ``sonic-annotator`` command line for each entry of
    ``args.audio_files`` (the command is only printed, not executed), then
    walks ``args.basedir`` and parses every ``.n3`` file found there, looking
    for subjects typed ``mo:AudioFile``.  Progress is reported on stdout.

    Relies on the module-level ``args`` namespace created by the script
    entry point.

    Returns:
        dict mapping each audio file URI (in its n3 form) to the path of the
        feature file that mentions it, joined onto the current working
        directory.
    """
    # The script's argument parser defines no audio-files option, so the
    # attribute may be missing; treat that as "no audio files".
    audio_files = getattr(args, "audio_files", None) or []

    # Loop through audio files
    for audio_file in [line.strip() for line in audio_files]:
        print("sonic-annotator -T " + args.transforms + " --rdf-basedir " + args.basedir + " <" + audio_file + ">")

    audio_to_feature_file_dict = dict()

    for (dirpath, dirnames, filenames) in walk(args.basedir):
        for filename in filenames:

            print("found file: " + filename)

            if not filename.endswith(".n3"):
                continue

            print("found n3 file: " + filename)

            # open and parse n3 file
            rdf_graph = Graph()
            rdf_graph.parse(os.path.join(dirpath, filename), format="n3")

            # find subject in ?subject a mo:AudioFile
            # NOTE(review): the query declares no ``mo:`` prefix; presumably it
            # relies on the prefix bindings of the parsed file - confirm.
            qres = rdf_graph.query(
                """SELECT ?audio_file
                WHERE {
                    ?audio_file a mo:AudioFile .
                }""")

            print(len(qres))

            feature_file_path = os.path.join(os.getcwd(), dirpath, filename)

            for row in qres:
                audio_file_uri = row.audio_file.n3()
                print("audio file URI is %s" % audio_file_uri)
                print("feature file URI is %s" % feature_file_path)
                audio_to_feature_file_dict[audio_file_uri] = feature_file_path

    print(audio_to_feature_file_dict)

    return audio_to_feature_file_dict
Daniel@0	177
if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    # main() parses the transforms file unconditionally, so make argparse
    # reject a missing -T up front instead of failing later inside the parser
    # of the RDF library.
    parser.add_argument("-T", "--transforms", required=True, help="the URI of an n3 (RDF) file describing one or more transforms, and the files to which they should be applied")
    parser.add_argument("-b", "--basedir", help="the URI of the base output directory")

    # ``args`` is intentionally module-level: main() and
    # map_audio_to_feature_files() read it as a global.
    args = parser.parse_args()

    main()
Daniel@0	188

Mercurial > hg > dml-open-backendtools

annotate pyspark/dml-analyser.py @ 0:e34cf1b6fe09 tip