Mercurial > hg > dml-open-backendtools
diff pyspark/dml-analyser.py @ 0:e34cf1b6fe09 tip
commit
author | Daniel Wolff |
---|---|
date | Sat, 20 Feb 2016 18:14:24 +0100 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: pyspark/dml-analyser.py
#
# Part of DML (Digital Music Laboratory)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

"""Apply collection-level analysis transforms described in an n3 (RDF) file.

The script reads the transform descriptions given with ``-T``, runs each
transform over its input files and prints the resulting RDF graph, serialised
as n3, to stdout.
"""
__author__ = "hargreavess"

try:
    import ConfigParser
except ImportError:
    # Python 3 renamed the module; keep the original name in scope.
    import configparser as ConfigParser
import argparse
import logging
import os
import shutil
import time
from os import walk

import rdflib
from rdflib import Graph
from RDFClosure import DeductiveClosure, OWLRL_Semantics
from transforms.tonicHistogram import find_cla_tonic_histogram, add_tonic_histogram_to_graph
from transforms.tuningFrequencyStatistics import find_cla_tf_statistics, add_tf_statistics_to_graph
from transforms.semitoneHistogram import find_cla_semitone_histogram, add_semitone_histogram_to_graph
from transforms.tonicNormSemitoneHistogram import find_cla_tonic_norm_semitone_histogram, add_tonic_norm_semitone_histogram_to_graph

# Transform type URIs recognised by execute_transforms().
_CLA_TONIC = rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTonic')
_CLA_TUNING_FREQUENCY_STATISTICS = rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTuningFrequencyStatistics')
_CLA_SEMITONE = rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelSemitone')
_CLA_TONIC_NORM_SEMITONE = rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTonicNormSemitone')

# Shared input graph: filled by main() and also read by execute_transforms().
input_rdf_graph = Graph()


def main():
    """Load the ontology and the transform description, run the transforms
    and print the resulting graph to stdout as n3.

    Reads the module-level ``args`` namespace that is set up when the file
    is run as a script.
    """
    # get config
    config = ConfigParser.ConfigParser()
    config.read('dml-analyser.cfg')

    # parse dmlcla ontology
    input_rdf_graph.parse(config.get('Ontology', 'dmlclaOntology_URI'), format="n3")
    DeductiveClosure(OWLRL_Semantics).expand(input_rdf_graph)

    # parse input rdf
    input_rdf_graph.parse(args.transforms, format="n3")
    DeductiveClosure(OWLRL_Semantics).expand(input_rdf_graph)

    # Determine which transforms are to be applied, and
    # the associated input files
    transforms = find_transforms_in_n3(input_rdf_graph)

    # Apply the transform(s) to each file and create
    # rdf results graph
    output_rdf_graph = execute_transforms(transforms, Graph())

    # Write output rdf to stdout
    print(output_rdf_graph.serialize(format='n3'))


def execute_transforms(transforms, output_rdf_graph):
    """Run every transform and add its (RDF) result to ``output_rdf_graph``.

    ``transforms`` maps ``(transform, transform_type)`` tuples to the list of
    inputs for that transform, as built by find_transforms_in_n3().
    Transform types that match none of the known URIs are skipped.

    Returns the output graph as handed back by the ``add_*_to_graph`` helpers.
    """
    for (transform, transform_type), input_f_files in transforms.items():

        # Add additional clauses to this if statement
        # for each transform type
        if transform_type == _CLA_TONIC:

            (tonic_histogram, sample_count) = find_cla_tonic_histogram(input_f_files)
            output_rdf_graph = add_tonic_histogram_to_graph(tonic_histogram, output_rdf_graph, transform, sample_count, input_f_files)

        elif transform_type == _CLA_TUNING_FREQUENCY_STATISTICS:

            statistics, sample_count = find_cla_tf_statistics(input_f_files)
            output_rdf_graph = add_tf_statistics_to_graph(statistics, output_rdf_graph, transform, sample_count, input_f_files)

        elif transform_type == _CLA_SEMITONE:

            (semitone_histogram, sample_count) = find_cla_semitone_histogram(input_f_files)
            output_rdf_graph = add_semitone_histogram_to_graph(semitone_histogram, output_rdf_graph, transform, sample_count, input_f_files)

        elif transform_type == _CLA_TONIC_NORM_SEMITONE:

            # This transform additionally needs the module-level input graph.
            (tonic_norm_semitone_histogram, sample_count) = find_cla_tonic_norm_semitone_histogram(input_f_files, input_rdf_graph)
            output_rdf_graph = add_tonic_norm_semitone_histogram_to_graph(tonic_norm_semitone_histogram, output_rdf_graph, transform, sample_count, input_f_files, input_rdf_graph)

    return output_rdf_graph


def find_transforms_in_n3(rdf_graph):
    """Find all transforms, and their associated inputs, in ``rdf_graph``.

    Returns a dict mapping ``(transform, transform_type)`` to the list of
    ``dml:input`` values found for that pair.
    """
    qres = rdf_graph.query(
        """prefix dml: <http://dml.org/dml/cla#>
        SELECT ?transform ?dml_input ?transform_type
        WHERE {
            ?transform a dml:Transform .
            ?transform dml:input ?dml_input .
            ?transform dml:type ?transform_type .
        }""")

    transforms = dict()

    for row in qres:
        # One result row per input; group the inputs by transform and type.
        transforms.setdefault((row.transform, row.transform_type), []).append(row.dml_input)

    return transforms


def map_audio_to_feature_files():
    """Determine the mapping between source audio file URIs and their
    feature files.

    Prints one sonic-annotator command line per entry of ``args.audio_files``
    (the commands are only printed, not executed), then walks
    ``args.basedir`` and parses every ``.n3`` file found there.

    Returns a dict mapping the n3 form of each ``mo:AudioFile`` subject to
    the path of the feature file it was found in, joined onto the current
    working directory.
    """
    # The argument parser in this file defines no audio_files option, so
    # fall back to an empty list instead of raising AttributeError.
    audio_files = getattr(args, 'audio_files', None) or []

    # Loop through audio files
    for audio_file in [line.strip() for line in audio_files]:

        print("sonic-annotator -T " + args.transforms + " --rdf-basedir " + args.basedir + " <" + audio_file + ">")

    audio_to_feature_file_dict = dict()

    for (dirpath, dirnames, filenames) in walk(args.basedir):
        for filename in filenames:

            print("found file: " + filename)

            if not filename.endswith(".n3"):
                continue

            print("found n3 file: " + filename)

            # open and parse n3 file
            rdf_graph = Graph()
            rdf_graph.parse(os.path.join(dirpath, filename), format="n3")

            # find subject in ?subject a mo:AudioFile
            # NOTE(review): the query declares no mo: prefix; presumably it
            # resolves through the prefix bindings of the parsed file - confirm.
            qres = rdf_graph.query(
                """SELECT ?audio_file
                WHERE {
                    ?audio_file a mo:AudioFile .
                }""")

            print(len(qres))

            feature_file = os.path.join(os.getcwd(), dirpath, filename)

            for row in qres:

                print("audio file URI is %s" % row.audio_file.n3())
                print("feature file URI is %s" % feature_file)

                # add full file URI, subject to dict
                audio_to_feature_file_dict[row.audio_file.n3()] = feature_file

    print(audio_to_feature_file_dict)

    return audio_to_feature_file_dict


if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    # main() parses this file unconditionally, so the option is mandatory.
    parser.add_argument("-T", "--transforms", required=True, help="the URI of an n3 (RDF) file describing one or more transforms, and the files to which they should be applied")
    parser.add_argument("-b", "--basedir", help="the URI of the base output directory")

    args = parser.parse_args()

    main()