dml-open-backendtools: pyspark/dml-analyser.py comparison

comparison pyspark/dml-analyser.py @ 0:e34cf1b6fe09 tip

commit

author	Daniel Wolff
date	Sat, 20 Feb 2016 18:14:24 +0100
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:e34cf1b6fe09
+# Part of DML (Digital Music Laboratory)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+__author__="hargreavess"
+import ConfigParser
+import logging
+import os
+import time
+import shutil
+import argparse
+from os import walk
+import rdflib
+from rdflib import Graph
+from RDFClosure import DeductiveClosure, OWLRL_Semantics
+from transforms.tonicHistogram import find_cla_tonic_histogram, add_tonic_histogram_to_graph
+from transforms.tuningFrequencyStatistics import find_cla_tf_statistics, add_tf_statistics_to_graph
+from transforms.semitoneHistogram import find_cla_semitone_histogram, add_semitone_histogram_to_graph
+from transforms.tonicNormSemitoneHistogram import find_cla_tonic_norm_semitone_histogram, add_tonic_norm_semitone_histogram_to_graph
+input_rdf_graph = Graph()
def main():
    """Run the collection-level analysis and print the result as n3.

    Reads ``dml-analyser.cfg``, loads the dmlcla ontology and the n3 file
    named by ``args.transforms`` into the module-level ``input_rdf_graph``
    (expanding the OWL-RL deductive closure after each load), applies every
    transform found in that graph and writes the resulting graph to stdout.

    Relies on the module-level ``args`` namespace being bound before the
    call.
    """
    # Load the analyser settings.
    settings = ConfigParser.ConfigParser()
    settings.read('dml-analyser.cfg')

    # Load the dmlcla ontology and expand its deductive closure.
    ontology_uri = settings.get('Ontology', 'dmlclaOntology_URI')
    input_rdf_graph.parse(ontology_uri, format="n3")
    DeductiveClosure(OWLRL_Semantics).expand(input_rdf_graph)

    # Load the transform description on top of the ontology and expand
    # again so the new triples take part in the closure as well.
    input_rdf_graph.parse(args.transforms, format="n3")
    DeductiveClosure(OWLRL_Semantics).expand(input_rdf_graph)

    # Graph that collects the results of all transforms.
    results_graph = Graph()

    # Work out which transforms were requested and on which input files,
    # then run them.
    requested = find_transforms_in_n3(input_rdf_graph)
    results_graph = execute_transforms(requested, results_graph)

    # Emit the result graph on stdout.
    serialised = results_graph.serialize(format='n3')
    print(serialised)
+# Loop through all transforms, process the corresponding
+# input files appropriately and add the (RDF) result to output_rdf_graph
def execute_transforms(transforms, output_rdf_graph):
    """Apply every requested collection-level transform and collect the RDF.

    Args:
        transforms: mapping of ``(transform, transform_type)`` keys to the
            list of input files registered for that transform.
        output_rdf_graph: graph that the results are added to.

    Returns:
        The graph handed back by the last ``add_*_to_graph`` call, or
        ``output_rdf_graph`` itself when no transform was applied.
    """
    for (transform, transform_type), input_f_files in transforms.items():
        # Add additional clauses to this if statement
        # for each transform type
        if transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTonic'):
            tonic_histogram, sample_count = find_cla_tonic_histogram(input_f_files)
            output_rdf_graph = add_tonic_histogram_to_graph(
                tonic_histogram, output_rdf_graph, transform,
                sample_count, input_f_files)
        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTuningFrequencyStatistics'):
            statistics, sample_count = find_cla_tf_statistics(input_f_files)
            output_rdf_graph = add_tf_statistics_to_graph(
                statistics, output_rdf_graph, transform,
                sample_count, input_f_files)
        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelSemitone'):
            semitone_histogram, sample_count = find_cla_semitone_histogram(input_f_files)
            output_rdf_graph = add_semitone_histogram_to_graph(
                semitone_histogram, output_rdf_graph, transform,
                sample_count, input_f_files)
        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTonicNormSemitone'):
            # This transform additionally consults the module-level input
            # graph, both when computing and when writing the histogram.
            tonic_norm_semitone_histogram, sample_count = find_cla_tonic_norm_semitone_histogram(
                input_f_files, input_rdf_graph)
            output_rdf_graph = add_tonic_norm_semitone_histogram_to_graph(
                tonic_norm_semitone_histogram, output_rdf_graph, transform,
                sample_count, input_f_files, input_rdf_graph)
        else:
            # Previously dropped without a trace; report it so a missing
            # result in the output can be explained.
            logging.warning("Skipping transform %s: unsupported transform type %s",
                            transform, transform_type)
    return output_rdf_graph
+# Find all transforms, and their associated input files,
+# from rdf_graph
def find_transforms_in_n3(rdf_graph):
    """Collect the transforms described in ``rdf_graph`` with their inputs.

    Args:
        rdf_graph: graph offering a SPARQL ``query`` method whose result
            rows expose ``transform``, ``dml_input`` and ``transform_type``.

    Returns:
        dict mapping ``(transform, transform_type)`` to the list of
        ``dml_input`` values found for that pair, in query-result order.
    """
    qres = rdf_graph.query(
        """prefix dml:     <http://dml.org/dml/cla#>
SELECT ?transform ?dml_input ?transform_type
WHERE {
?transform a dml:Transform .
?transform dml:input ?dml_input .
?transform dml:type ?transform_type .
}""")
    transforms = {}
    for row in qres:
        # One result row per (transform, input) pair: group the inputs
        # under their transform.
        key = (row.transform, row.transform_type)
        transforms.setdefault(key, []).append(row.dml_input)
    return transforms
+# Determine the mapping between feature file URIs and
+# their source audio file URIs
def map_audio_to_feature_files():
    """Map audio file URIs to the n3 feature files that describe them.

    Walks ``args.basedir``, parses every ``*.n3`` file found and records,
    for each subject typed ``mo:AudioFile``, the path of the file it was
    found in. Progress is printed to stdout along the way.

    Relies on the module-level ``args`` namespace (``transforms``,
    ``basedir`` and, when present, ``audio_files``).

    Returns:
        dict mapping the n3 form of each audio file URI to the path of its
        feature file (current working directory joined with the walked
        path).
    """
    # The argument parser in this file defines no audio-file option, so
    # treat a missing attribute as "no audio files" rather than failing.
    audio_files = getattr(args, 'audio_files', None) or []
    lines = [line.strip() for line in audio_files]
    for audio_file in lines:
        print("sonic-annotator -T " + args.transforms + " --rdf-basedir " + args.basedir + " <" + audio_file + ">")

    audio_to_feature_file_dict = {}
    for dirpath, _dirnames, filenames in walk(args.basedir):
        for filename in filenames:
            print("found file: " + filename)
            if not filename.endswith(".n3"):
                continue
            print("found n3 file: " + filename)

            # open and parse n3 file
            rdf_graph = Graph()
            rdf_graph.parse(os.path.join(dirpath, filename), format="n3")

            # find subject in ?subject a mo:AudioFile; the prefix is
            # declared here so the query does not depend on the bindings
            # of the parsed file
            qres = rdf_graph.query(
                """PREFIX mo: <http://purl.org/ontology/mo/>
SELECT ?audio_file
WHERE {
?audio_file a mo:AudioFile .
}""")
            print(len(qres))

            feature_file_uri = os.path.join(os.getcwd(), dirpath, filename)
            for row in qres:
                audio_file_uri = row.audio_file.n3()
                print("audio file URI is %s" % audio_file_uri)
                print("feature file URI is %s" % feature_file_uri)
                # add full file URI, subject to dict
                audio_to_feature_file_dict[audio_file_uri] = feature_file_uri

    print(audio_to_feature_file_dict)
    return audio_to_feature_file_dict
if __name__ == "__main__":
    # The functions above read the parsed options from the module-level
    # name ``args``, so it has to be bound here before main() is called.
    parser = argparse.ArgumentParser()
    # main() always parses the transforms file, so insist on it up front
    # and let argparse print a usage error when it is missing.
    parser.add_argument(
        "-T", "--transforms", required=True,
        help="the URI of an n3 (RDF) file describing one or more transforms, and the files to which they should be applied")
    parser.add_argument("-b", "--basedir", help="the URI of the base output directory")
    args = parser.parse_args()
    main()

Mercurial > hg > dml-open-backendtools

comparison pyspark/dml-analyser.py @ 0:e34cf1b6fe09 tip