# view pyspark/dml-analyser.py @ 0:e34cf1b6fe09 tip
#
# commit
# author Daniel Wolff
# date Sat, 20 Feb 2016 18:14:24 +0100
# parents
# children
# line wrap: on
# line source
# Part of DML (Digital Music Laboratory)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__="hargreavess"

import ConfigParser
import logging
import os
import time
import shutil
import argparse
from os import walk
import rdflib
from rdflib import Graph
from RDFClosure import DeductiveClosure, OWLRL_Semantics
from transforms.tonicHistogram import find_cla_tonic_histogram, add_tonic_histogram_to_graph
from transforms.tuningFrequencyStatistics import find_cla_tf_statistics, add_tf_statistics_to_graph
from transforms.semitoneHistogram import find_cla_semitone_histogram, add_semitone_histogram_to_graph
from transforms.tonicNormSemitoneHistogram import find_cla_tonic_norm_semitone_histogram, add_tonic_norm_semitone_histogram_to_graph

# Shared module-level graph. main() parses the ontology and the transform
# description into it; execute_transforms() hands it to the tonic-normalised
# semitone histogram transform.
input_rdf_graph = Graph()

def main():
    """Run the transforms described in ``args.transforms`` and print the result.

    The ontology URI is read from ``dml-analyser.cfg``. Both the ontology and
    the transform description are parsed into the module-level
    ``input_rdf_graph``, with the OWL-RL closure expanded after each parse.
    The transforms found in that graph are executed and the resulting graph
    is written to stdout as n3.
    """
    # Load the configuration and look up where the ontology lives
    settings = ConfigParser.ConfigParser()
    settings.read('dml-analyser.cfg')
    ontology_uri = settings.get('Ontology', 'dmlclaOntology_URI')

    # Merge the dmlcla ontology into the input graph
    input_rdf_graph.parse(ontology_uri, format="n3")
    DeductiveClosure(OWLRL_Semantics).expand(input_rdf_graph)

    # Merge the transform description (input rdf) into the same graph
    input_rdf_graph.parse(args.transforms, format="n3")
    DeductiveClosure(OWLRL_Semantics).expand(input_rdf_graph)

    # Work out which transforms to apply, and to which input files
    requested_transforms = find_transforms_in_n3(input_rdf_graph)

    # Apply them, accumulating the results in a fresh graph
    results_graph = execute_transforms(requested_transforms, Graph())

    # Emit the results on stdout
    print(results_graph.serialize(format='n3'))

# Loop through all transforms, process the corresponding
# input files appropriately and add the (RDF) result to output_rdf_graph
def execute_transforms(transforms, output_rdf_graph):
    """Apply each collection-level transform and gather the RDF results.

    Args:
        transforms: dict mapping ``(transform, transform_type)`` keys to the
            list of inputs for that transform (the shape built by
            ``find_transforms_in_n3``).
        output_rdf_graph: graph that the results are added to.

    Returns:
        The graph returned by the last ``add_*_to_graph`` call, or
        ``output_rdf_graph`` unchanged when no entry had a known type.
    """
    for (transform, transform_type), input_f_files in transforms.items():

        # Add additional clauses to this if statement
        # for each transform type; unknown types are skipped silently.
        if transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTonic'):

            (tonic_histogram, sample_count) = find_cla_tonic_histogram(input_f_files)
            output_rdf_graph = add_tonic_histogram_to_graph(tonic_histogram, output_rdf_graph, transform, sample_count, input_f_files)

        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTuningFrequencyStatistics'):

            statistics, sample_count = find_cla_tf_statistics(input_f_files)
            output_rdf_graph = add_tf_statistics_to_graph(statistics, output_rdf_graph, transform, sample_count, input_f_files)

        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelSemitone'):

            (semitone_histogram, sample_count) = find_cla_semitone_histogram(input_f_files)
            output_rdf_graph = add_semitone_histogram_to_graph(semitone_histogram, output_rdf_graph, transform, sample_count, input_f_files)

        elif transform_type == rdflib.term.URIRef(u'http://dml.org/dml/cla#CollectionLevelTonicNormSemitone'):

            # This transform additionally needs the module-level input graph.
            (tonic_norm_semitone_histogram, sample_count) = find_cla_tonic_norm_semitone_histogram(input_f_files, input_rdf_graph)
            output_rdf_graph = add_tonic_norm_semitone_histogram_to_graph(tonic_norm_semitone_histogram, output_rdf_graph, transform, sample_count, input_f_files, input_rdf_graph)

    return output_rdf_graph

# Find all transforms, and their associated input files,
# from rdf_graph
def find_transforms_in_n3(rdf_graph):
    """Collect the transforms declared in ``rdf_graph`` with their inputs.

    Args:
        rdf_graph: object whose ``query`` method accepts a SPARQL string and
            yields rows exposing ``transform``, ``dml_input`` and
            ``transform_type`` attributes.

    Returns:
        dict mapping ``(transform, transform_type)`` to the list of
        ``dml_input`` values found for that pair, in query-result order.
    """
    qres = rdf_graph.query(
        """prefix dml:     <http://dml.org/dml/cla#>
            SELECT ?transform ?dml_input ?transform_type
            WHERE {
                ?transform a dml:Transform .
                ?transform dml:input ?dml_input .
                ?transform dml:type ?transform_type .
            }""")

    transforms = dict()

    for row in qres:
        # One result row per (transform, input) pair: group the inputs
        # under their transform, creating the list on first sight.
        key = (row.transform, row.transform_type)
        transforms.setdefault(key, []).append(row.dml_input)

    return transforms

# Determine the mapping between feature file URIs and
# their source audio file URIs
def map_audio_to_feature_files():
    """Map audio file URIs to the n3 feature files found under ``args.basedir``.

    Reads the module-level ``args``. For every entry of ``args.audio_files``
    the matching sonic-annotator command line is printed (it is not
    executed). Each ``.n3`` file below ``args.basedir`` is then parsed, and
    every subject typed ``mo:AudioFile`` is mapped to that file's path.
    Progress messages and the final mapping are printed to stdout.

    Returns:
        dict mapping the n3 form of each audio file URI to the feature file
        path (``os.getcwd()`` joined with the walked directory and file name).
    """
    # The argument parser in this file defines no ``audio_files`` option, so
    # tolerate its absence instead of failing with AttributeError.
    audio_files = getattr(args, "audio_files", None) or []

    # Loop through audio files
    lines = [line.strip() for line in audio_files]

    for audio_file in lines:

        print("sonic-annotator -T " + args.transforms + " --rdf-basedir " + args.basedir + " <" + audio_file + ">")

    audio_to_feature_file_dict = dict()

    for (dirpath, dirnames, filenames) in walk(args.basedir):
        for filename in filenames:

            print("found file: " + filename)

            if not filename.endswith(".n3"):
                continue

            print("found n3 file: " + filename)

            feature_file = os.path.join(os.getcwd(), dirpath, filename)

            # open and parse n3 file
            rdf_graph = Graph()
            rdf_graph.parse(os.path.join(dirpath, filename), format="n3")

            # find subject in ?subject a mo:AudioFile
            # NOTE(review): the query declares no ``mo:`` prefix itself, so it
            # presumably relies on the parsed file binding it - confirm.
            qres = rdf_graph.query(
                """SELECT ?audio_file
                       WHERE {
                          ?audio_file a mo:AudioFile .
                       }""")

            print(len(qres))

            for row in qres:

                audio_uri = row.audio_file.n3()
                print("audio file URI is %s" % audio_uri)
                print("feature file URI is %s" % feature_file)
                audio_to_feature_file_dict[audio_uri] = feature_file

    print(audio_to_feature_file_dict)

    return audio_to_feature_file_dict

if __name__ == "__main__":

    # Command-line entry point: parse the options into the module-level
    # ``args`` (read by main() and map_audio_to_feature_files()), then run.
    parser = argparse.ArgumentParser()

    # main() always parses args.transforms, so a missing -T is reported here
    # as a usage error rather than failing later inside the RDF parser.
    parser.add_argument("-T", "--transforms", required=True, help="the URI of an n3 (RDF) file describing one or more transforms, and the files to which they should be applied")
    parser.add_argument("-b", "--basedir", help="the URI of the base output directory")

    args = parser.parse_args()

    main()