dml-open-backendtools: ipcluster/test_sonic_annotator

comparison ipcluster/test_sonic_annotator_notimeside.py @ 0:e34cf1b6fe09 tip

commit

author	Daniel Wolff
date	Sat, 20 Feb 2016 18:14:24 +0100
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:e34cf1b6fe09
+# Part of DML (Digital Music Laboratory)
+# Copyright 2014-2015 Daniel Wolff, City University
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+#!/usr/local/spark-1.0.0-bin-hadoop2/bin/spark-submit
+# -*- coding: utf-8 -*-
+__author__="wolffd"
+__date__ ="$11-Jul-2014 15:31:01$"
+import sys
+import time
+import os
+import hashlib
+from IPython.parallel import Client
+# this is the main routine to be submitted as a spark job
+#
+#
+# Running python applications through ./bin/pyspark is deprecated as of Spark 1.0.
+# Use ./bin/spark-submit <python file> --py-files sonic_annotator_vamp.py
+# you can also provide a zip of all necessary python files
+#
+# @param string audiopath root of the folder structure to be traversed
+# @param string transform_file path to the .n3 turtle file describing the transform
+#def main(audiopath = './',
+#         transform_file = 'silvet_settings.n3',
+#         masterip = '0.0.0.0):
# File name suffixes (case-sensitive) that mark a file as audio input.
_AUDIO_EXTENSIONS = (".wav", ".mp3", ".flac")

# Chunk size in bytes used when hashing a transform file.
_HASH_BLOCKSIZE = 65536


def _collect_audio_files(audiopath):
    """Return all audio files below *audiopath*, with '/' as path separator."""
    found = []
    checked = 0
    for dirpath, _dirnames, filenames in os.walk(audiopath):
        for name in filenames:
            checked += 1
            sys.stdout.write('\rChecked %d files' % checked)
            if name.endswith(_AUDIO_EXTENSIONS):
                # normalise Windows separators so engines get uniform paths
                found.append(os.path.join(dirpath, name).replace('\\', '/'))
    # terminate the '\r'-rewritten progress line
    sys.stdout.write('\n')
    sys.stdout.flush()
    return found


def _list_transform_files(transform_path):
    """Return the .n3 transform files denoted by *transform_path*.

    A path ending in ".n3" is taken as a single transform file; anything
    else is treated as a directory whose ".n3" entries are all used.
    """
    if transform_path.endswith(".n3"):
        return [transform_path]
    # os.path.join inserts the separator that plain string concatenation
    # would drop when the directory is given without a trailing slash
    return [os.path.join(transform_path, name)
            for name in os.listdir(transform_path)
            if name.endswith(".n3")]


def _sha1_of_file(path):
    """Return the hex SHA-1 digest of the file at *path*, read in chunks."""
    hasher = hashlib.sha1()
    with open(path, 'rb') as afile:
        buf = afile.read(_HASH_BLOCKSIZE)
        while len(buf) > 0:
            hasher.update(buf)
            buf = afile.read(_HASH_BLOCKSIZE)
    return str(hasher.hexdigest())


def _wait_with_progress(rc, ar):
    """Poll *ar* once per second until it is ready, printing progress.

    Progress is derived by comparing the message ids of *ar* with
    ``rc.outstanding`` (presumably the ids still in flight -- see the
    IPython.parallel Client documentation).

    Returns the elapsed wall-clock time in seconds.
    """
    tic = time.time()
    while True:
        toc = time.time() - tic
        msgset = set(ar.msg_ids)
        completed = len(msgset.difference(rc.outstanding))
        pending = len(msgset.intersection(rc.outstanding))
        # an estimate is only possible once at least one task has finished
        if completed > 0:
            timerem = ((toc / completed) * pending) / 3600.0
            sys.stdout.write(
                '\rRunning %3.2f hrs: %3.2f percent. %d done, %d pending, approx %3.2f hrs'
                % (toc / 3600.0,
                   completed / (pending + completed * 1.0) * 100.0,
                   completed, pending, timerem))
            sys.stdout.flush()
        if ar.ready():
            print('\n')
            break
        time.sleep(1)
    return time.time() - tic


def main(audiopath, transform_path, out_path = ''):
    """Run Vamp transforms over a tree of audio files on an IPython cluster.

    Every .wav/.mp3/.flac file below *audiopath* is paired with each
    transform and handed to ``sonic_annotator_vamp.transform`` through a
    load-balanced view, while progress is reported on stdout.

    @param string audiopath root of the folder structure to be traversed
    @param string transform_path path to a .n3 turtle file describing the
           transform, or to a directory containing several .n3 files
    @param string out_path passed through unchanged to every task
    """
    print("iPCluster implementation for Vamp processing")

    # ---
    # initialise ipcluster
    # ---
    rc = Client()
    lview = rc.load_balanced_view()
    lview.block = False  # asynchronous: map() returns immediately
    dview = rc[:]
    dview.block = True

    # import libraries on the engines as well as locally; note that this
    # rebinds sys and os as locals of this function from here on
    with dview.sync_imports():
        import sys
        import os
        import sonic_annotator_vamp

    # traverse the file structure
    data = _collect_audio_files(audiopath)
    njobs = len(data)
    if not data:
        # nothing to dispatch, so skip hashing and cluster submission
        print("No audio files found in " + audiopath)
        return

    # transform_path may name one transform file or a directory of them
    for transform_file in _list_transform_files(transform_path):
        # the hash of the transform definition travels with each task
        transform_hash = _sha1_of_file(transform_file)

        # one task per audio file: (audio, transform, hash, out_path)
        action = [(x, transform_file, transform_hash, out_path) for x in data]

        # output the current task
        tpath = os.path.split(transform_file)
        print("Using " + tpath[1] + " on " + str(njobs) + " files")

        # ---
        # do the work!
        # ---
        ar = lview.map(sonic_annotator_vamp.transform, action)

        # asynchronous process output
        toc = _wait_with_progress(rc, ar)
        # NOTE(review): ar.get() is never called, so task results and any
        # remote exceptions are not surfaced here -- confirm this is intended.
        print('\rProcessed %d files in %3.2f hours.' % (njobs, toc / 3600.0))
        print('\n')
if __name__ == "__main__":
    # Usage: <script> AUDIOPATH TRANSFORM_PATH [OUT_PATH]
    # Without at least two arguments the hard-coded defaults below are used.
    if len(sys.argv) >= 3:
        # forward the optional third argument as out_path when it is given;
        # with exactly two arguments main() keeps its own out_path default
        main(*sys.argv[1:4])
    else:
        main(audiopath = '/audio', transform_path = 'dml_processing/sonic_annotator/vamp_plugins/bbc_speechmusic.n3', out_path = './')

Mercurial > hg > dml-open-backendtools

comparison ipcluster/test_sonic_annotator_notimeside.py @ 0:e34cf1b6fe09 tip