Mercurial > hg > dml-open-backendtools
diff pyspark/test_timeside_vamp_spark.py @ 0:e34cf1b6fe09 tip
commit
author | Daniel Wolff |
---|---|
date | Sat, 20 Feb 2016 18:14:24 +0100 |
parents | |
children |
line wrap: on
line diff
#!/usr/local/spark-1.0.0-bin-hadoop2/bin/spark-submit
# -*- coding: utf-8 -*-
# Part of DML (Digital Music Laboratory)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""PySpark test script: map the TimeSide/Vamp transform over sample WAV files.

Collects the ``.wav`` files below the TimeSide sample directory, distributes
the list of paths on a local Spark master and applies ``transform`` to each
path, requesting the first two results.
"""

__author__ = "wolffd"
__date__ = "$11-Jul-2014 15:31:01$"

from pyspark import SparkConf, SparkContext
# @todo: timeside has to be packed for multi-pc usage
from timeside_vamp import *
# Kept after the star import so that nothing exported by timeside_vamp can
# shadow these names. ``import os`` is required for ``os.path.join`` below.
import os
from os import walk

# Running python applications through ./bin/pyspark is deprecated as of Spark 1.0.
# Use ./bin/spark-submit <python file>


def _find_wav_files(root):
    """Return the paths of all files below *root* whose name ends in ``.wav``.

    The directory tree is traversed with ``os.walk``. Each returned path is
    the containing directory joined with the file name, so it is relative
    whenever *root* is relative. The suffix comparison is case-sensitive.
    """
    wav_files = []
    for dirpath, _dirnames, filenames in walk(root):
        for filename in filenames:
            if filename.endswith(".wav"):
                wav_files.append(os.path.join(dirpath, filename))
    return wav_files


def main():
    """Run the transform on the sample WAV files using a local Spark context.

    The sample directory is given relative to the current working directory,
    so the script has to be started from the directory it was written for.
    The Spark context is always stopped before returning, even if the job
    raises.
    """
    # Single parenthesised argument: prints the same text on Python 2 and 3.
    print("PySpark Telemeta and Vamp Test")
    conf = (SparkConf()
            .setMaster("local")
            .setAppName("My app")
            .set("spark.executor.memory", "1g"))
    sc = SparkContext(conf=conf)
    try:
        # here come the wav file names
        wav_files = _find_wav_files('../../TimeSide/tests/samples/')

        # define distributed dataset
        dist_data = sc.parallelize(wav_files)

        # define map
        # NOTE(review): ``transform`` is not defined in this file; presumably
        # it is provided by the timeside_vamp star import - confirm.
        mapped = dist_data.map(lambda wav_path: transform(wav_file=wav_path))

        # process 2: map() only describes the work, take(2) is the action that
        # actually runs it. The returned results are intentionally discarded.
        mapped.take(2)
    finally:
        # Release the context so a failing job does not leave it running.
        sc.stop()


if __name__ == "__main__":
    main()