# Source: Mercurial > hg > dml-open-backendtools

# Part of DML (Digital Music Laboratory)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

#!/usr/local/spark-1.0.0-bin-hadoop2/bin/spark-submit
# -*- coding: utf-8 -*-
__author__="wolffd"
__date__ ="$11-Jul-2014 15:31:01$"

from pyspark import SparkConf, SparkContext
# @todo: timeside has to be packed for multi-pc usage
from timeside_vamp import *
from os import walk

# Running python applications through ./bin/pyspark is deprecated as of Spark 1.0.
# Use ./bin/spark-submit <python file>


def main():
    """Run a small local Spark job over the TimeSide sample WAV files.

    Builds a local SparkContext, collects every ``.wav`` file found below
    ``../../TimeSide/tests/samples/`` (relative to the current working
    directory), distributes the list of paths as an RDD and maps
    ``transform(wav_file=...)`` over it.  Only the first two results are
    requested via ``take(2)``; they are not used afterwards.

    ``transform`` is not defined in this file -- presumably it is provided
    by the ``from timeside_vamp import *`` star import (TODO confirm).

    Returns:
        None.
    """
    # The module header only imports ``walk`` from ``os``; the path join
    # below needs the ``os`` module itself, so bring it into scope here.
    import os

    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print("PySpark Telemeta and Vamp Test")
    conf = (SparkConf()
            .setMaster("local")
            .setAppName("My app")
            .set("spark.executor.memory", "1g"))
    sc = SparkContext(conf=conf)

    try:
        # here come the wav file names
        mypath = '../../TimeSide/tests/samples/'
        data = [os.path.join(dirpath, filename)
                for dirpath, _dirnames, filenames in walk(mypath)
                for filename in filenames
                if filename.endswith(".wav")]

        # define distributed dataset
        distData = sc.parallelize(data)

        # define map
        m1 = distData.map(lambda x: transform(wav_file=x))

        # process 2: ask for the first two mapped results
        m1.take(2)
    finally:
        # Always shut the context down, even if the job above fails.
        sc.stop()

# Script entry point: run the job only when this file is executed directly
# (e.g. via spark-submit), not when it is imported as a module.
if __name__ == "__main__":
    main()
# Changeset metadata (from the Mercurial web view):
# author: Daniel Wolff
# date: Sat, 20 Feb 2016 18:14:24 +0100
# parents: (none listed)
# children: (none listed)