#!/usr/bin/python
#
# annotationkit_create_annotation_protocol.py:
#    Prepare annotation protocol CSV from list of audio files
#
#    Read list of files from standard input and write CSV to standard output
#    See annotationkit_create_annotation_protocol_wrapper.sh for usage example
#
#    Each input line names one audio file. Every file is divided into
#    consecutive, non-overlapping chunks of chunkDuration seconds; a random
#    subset of those chunks (at most nChunksPerFile per file) is written out
#    as one CSV row per chunk with columns: audiofile, chunk, framestart.
#    A summary of the total amount of audio is written to standard error.
#
# Author: Peter Foster
# (c) 2015 Peter Foster
#

import fileinput
from scikits.audiolab import Sndfile
from pandas import DataFrame
import numpy as np
import sys

# Maximum number of chunks to sample from each file (np.inf: use every chunk)
nChunksPerFile = np.inf
# Duration of each chunk in seconds
chunkDuration = 4
# Expected sample rate; a file with any other rate aborts the run
sampleRate = 48000

AudioChunks = []
# Fixed seed so the random chunk selection is reproducible between runs
np.random.seed(4756)

for audioFile in fileinput.input():
    audioFile = audioFile.strip()
    if not audioFile:
        # Blank line in the file list: there is no path to open
        continue

    sf = Sndfile(audioFile, "r")
    try:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g."
                             % (sampleRate, sf.samplerate))

        # Number of whole chunks in the file; a trailing partial chunk is dropped
        nChunksInFile = int(sf.nframes / (sf.samplerate * chunkDuration))
        if nChunksInFile == 0:
            # File is shorter than one chunk: nothing to sample from
            continue

        # Sample without replacement random chunks from file.
        # int() keeps the sample size integral even if nChunksPerFile is a float.
        nSampled = int(min(nChunksInFile, nChunksPerFile))
        sampledChunks = np.random.choice(nChunksInFile, nSampled, replace=False)

        for chunk in sampledChunks:
            # Index of the first frame of this chunk within the file
            frameStart = chunk * chunkDuration * sf.samplerate
            AudioChunks.append((audioFile, chunk, frameStart))
    finally:
        # Release the file handle even when the sample rate check fails
        sf.close()


# Create DataFrame; naming the columns in the constructor also works when no
# chunks were collected (the result is then a header-only CSV)
AudioChunks = DataFrame(AudioChunks,
                        columns=['audiofile', 'chunk', 'framestart'])

sys.stderr.write("Processed " + str(len(AudioChunks)) + " chunks in total, corresponding to " + str(len(AudioChunks) * chunkDuration / float(60)) + " minutes of audio.\n")

# Write to CSV (stdout)
AudioChunks.to_csv(sys.stdout)