#!/usr/bin/python
#
# play_random_excerpts.py:
#    Play random sample of excerpts from a list of audio files
#
# Script used in preliminary annotation experiment, with aim of obtaining
# balanced sample of chunks for annotator `warm-up' phase
# (see evaluate_annotations_random_excerpts.py)
#
# For every .wav file under AudioPath, a fixed number of non-overlapping
# chunks is drawn at random. Each chunk is then played back and the string
# typed by the annotator is appended to OutputFile as one CSV line:
#    <audiofile>,<chunk index>,<annotation>
#
# Author: Peter Foster
# (c) 2014 Peter Foster
#

import glob
from scikits.audiolab import Sndfile
from scikits.audiolab import play
from pandas import DataFrame
import numpy as np

AudioPath = '/import/c4dm-datasets/chime/noise_background/PCCdata48kHz/train/background/'
OutputFile = '/import/c4dm-02/people/peterf/audex/datasets/chime_home/raw_data/exploratory/exploratory_labelling.csv'
# glob returns files in arbitrary (filesystem) order; sort so that the fixed
# random seed below yields the same sample of chunks on every run and machine.
FileList = sorted(glob.glob(AudioPath + '*.wav'))
#Number of chunks to sample from each file
nChunksPerFile = 16
#Duration of each chunk in seconds
chunkDuration = 4
sampleRate = 48000


def _open_audio(path):
    """Open ``path`` for reading and verify that it uses ``sampleRate``.

    Returns the open Sndfile; the caller is responsible for closing it.
    Raises ValueError (after closing the file) on a sample rate mismatch.
    """
    sf = Sndfile(path, "r")
    if sf.samplerate != sampleRate:
        gotRate = sf.samplerate
        sf.close()
        raise ValueError("wanted sample rate %g - got %g." % (sampleRate, gotRate))
    return sf


AudioChunks = []
np.random.seed(4756)

for audioFile in FileList:
    sf = _open_audio(audioFile)
    try:
        framesPerChunk = sf.samplerate * chunkDuration
        #Number of complete chunks contained in the file
        nChunksInFile = int(sf.nframes // framesPerChunk)
    finally:
        sf.close()

    if nChunksInFile == 0:
        #File is shorter than a single chunk: nothing to sample from it
        continue

    #Sample random chunks from file (without replacement, so no chunk repeats)
    sampledChunks = np.random.choice(nChunksInFile, min(nChunksInFile, nChunksPerFile), replace=False)

    for chunk in sampledChunks:
        frameStart = chunk * framesPerChunk
        AudioChunks.append((audioFile, chunk, frameStart))


#Create DataFrame (columns passed up front so that an empty sample is valid too)
AudioChunks = DataFrame(AudioChunks, columns=['audiofile', 'chunk', 'framestart'])
nChunksTotal = len(AudioChunks)

print("Sampled " + str(nChunksTotal) + " in total, corresponding to " + str(nChunksTotal * chunkDuration / float(60)) + " minutes of audio.")


#Audio playback
for n, (audioFile, chunk, frameStart) in enumerate(AudioChunks.itertuples(index=False), 1):
    sf = _open_audio(audioFile)
    try:
        sf.seek(int(frameStart), mode='r')
        frames = sf.read_frames(chunkDuration * sf.samplerate)
    finally:
        sf.close()

    annotation = ""

    #Replay the chunk until the annotator enters a non-empty string
    while not annotation:
        print("Starting playback of chunk " + str(n) + " of " + str(nChunksTotal))
        play(frames.T, sampleRate)
        #raw_input: this script targets Python 2
        annotation = raw_input("Enter annotation string, or simply hit return to replay chunk. > ")

    #Append after every chunk so that an interrupted session keeps its results.
    #NOTE: the annotation is written verbatim; a comma in it adds a CSV column.
    with open(OutputFile, 'a') as f:
        f.write(audioFile + "," + str(chunk) + "," + annotation + "\n")