Mercurial > hg > chime-home-dataset-annotation-and-baseline-evaluation-code
view annotation_scripts/warmup_phase/play_random_excerpts.py @ 1:f079d2de4aa2
Headers and LICENSE; VERSION
author | peterf |
---|---|
date | Tue, 07 Jul 2015 16:03:52 +0100 |
parents | 75c79305d794 |
children |
line wrap: on
line source
#!/usr/bin/python
#
# play_random_excerpts.py:
#    Play random sample of excerpts from a list of audio files
#
#    Script used in preliminary annotation experiment, with aim of obtaining
#    balanced sample of chunks for annotator `warm-up' phase
#    (see evaluate_annotations_random_excerpts.py)
#
# Author: Peter Foster
# (c) 2014 Peter Foster
#
# Workflow:
#   1. From every .wav file matching AudioPath, draw up to nChunksPerFile
#      distinct chunks of chunkDuration seconds, at random without
#      replacement (fixed seed).
#   2. Play each sampled chunk and prompt for an annotation string; an empty
#      reply replays the same chunk.
#   3. Append one "audiofile,chunk,annotation" line to OutputFile per chunk.

from contextlib import closing
import glob

from scikits.audiolab import Sndfile
from scikits.audiolab import play
from pandas import DataFrame
import numpy as np

# The prompt was written against raw_input; fall back to input() when
# raw_input is not defined so the prompt keeps returning a plain string.
try:
    read_line = raw_input
except NameError:
    read_line = input

AudioPath = '/import/c4dm-datasets/chime/noise_background/PCCdata48kHz/train/background/'
OutputFile = '/import/c4dm-02/people/peterf/audex/datasets/chime_home/raw_data/exploratory/exploratory_labelling.csv'
# NOTE(review): glob.glob does not guarantee an ordering, so the fixed seed
# below only reproduces the same sample if the file order is also the same.
# Left unsorted to keep the sampling identical to the original script.
FileList = glob.glob(AudioPath + '*.wav')

#Number of chunks to sample from each file
nChunksPerFile = 16
#Duration of each chunk in seconds
chunkDuration = 4
sampleRate = 48000

AudioChunks = []
np.random.seed(4756)
for audioFile in FileList:
    # closing() guarantees the handle is released even when the sample-rate
    # check below raises.
    with closing(Sndfile(audioFile, "r")) as sf:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))
        #Number of whole chunks that fit in the file
        nChunksInFile = int(sf.nframes // (sf.samplerate * chunkDuration))
        if nChunksInFile == 0:
            # File is shorter than one chunk: nothing to sample, so skip it
            # rather than drawing from an empty population.
            continue
        #Sample random chunks from file
        sampledChunks = np.random.choice(nChunksInFile, min(nChunksInFile, nChunksPerFile), replace=False)
        for chunk in sampledChunks:
            frameStart = chunk * chunkDuration * sf.samplerate
            AudioChunks.append((audioFile, chunk, frameStart))

#Create DataFrame
# Passing the column names to the constructor also works when no chunk was
# sampled (assigning .columns afterwards fails on an empty, column-less frame).
AudioChunks = DataFrame(AudioChunks, columns=['audiofile', 'chunk', 'framestart'])

print("Sampled " + str(len(AudioChunks)) + " in total, corresponding to " + str(len(AudioChunks) * chunkDuration / float(60)) + " minutes of audio.")

#Audio playback
nTotal = len(AudioChunks)
for n, (audioFile, chunk, frameStart) in enumerate(AudioChunks.itertuples(index=False), 1):
    with closing(Sndfile(audioFile, "r")) as sf:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))
        sf.seek(frameStart, mode='r')
        frames = sf.read_frames(chunkDuration * sf.samplerate)

    #Replay the chunk until a non-empty annotation is entered
    S = ""
    while not S:
        print("Starting playback of chunk " + str(n) + " of " + str(nTotal))
        play(frames.T, sampleRate)
        S = read_line("Enter annotation string, or simply hit return to replay chunk. > ")

    # Append after every chunk so that annotations entered so far are already
    # on disk if the session is interrupted.
    # NOTE(review): the annotation is written unescaped; a comma typed into S
    # adds an extra field to that row.
    with open(OutputFile, 'a') as f:
        f.write(audioFile + "," + str(chunk) + "," + S + "\n")