# Mercurial > hg > chime-home-dataset-annotation-and-baseline-evaluation-code

#!/usr/bin/python
#
# play_random_excerpts.py:
#    Play random sample of excerpts from a list of audio files
#
#    Script used in a preliminary annotation experiment, with the aim of
#    obtaining a balanced sample of chunks for the annotator 'warm-up' phase
#    (see evaluate_annotations_random_excerpts.py)
#
# Author: Peter Foster
# (c) 2014 Peter Foster
#


#!/usr/bin/python

#
# play_random_excerpts.py:
#    Play random excerpts from a list of audio files
#
# Author: Peter Foster
# (c) 2014 Peter Foster
#

import glob
from scikits.audiolab import Sndfile
from scikits.audiolab import play
from pandas import DataFrame
import numpy as np

# Directory holding the source recordings; every *.wav file in it is a candidate
AudioPath = '/import/c4dm-datasets/chime/noise_background/PCCdata48kHz/train/background/'
# CSV file that receives one appended row per annotated chunk
OutputFile = '/import/c4dm-02/people/peterf/audex/datasets/chime_home/raw_data/exploratory/exploratory_labelling.csv'
# glob.glob() returns matches in arbitrary (filesystem-dependent) order. Sort
# them so that, together with the fixed seed below, the same chunks are drawn
# in the same order regardless of the machine the script runs on.
FileList = sorted(glob.glob(AudioPath + '*.wav'))
#Maximum number of chunks to sample from each file
nChunksPerFile = 16
#Duration of each chunk in seconds
chunkDuration = 4
#Sample rate (Hz) that every input file is required to have
sampleRate = 48000

#Accumulates one (audiofile, chunk index, start frame) tuple per sampled chunk
AudioChunks = []
#Fixed seed so that the random sample of chunks is reproducible
np.random.seed(4756)

# For every audio file, draw up to nChunksPerFile distinct, non-overlapping
# chunks of chunkDuration seconds and record where each one starts.
for audioFile in FileList:
    # Only header fields are needed here, so read them and release the file
    # handle straight away; try/finally guarantees the close even on error.
    sf = Sndfile(audioFile, "r")
    try:
        fileSampleRate = sf.samplerate
        nFrames = sf.nframes
    finally:
        sf.close()

    if fileSampleRate != sampleRate:
        raise ValueError("wanted sample rate %g - got %g." % (sampleRate, fileSampleRate))

    # Number of complete chunks that fit in the file (floor division, so a
    # trailing partial chunk is never sampled)
    nChunksInFile = int(nFrames // (fileSampleRate * chunkDuration))
    if nChunksInFile == 0:
        # File is shorter than a single chunk, so there is nothing to sample.
        # Skipping also avoids np.random.choice() being handed an empty
        # population, which older NumPy releases reject with a ValueError.
        continue

    #Sample random chunks from file, without replacement
    sampledChunks = np.random.choice(nChunksInFile, min(nChunksInFile, nChunksPerFile), replace=False)

    for chunk in sampledChunks:
        # Offset of the chunk's first frame from the start of the file
        frameStart = chunk * chunkDuration * fileSampleRate
        AudioChunks.append((audioFile, chunk, frameStart))


#Create DataFrame with one row per sampled chunk.
# The column names are passed to the constructor rather than assigned
# afterwards so that an empty sample still works: a DataFrame built from an
# empty list has zero columns, and assigning three names to it raises a
# length-mismatch error.
AudioChunks = DataFrame(AudioChunks, columns=['audiofile', 'chunk', 'framestart'])

# Report the sample size and the total listening time it corresponds to
nSampled = len(AudioChunks)
print("Sampled " + str(nSampled) + " in total, corresponding to " + str(nSampled * chunkDuration / float(60)) + " minutes of audio.")


#Audio playback
# Play each sampled chunk in turn, prompt for an annotation string, and append
# the result to OutputFile as "audiofile,chunk,annotation".
nTotal = len(AudioChunks)
chunkRows = zip(AudioChunks['audiofile'], AudioChunks['chunk'], AudioChunks['framestart'])
for n, (audioFile, chunk, frameStart) in enumerate(chunkRows, 1):
    sf = Sndfile(audioFile, "r")
    try:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))
        sf.seek(frameStart, mode='r')
        frames = sf.read_frames(chunkDuration * sf.samplerate)
    finally:
        # Release the handle even if the check, seek or read fails
        sf.close()

    S = ""

    # An empty reply means "replay": keep playing until something is entered
    while not S:
        print("Starting playback of chunk  " + str(n) + " of " + str(nTotal))
        play(frames.T, sampleRate)
        S = raw_input("Enter annotation string, or simply hit return to replay chunk. > ")

    # The file is reopened in append mode for every chunk, so each annotation
    # is written and the file closed before the next chunk is played.
    with open(OutputFile, 'a') as f:
        f.write(audioFile + "," + str(chunk) + "," + S + "\n")
# author	peterf
# date	Tue, 07 Jul 2015 16:03:52 +0100
# parents	75c79305d794
# children