chime-home-dataset-annotation-and-baseline-evaluation-code: annotation_scripts/warmup_phase/play_random

annotate annotation_scripts/warmup_phase/play_random_excerpts.py @ 5:b523456082ca tip

Update path to dataset and reflect modified chunk naming convention.

author	peterf
date	Mon, 01 Feb 2016 21:35:27 +0000
parents	f079d2de4aa2
children

rev	line source
peterf@1	1 #!/usr/bin/python
peterf@1	2 #
peterf@1	3 # play_random_excerpts.py:
peterf@1	4 # Play random sample of excerpts from a list of audio files
peterf@1	5 #
peterf@1	6 # Script used in preliminary annotation experiment, with aim of obtaining
peterf@1	7 # balanced sample of chunks for annotator `warm-up' phase
peterf@1	8 # (see evaluate_annotations_random_excerpts.py)
peterf@1	9 #
peterf@1	10 # Author: Peter Foster
peterf@1	11 # (c) 2014 Peter Foster
peterf@1	12 #
peterf@1	13
peterf@1	14
peterf@0	15 #!/usr/bin/python
peterf@0	16
peterf@0	17 #
peterf@0	18 # play_random_excerpts.py:
peterf@0	19 # Play random excerpts from a list of audio files
peterf@0	20 #
peterf@0	21 # Author: Peter Foster
peterf@0	22 # (c) 2014 Peter Foster
peterf@0	23 #
peterf@0	24
peterf@0	25 import glob
peterf@0	26 from scikits.audiolab import Sndfile
peterf@0	27 from scikits.audiolab import play
peterf@0	28 from pandas import DataFrame
peterf@0	29 import numpy as np
peterf@0	30
# Directory containing the .wav recordings that chunks are sampled from
AudioPath = '/import/c4dm-datasets/chime/noise_background/PCCdata48kHz/train/background/'
# CSV file that annotations are appended to, one row per annotated chunk
OutputFile = '/import/c4dm-02/people/peterf/audex/datasets/chime_home/raw_data/exploratory/exploratory_labelling.csv'
# glob.glob() returns matches in arbitrary (filesystem-dependent) order.
# Sort them so that the fixed random seed below selects the same chunks
# regardless of the machine the script is run on.
FileList = sorted(glob.glob(AudioPath + '*.wav'))
#Number of chunks to sample from each file
nChunksPerFile = 16
#Duration of each chunk in seconds
chunkDuration = 4
#Sample rate (in Hz) that every audio file is required to have
sampleRate = 48000

#Collects one (audiofile, chunk index, start frame) tuple per sampled chunk
AudioChunks = []
#Fixed seed, so that the random sample of chunks is repeatable
np.random.seed(4756)
peterf@0	42
# For every audio file, draw up to nChunksPerFile distinct chunk indices at
# random and record where each chunk starts (in frames).
for audioFile in FileList:
    sf = Sndfile(audioFile, "r")
    try:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))

        framesPerChunk = sf.samplerate * chunkDuration
        #Number of complete chunks in the file; a trailing partial chunk is ignored
        nChunksInFile = int(sf.nframes // framesPerChunk)
    finally:
        #Release the file handle even if the sample rate check fails
        sf.close()

    if nChunksInFile == 0:
        #File is shorter than a single chunk: nothing to sample.
        #(np.random.choice rejects an empty population on older NumPy versions.)
        continue

    #Sample random chunks from file
    sampledChunks = np.random.choice(nChunksInFile, min(nChunksInFile, nChunksPerFile), replace=False)

    for chunk in sampledChunks:
        frameStart = chunk * framesPerChunk
        AudioChunks.append((audioFile, chunk, frameStart))
peterf@0	56
peterf@0	57
#Create DataFrame
# Column names are passed to the constructor rather than assigned afterwards:
# assigning three names to the zero-column frame built from an empty list
# (no chunks sampled) would raise a length-mismatch ValueError.
AudioChunks = DataFrame(AudioChunks, columns=['audiofile', 'chunk', 'framestart'])

#Report number of sampled chunks and their total duration
totalMinutes = len(AudioChunks) * chunkDuration / float(60)
print("Sampled " + str(len(AudioChunks)) + " in total, corresponding to " + str(totalMinutes) + " minutes of audio.")
peterf@0	63
peterf@0	64
#Audio playback
# For every sampled chunk: read its frames, play them until the annotator
# enters a non-empty string, then append the annotation to OutputFile.
nChunks = len(AudioChunks)
for c in range(nChunks):
    audioFile = AudioChunks['audiofile'][c]

    sf = Sndfile(audioFile, "r")
    try:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))
        sf.seek(AudioChunks['framestart'][c], mode='r')
        frames = sf.read_frames(chunkDuration * sf.samplerate)
    finally:
        #Release the file handle even if the check, seek or read fails
        sf.close()

    S = ""

    #An empty reply replays the chunk; any other reply is taken as the annotation
    while len(S) == 0:
        print("Starting playback of chunk " + str(c + 1) + " of " + str(nChunks))
        play(frames.T, sampleRate)
        S = raw_input("Enter annotation string, or simply hit return to replay chunk. > ")

    #Append immediately, so annotations entered so far are on disk if the session is interrupted
    with open(OutputFile, 'a') as f:
        f.write(audioFile + "," + str(AudioChunks['chunk'][c]) + "," + S + "\n")

Mercurial > hg > chime-home-dataset-annotation-and-baseline-evaluation-code

annotate annotation_scripts/warmup_phase/play_random_excerpts.py @ 5:b523456082ca tip