peterf@1
|
#!/usr/bin/python
#
# play_random_excerpts.py:
# Play random sample of excerpts from a list of audio files
#
# Script used in preliminary annotation experiment, with aim of obtaining
# balanced sample of chunks for annotator 'warm-up' phase
# (see evaluate_annotations_random_excerpts.py)
#
# Author: Peter Foster
# (c) 2014 Peter Foster
#
|
peterf@1
|
13
|
peterf@1
|
14
|
peterf@0
|
#!/usr/bin/python

#
# play_random_excerpts.py:
# Play random excerpts from a list of audio files
#
# Author: Peter Foster
# (c) 2014 Peter Foster
#
|
peterf@0
|
24
|
peterf@0
|
25 import glob
|
peterf@0
|
26 from scikits.audiolab import Sndfile
|
peterf@0
|
27 from scikits.audiolab import play
|
peterf@0
|
28 from pandas import DataFrame
|
peterf@0
|
29 import numpy as np
|
peterf@0
|
30
|
peterf@0
|
# --- Configuration and random chunk selection --------------------------------
#
# Builds the list `AudioChunks`, holding one (audiofile, chunk, framestart)
# tuple for every chunk that is selected for playback further down the script.

#Directory that is searched (non-recursively) for *.wav input files
AudioPath = '/import/c4dm-datasets/chime/noise_background/PCCdata48kHz/train/background/'
#CSV file to which the entered annotations are appended
OutputFile = '/import/c4dm-02/people/peterf/audex/datasets/chime_home/raw_data/exploratory/exploratory_labelling.csv'
#glob returns its matches in arbitrary (filesystem-dependent) order; sort them
#so that the fixed random seed below selects the same chunks on every run
FileList = sorted(glob.glob(AudioPath + '*.wav'))
#Number of chunks to sample from each file
nChunksPerFile = 16
#Duration of each chunk in seconds
chunkDuration = 4
#Sample rate in Hz that every input file is required to have
sampleRate = 48000

AudioChunks = []
#Fixed seed, so that the random selection is repeatable
np.random.seed(4756)

for audioFile in FileList:
    sf = Sndfile(audioFile, "r")
    try:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))

        #Number of whole, non-overlapping chunks that fit into this file
        framesPerChunk = sf.samplerate * chunkDuration
        nChunksInFile = int(sf.nframes // framesPerChunk)

        #Files shorter than a single chunk contribute nothing; skipping them
        #also avoids asking np.random.choice to draw from an empty population
        if nChunksInFile > 0:
            #Sample random chunks from file (without replacement, and never
            #more chunks than the file contains)
            sampledChunks = np.random.choice(nChunksInFile, min(nChunksInFile, nChunksPerFile), replace=False)

            for chunk in sampledChunks:
                #Index of the first frame of this chunk within the file
                frameStart = chunk * framesPerChunk
                AudioChunks.append((audioFile, chunk, frameStart))
    finally:
        #Release the file handle even when the sample-rate check fails
        sf.close()
|
peterf@0
|
56
|
peterf@0
|
57
|
peterf@0
|
58 #Create DataFrame
|
peterf@0
|
#Create DataFrame with one row per sampled chunk.
#The column names are passed to the constructor (rather than assigned
#afterwards) so that an empty selection yields an empty, correctly labelled
#table instead of a length-mismatch error.
AudioChunks = DataFrame(AudioChunks, columns=['audiofile', 'chunk', 'framestart'])

#Report how many chunks were selected and their total duration in minutes
print("Sampled " + str(len(AudioChunks)) + " in total, corresponding to " + str(len(AudioChunks) * chunkDuration / float(60)) + " minutes of audio.")
|
peterf@0
|
63
|
peterf@0
|
64
|
peterf@0
|
65 #Audio playback
|
peterf@0
|
#Audio playback and annotation:
#for every sampled chunk, read its frames, play them until the user enters a
#non-empty annotation string, then append "<audiofile>,<chunk>,<annotation>"
#as one line to OutputFile.
for c in range(len(AudioChunks)):
    sf = Sndfile(AudioChunks['audiofile'][c], "r")
    try:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))
        #Jump to the first frame of the chunk and read exactly one chunk
        sf.seek(AudioChunks['framestart'][c], mode='r')
        frames = sf.read_frames(chunkDuration * sf.samplerate)
    finally:
        #Release the file handle even when the check or the read fails
        sf.close()

    S = ""
    #1-based chunk number shown to the user
    n = c + 1

    #An empty reply (plain return) replays the same chunk
    while len(S) == 0:
        print("Starting playback of chunk " + str(n) + " of " + str(len(AudioChunks)))
        #NOTE(review): frames are transposed before playback, presumably
        #because play() expects one row per channel - confirm
        play(frames.T, sampleRate)
        S = raw_input("Enter annotation string, or simply hit return to replay chunk. > ")

    #Append after every chunk so that finished annotations survive an
    #interrupted session.
    #NOTE(review): S is written verbatim; an annotation containing a comma
    #adds an extra field to the line.
    with open(OutputFile, 'a') as f:
        f.write(AudioChunks['audiofile'][c] + "," + str(AudioChunks['chunk'][c]) + "," + S + "\n")
|