#!/usr/bin/python

#
# annotationkit_play_chunks.py:
#    Play excerpts from a list of audio files as specified by protocol CSV file
#    and obtain annotations using text interface
#
#    See annotationkit_play_chunks_wrapper.sh for usage example
#
# Author: Peter Foster
# (c) 2015 Peter Foster
#
# NOTE: this script uses raw_input(), i.e. it is written for Python 2.
#

from scikits.audiolab import Sndfile
from scikits.audiolab import play
from pandas import DataFrame
import argparse
import os
import sys
from threading import Thread

#Duration of each chunk in seconds
chunkDuration = 4
#Expected sample rate
sampleRate = 48000

#Annotations containing characters outside this set will not validate
PermittedAnnotationCharacters = set('cmfvpbosU')
#These characters may only appear by themselves
LoneAnnotationCharacters = set('sU')
#Help text printed whenever an annotation fails validation, or on '?' / 'help'
LabelMap = (
    "c: child speech\n"
    "m: adult male speech\n"
    "f: adult female speech\n"
    "v: video Game/TV\n"
    "p: percussive sounds, e.g. crash, bang, knock, footsteps\n"
    "b: broadband noise, e.g. household appliances\n"
    "o: other identifiable sounds\n"
    "s: silence / background noise only\n"
    "U: flag chunk (unidentifiable sounds, not sure how to label)"
)

#Both paths are mandatory; let argparse report a missing one instead of
#failing later on os.path.isfile(None)
parser = argparse.ArgumentParser()
parser.add_argument('--protocolfile', required=True, help="Path to annotation protocol file")
parser.add_argument('--annotationfile', required=True, help="Path to annotation file")
args = vars(parser.parse_args())

InputFile = args['protocolfile']
OutputFile = args['annotationfile']

#Explicit check rather than assert, which is stripped when running with -O
if not os.path.isfile(InputFile):
    parser.error("Protocol file not found: " + str(InputFile))

#Protocol file: this script reads its 'audiofile' and 'framestart' columns
AudioChunks = DataFrame.from_csv(InputFile)

if not os.path.isfile(OutputFile):
    #Initialise annotation file
    AnnotatedChunks = DataFrame(index=AudioChunks.index, columns=['annotation'])
    AnnotatedChunks.to_csv(OutputFile)

#Always re-read from disk, so a fresh and a resumed session take the same path
AnnotatedChunks = DataFrame.from_csv(OutputFile)

#Check index integrity; Index.equals also copes with indexes of different length
if not AnnotatedChunks.index.equals(AudioChunks.index):
    raise ValueError("Index of annotation file " + str(OutputFile) +
                     " does not match index of protocol file " + str(InputFile))

#Audio playback: only visit chunks which have no annotation yet
for i in AnnotatedChunks.index[AnnotatedChunks['annotation'].isnull()]:
    sf = Sndfile(AudioChunks.loc[i, 'audiofile'], "r")
    try:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))
        sf.seek(AudioChunks.loc[i, 'framestart'], mode='r')
        frames = sf.read_frames(chunkDuration * sf.samplerate)
    finally:
        #Release the audio file even if the sample rate check or the read fails
        sf.close()

    annotationString = ""
    isValidated = False
    while not isValidated:
        print("Starting playback of chunk " + str(i+1) + " of " + str(len(AudioChunks)))

        #Play chunk in background thread; this allows annotation to begin immediately
        #We set stdout to devnull, to suppress any output originating from play() function
        old_stdout = sys.stdout
        myThread = Thread(target=play, args=(frames.T, sampleRate))
        with open(os.devnull, 'w') as devnull:
            sys.stdout = devnull
            try:
                myThread.start()
                try:
                    #Prompt goes to the real stdout; flush because it has no trailing newline
                    old_stdout.write("Enter annotation string, or simply hit return to replay chunk. > ")
                    old_stdout.flush()
                    annotationString = raw_input().strip()
                finally:
                    #Wait for playback thread to finish; we don't allow simultaneous playback
                    myThread.join()
            finally:
                #Recover old stdout, so we can use print(); done even if input was interrupted
                sys.stdout = old_stdout

        #Validate annotation string
        if PermittedAnnotationCharacters.issuperset(annotationString):
            if any(c in LoneAnnotationCharacters for c in annotationString) and len(annotationString) > 1:
                print("Invalid annotation string. Characters in " + str(LoneAnnotationCharacters) + " may only appear in isolation.")
            elif len(annotationString) > 0:
                isValidated = True
            #An empty string falls through silently: the chunk is simply replayed
        else:
            #'?' and 'help' show the label map without the error message
            if annotationString not in {'?', 'help'}:
                print("Invalid annotation string.")
            print("Valid characters are: ")
            print("------------------------------")
            print(LabelMap)
            print("------------------------------")

    #Amend and write annotations to csv after every chunk, so progress survives interruption
    #Single .loc call instead of chained indexing, which may assign to a temporary copy
    AnnotatedChunks.loc[i, 'annotation'] = annotationString
    AnnotatedChunks.to_csv(OutputFile)