#!/usr/bin/python

#
# annotationkit_play_chunks.py:
#    Play excerpts from a list of audio files as specified by protocol CSV file
#    and obtain annotations using text interface
#
#    See annotationkit_play_chunks_wrapper.sh for usage example
#
# Author: Peter Foster
# (c) 2015 Peter Foster
#
# Workflow:
#    1. Read the protocol CSV (columns used here: 'audiofile', 'framestart').
#    2. Create the annotation CSV (single column 'annotation') if it does not
#       exist yet, otherwise resume from it.
#    3. For every chunk whose annotation is still empty: play the chunk, read
#       an annotation string from stdin, validate it, and write the annotation
#       CSV back to disk immediately so that progress survives interruption.
#
# NOTE: this script targets Python 2 (raw_input, scikits.audiolab).
#

import argparse
import os
import sys
from threading import Thread

from pandas import DataFrame
from scikits.audiolab import Sndfile
from scikits.audiolab import play

#Duration of each chunk in seconds
chunkDuration = 4
#Expected sample rate
sampleRate = 48000

#Annotations containing characters outside this set will not validate
PermittedAnnotationCharacters = set('cmfvpbosU')
#These characters may only appear by themselves
LoneAnnotationCharacters = set('sU')
#Help text shown when an annotation fails validation or help is requested
LabelMap = (
    "c: child speech\n"
    "m: adult male speech\n"
    "f: adult female speech\n"
    "v: video Game/TV\n"
    "p: percussive sounds, e.g. crash, bang, knock, footsteps\n"
    "b: broadband noise, e.g. household appliances\n"
    "o: other identifiable sounds\n"
    "s: silence / background noise only\n"
    "U: flag chunk (unidentifiable sounds, not sure how to label)"
)

parser = argparse.ArgumentParser()
#Both paths are mandatory; without them the script cannot do anything useful
parser.add_argument('--protocolfile', required=True,
                    help="Path to annotation protocol file")
parser.add_argument('--annotationfile', required=True,
                    help="Path to annotation file")
args = vars(parser.parse_args())

InputFile = args['protocolfile']
OutputFile = args['annotationfile']

#Explicit check rather than assert: asserts are stripped when running with -O.
#sys.exit(<str>) prints the message to stderr and exits with status 1.
if not os.path.isfile(InputFile):
    sys.exit("Protocol file not found: %s" % InputFile)

AudioChunks = DataFrame.from_csv(InputFile)

if not os.path.isfile(OutputFile):
    #Initialise annotation file with one empty annotation per protocol row
    AnnotatedChunks = DataFrame(index=AudioChunks.index, columns=['annotation'])
    AnnotatedChunks.to_csv(OutputFile)

#Always re-read from disk, so fresh and resumed sessions take the same path
AnnotatedChunks = DataFrame.from_csv(OutputFile)

#Check index integrity. Index.equals also copes with indices of differing
#length, where an elementwise comparison would not yield a usable result.
if not AnnotatedChunks.index.equals(AudioChunks.index):
    sys.exit("Index of annotation file %s does not match protocol file %s"
             % (OutputFile, InputFile))

#Audio playback: visit only those chunks that have no annotation yet
for i in AnnotatedChunks.index[AnnotatedChunks['annotation'].isnull()]:
    sf = Sndfile(AudioChunks.loc[i, 'audiofile'], "r")
    try:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))
        sf.seek(AudioChunks.loc[i, 'framestart'], mode='r')
        frames = sf.read_frames(chunkDuration * sf.samplerate)
    finally:
        #Release the sound file even if seeking or reading failed
        sf.close()

    annotationString = ""
    isValidated = False
    while not isValidated:
        #NOTE(review): i+1 assumes the CSV index is a zero-based integer
        #sequence - confirm against the protocol file format
        print("Starting playback of chunk " + str(i+1) + " of " + str(len(AudioChunks)))

        #Play chunk in background thread; this allows annotation to begin immediately.
        #The transposed frame array is passed to play(), as audiolab presumably
        #expects channels along the first axis - TODO confirm.
        playbackThread = Thread(target=play, args=(frames.T, sampleRate))

        #We set stdout to devnull, to suppress any output originating from play() function
        devnull = open(os.devnull, 'w')
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:
            playbackThread.start()

            #Prompt goes to the real stdout; flush since it has no trailing newline
            old_stdout.write("Enter annotation string, or simply hit return to replay chunk. > ")
            old_stdout.flush()
            annotationString = raw_input().strip()
        finally:
            #Wait for playback thread to finish; we don't allow simultaneous playback
            if playbackThread.is_alive():
                playbackThread.join()
            #Recover old stdout, so we can use print(), and release the devnull
            #handle. Doing this in finally keeps the terminal usable even when
            #input is interrupted (Ctrl-C / EOF).
            sys.stdout = old_stdout
            devnull.close()

        #Validate annotation string
        if PermittedAnnotationCharacters.issuperset(annotationString):
            hasLoneCharacter = any(c in LoneAnnotationCharacters for c in annotationString)
            if hasLoneCharacter and len(annotationString) > 1:
                print("Invalid annotation string. Characters in " + str(LoneAnnotationCharacters) + " may only appear in isolation.")
            elif len(annotationString) > 0:
                isValidated = True
            #An empty string falls through unvalidated, which replays the chunk
        else:
            #'?' and 'help' show the label map without the error message
            if annotationString not in {'?', 'help'}:
                print("Invalid annotation string.")
            print("Valid characters are: ")
            print("------------------------------")
            print(LabelMap)
            print("------------------------------")

    #Amend and write annotations to csv after every chunk.
    #A single .loc call avoids chained assignment, which may write to a copy.
    AnnotatedChunks.loc[i, 'annotation'] = annotationString
    AnnotatedChunks.to_csv(OutputFile)