Mercurial > hg > chime-home-dataset-annotation-and-baseline-evaluation-code
view annotation_scripts/annotationkit_play_chunks.py @ 5:b523456082ca tip
Update path to dataset and reflect modified chunk naming convention.
author | peterf |
---|---|
date | Mon, 01 Feb 2016 21:35:27 +0000 |
parents | f079d2de4aa2 |
children |
line wrap: on
line source
#!/usr/bin/python # # annotationkit_play_chunks.py: # Play excerpts from a list of audio files as specified by protocol CSV file # and obtain annotations using text interface # # See annotationkit_play_chunks_wrapper.sh for usage example # # Author: Peter Foster # (c) 2015 Peter Foster # from scikits.audiolab import Sndfile from scikits.audiolab import play from pandas import DataFrame import argparse import os import sys from threading import Thread #Duration of each chunk in seconds chunkDuration = 4 #Expected sample rate sampleRate = 48000 #Annotations containing characters outside this set will not validate PermittedAnnotationCharacters = set('cmfvpbosU') #These characters may only appear by themself LoneAnnotationCharacters = set('sU') LabelMap = "\ c: child speech\n\ m: adult male speech\n\ f: adult female speech\n\ v: video Game/TV\n\ p: percussive sounds, e.g. crash, bang, knock, footsteps\n\ b: broadband noise, e.g. household appliances\n\ o: other identifiable sounds\n\ s: silence / background noise only\n\ U: flag chunk (unidentifiable sounds, not sure how to label)\ " parser = argparse.ArgumentParser() parser.add_argument('--protocolfile', help="Path to annotation protocol file") parser.add_argument('--annotationfile', help="Path to annotation file") args = vars(parser.parse_args()) #InputFile = '/import/c4dm-scratch/peterf/audex/results/exploratory/annotation_protocol.csv' InputFile = args['protocolfile'] #OutputFile = '/import/c4dm-scratch/peterf/audex/results/exploratory/annotations.csv' OutputFile = args['annotationfile'] assert(os.path.isfile(InputFile)) AudioChunks = DataFrame.from_csv(InputFile) if not(os.path.isfile(OutputFile)): #Initialise annotation file AnnotatedChunks = DataFrame(index=AudioChunks.index, columns=['annotation']) AnnotatedChunks.to_csv(OutputFile) AnnotatedChunks = DataFrame.from_csv(OutputFile) #Check index integrity assert(all(AnnotatedChunks.index == AudioChunks.index)) #Audio playback for i in AnnotatedChunks.index[AnnotatedChunks['annotation'].isnull()]: sf = Sndfile(AudioChunks['audiofile'].ix[i], "r") if sf.samplerate != sampleRate: raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate)) sf.seek(AudioChunks['framestart'].ix[i], mode='r') frames = sf.read_frames(chunkDuration * sf.samplerate) sf.close() annotationString = "" isValidated = False while not(isValidated): print("Starting playback of chunk " + str(i+1) + " of " + str(len(AudioChunks))) #Play chunk in background thread; this allows annotation to begin immediately #We set stdout to devnull, to suppress any output originating from play() function F = open(os.devnull, 'w') old_stdout = sys.stdout sys.stdout = F myFunction = lambda frames, sampleRate: play(frames, sampleRate) myThread = Thread(target=myFunction, args=(frames.T, sampleRate)) myThread.start() #Unthreaded code here #play(frames.T, sampleRate) old_stdout.write("Enter annotation string, or simply hit return to replay chunk. > ") annotationString = raw_input() annotationString = annotationString.strip() #Wait for playback thread to finish; we don't allow simultaneous playback myThread.join() #Recover old stdout, so we can use print() sys.stdout = old_stdout #Validate annotation string if PermittedAnnotationCharacters.issuperset(annotationString): if any([c in LoneAnnotationCharacters for c in annotationString]) and len(annotationString) > 1: print("Invalid annotation string. Characters in " + str(LoneAnnotationCharacters) + " may only appear in isolation.") elif len(annotationString) > 0: isValidated = True else: if annotationString not in {'?', 'help'}: print("Invalid annotation string.") print("Valid characters are: ") print("------------------------------") print(LabelMap) print("------------------------------") #Amend and write annotations to csv AnnotatedChunks['annotation'].ix[i] = annotationString AnnotatedChunks.to_csv(OutputFile)