peterf@0: #!/usr/bin/python
peterf@0: 
peterf@0: #
peterf@0: # annotationkit_play_chunks.py:
peterf@1: #    Play excerpts from a list of audio files as specified by protocol CSV file
peterf@1: #    and obtain annotations using text interface
peterf@1: #
peterf@1: #    See annotationkit_play_chunks_wrapper.sh for usage example
peterf@0: #
peterf@0: # Author: Peter Foster
peterf@1: # (c) 2015 Peter Foster
peterf@0: #
peterf@0: 
peterf@0: from scikits.audiolab import Sndfile
peterf@0: from scikits.audiolab import play
peterf@0: from pandas import DataFrame
peterf@0: import argparse
peterf@0: import os
peterf@0: import sys
peterf@0: from threading import Thread
peterf@0: 
#Length, in seconds, of every excerpt that is played back
chunkDuration = 4
#Sample rate (Hz) that every audio file is expected to have
sampleRate = 48000

#An annotation validates only if all of its characters are in this set
PermittedAnnotationCharacters = {'c', 'm', 'f', 'v', 'p', 'b', 'o', 's', 'U'}
#These labels are exclusive: they may not be combined with any other character
LoneAnnotationCharacters = {'s', 'U'}
#Help text shown to the annotator: one "label: meaning" line per permitted character
LabelMap = "\n".join([
    "c:   child speech",
    "m:   adult male speech",
    "f:   adult female speech",
    "v:   video Game/TV",
    "p:   percussive sounds, e.g. crash, bang, knock, footsteps",
    "b:   broadband noise, e.g. household appliances",
    "o:   other identifiable sounds",
    "s:   silence / background noise only",
    "U:   flag chunk (unidentifiable sounds, not sure how to label)",
])
peterf@0: 
#Command-line interface. Both paths are mandatory: marking them as required
#makes argparse print a usage message when one is missing, instead of the
#script failing further down on a None path.
parser = argparse.ArgumentParser()
parser.add_argument('--protocolfile', required=True, help="Path to annotation protocol file")
parser.add_argument('--annotationfile', required=True, help="Path to annotation file")
args = vars(parser.parse_args())

#Protocol CSV describing the chunks to annotate (only read by this script)
InputFile = args['protocolfile']
#Annotation CSV; created below if it does not exist yet
OutputFile = args['annotationfile']

#Explicit check instead of assert, which is stripped when running with -O
if not os.path.isfile(InputFile):
    parser.error("protocol file not found: " + InputFile)

#NOTE(review): DataFrame.from_csv is deprecated in later pandas releases;
#read_csv(path, index_col=0, parse_dates=True) is the documented replacement.
AudioChunks = DataFrame.from_csv(InputFile)

if not os.path.isfile(OutputFile):
    #Initialise annotation file: same index as the protocol, empty 'annotation' column
    DataFrame(index=AudioChunks.index, columns=['annotation']).to_csv(OutputFile)

#Always read the annotations back from disk, so that annotations already
#present in an existing file are picked up
AnnotatedChunks = DataFrame.from_csv(OutputFile)

#Check index integrity: the annotation file must describe exactly the chunks
#of the protocol file. Index.equals also copes with indices of different length.
if not AnnotatedChunks.index.equals(AudioChunks.index):
    raise ValueError("Index of annotation file " + OutputFile +
                     " does not match index of protocol file " + InputFile)
peterf@0: 
#Audio playback

def _read_chunk(path, frame_start):
    """Read one excerpt of chunkDuration seconds from an audio file.

    path: audio file to open with Sndfile.
    frame_start: frame offset at which the excerpt begins.
    Returns the frames as delivered by Sndfile.read_frames.
    Raises ValueError if the file's sample rate differs from sampleRate.
    """
    sf = Sndfile(path, "r")
    try:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))
        sf.seek(frame_start, mode='r')
        return sf.read_frames(chunkDuration * sf.samplerate)
    finally:
        #Close the file even when the sample rate check or the read fails
        sf.close()


def _prompt_during_playback(frames):
    """Play frames in a background thread while reading one annotation line.

    Playing in the background allows annotation to begin immediately.
    sys.stdout is pointed at os.devnull for the duration of the playback, to
    suppress any output originating from the play() function; it is restored
    (and the devnull handle closed) even if reading the input is interrupted.
    Returns the line typed by the user, stripped of surrounding whitespace.
    """
    real_stdout = sys.stdout
    devnull = open(os.devnull, 'w')
    sys.stdout = devnull
    try:
        playback = Thread(target=play, args=(frames.T, sampleRate))
        playback.start()
        try:
            #The prompt has no trailing newline, so flush it explicitly to make
            #sure it is visible before we block waiting for input
            real_stdout.write("Enter annotation string, or simply hit return to replay chunk. > ")
            real_stdout.flush()
            return raw_input().strip()
        finally:
            #Wait for playback thread to finish; we don't allow simultaneous playback
            playback.join()
    finally:
        #Recover old stdout, so we can use print()
        sys.stdout = real_stdout
        devnull.close()


#Only chunks without an annotation are presented, so a previous session is resumed
for i in AnnotatedChunks.index[AnnotatedChunks['annotation'].isnull()]:
    frames = _read_chunk(AudioChunks.loc[i, 'audiofile'], AudioChunks.loc[i, 'framestart'])

    annotationString = ""
    isValidated = False
    #Replay the chunk until a valid, non-empty annotation has been entered
    while not isValidated:
        print("Starting playback of chunk  " + str(i+1) + " of " + str(len(AudioChunks)))

        annotationString = _prompt_during_playback(frames)

        #Validate annotation string
        if not PermittedAnnotationCharacters.issuperset(annotationString):
            #'?' and 'help' request the label list without counting as an error
            if annotationString not in ('?', 'help'):
                print("Invalid annotation string.")
            print("Valid characters are: ")
            print("------------------------------")
            print(LabelMap)
            print("------------------------------")
        elif len(annotationString) > 1 and not LoneAnnotationCharacters.isdisjoint(annotationString):
            print("Invalid annotation string. Characters in " + str(LoneAnnotationCharacters) + " may only appear in isolation.")
        elif annotationString:
            isValidated = True
        #An empty string falls through all branches: the chunk is simply replayed

    #Amend and write annotations to csv after every chunk, so no work is lost.
    #A single .loc assignment is used rather than chained indexing, which is not
    #guaranteed to write through to the DataFrame.
    AnnotatedChunks.loc[i, 'annotation'] = annotationString
    AnnotatedChunks.to_csv(OutputFile)
peterf@1: