annotate annotation_scripts/annotationkit_play_chunks.py @ 5:b523456082ca tip

Update path to dataset and reflect modified chunk naming convention.
author peterf
date Mon, 01 Feb 2016 21:35:27 +0000
parents f079d2de4aa2
children
rev   line source
peterf@0 1 #!/usr/bin/python
peterf@0 2
peterf@0 3 #
peterf@0 4 # annotationkit_play_chunks.py:
peterf@1 5 # Play excerpts from a list of audio files as specified by protocol CSV file
peterf@1 6 # and obtain annotations using text interface
peterf@1 7 #
peterf@1 8 # See annotationkit_play_chunks_wrapper.sh for usage example
peterf@0 9 #
peterf@0 10 # Author: Peter Foster
peterf@1 11 # (c) 2015 Peter Foster
peterf@0 12 #
peterf@0 13
peterf@0 14 from scikits.audiolab import Sndfile
peterf@0 15 from scikits.audiolab import play
peterf@0 16 from pandas import DataFrame
peterf@0 17 import argparse
peterf@0 18 import os
peterf@0 19 import sys
peterf@0 20 from threading import Thread
peterf@0 21
# Length of each audio chunk, in seconds
chunkDuration = 4
# Sample rate every audio file is expected to have
sampleRate = 48000

# An annotation validates only if all of its characters come from this set
PermittedAnnotationCharacters = set('cmfvpbosU')
# These characters may only appear by themselves
LoneAnnotationCharacters = set('sU')
# Help text shown to the annotator: one "<character>: <meaning>" entry per
# line, with no trailing newline.
LabelMap = "\n".join([
    "c: child speech",
    "m: adult male speech",
    "f: adult female speech",
    "v: video Game/TV",
    "p: percussive sounds, e.g. crash, bang, knock, footsteps",
    "b: broadband noise, e.g. household appliances",
    "o: other identifiable sounds",
    "s: silence / background noise only",
    "U: flag chunk (unidentifiable sounds, not sure how to label)",
])
peterf@0 42
# Command-line interface. Both paths are mandatory: declaring them as
# required lets argparse print a usage error instead of the script failing
# later on a None path.
parser = argparse.ArgumentParser()
parser.add_argument('--protocolfile', required=True,
                    help="Path to annotation protocol file")
parser.add_argument('--annotationfile', required=True,
                    help="Path to annotation file")
args = vars(parser.parse_args())

# Protocol CSV to read, e.g.
# /import/c4dm-scratch/peterf/audex/results/exploratory/annotation_protocol.csv
InputFile = args['protocolfile']
# Annotation CSV to write, e.g.
# /import/c4dm-scratch/peterf/audex/results/exploratory/annotations.csv
OutputFile = args['annotationfile']
peterf@0 52
# The protocol file must exist. Raise explicitly instead of asserting,
# because assert statements are stripped when Python runs with -O.
if not os.path.isfile(InputFile):
    raise IOError("Protocol file not found: " + str(InputFile))

AudioChunks = DataFrame.from_csv(InputFile)

if not os.path.isfile(OutputFile):
    # Initialise annotation file: same index as the protocol, with a single
    # empty 'annotation' column
    AnnotatedChunks = DataFrame(index=AudioChunks.index, columns=['annotation'])
    AnnotatedChunks.to_csv(OutputFile)

# Always (re)load from disk, whether the file was just created or already existed
AnnotatedChunks = DataFrame.from_csv(OutputFile)

# Check index integrity. Compare lengths first: an element-wise comparison of
# indexes with different lengths does not give a per-element result.
if (len(AnnotatedChunks.index) != len(AudioChunks.index)
        or not (AnnotatedChunks.index == AudioChunks.index).all()):
    raise ValueError("Index of annotation file " + str(OutputFile) +
                     " does not match index of protocol file " + str(InputFile))
peterf@0 66
# Audio playback and annotation loop. Only chunks whose 'annotation' cell is
# still empty are visited; the annotation file is rewritten after every chunk.
for i in AnnotatedChunks.index[AnnotatedChunks['annotation'].isnull()]:
    # Read the chunk's frames; close the sound file even if the sample rate
    # check (or the read itself) fails.
    sf = Sndfile(AudioChunks.loc[i, 'audiofile'], "r")
    try:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))
        sf.seek(AudioChunks.loc[i, 'framestart'], mode='r')
        frames = sf.read_frames(chunkDuration * sf.samplerate)
    finally:
        sf.close()

    annotationString = ""
    isValidated = False
    while not isValidated:
        # NOTE(review): "i+1" presumes the index holds 0-based integers -- confirm
        print("Starting playback of chunk " + str(i+1) + " of " + str(len(AudioChunks)))

        # Play chunk in background thread; this allows annotation to begin immediately.
        # We set stdout to devnull, to suppress any output originating from play() function.
        old_stdout = sys.stdout
        devnull = open(os.devnull, 'w')
        sys.stdout = devnull
        try:
            myThread = Thread(target=play, args=(frames.T, sampleRate))
            myThread.start()

            # The prompt goes to the real stdout; flush it because it has no
            # trailing newline.
            old_stdout.write("Enter annotation string, or simply hit return to replay chunk. > ")
            old_stdout.flush()
            annotationString = raw_input().strip()

            # Wait for playback thread to finish; we don't allow simultaneous playback
            myThread.join()
        finally:
            # Recover old stdout (so we can use print()) and release the
            # devnull handle, even if input was interrupted.
            sys.stdout = old_stdout
            devnull.close()

        # Validate annotation string
        if not PermittedAnnotationCharacters.issuperset(annotationString):
            # '?' and 'help' request the label list without an error message
            if annotationString not in {'?', 'help'}:
                print("Invalid annotation string.")
            print("Valid characters are: ")
            print("------------------------------")
            print(LabelMap)
            print("------------------------------")
        elif len(annotationString) > 1 and any(c in LoneAnnotationCharacters for c in annotationString):
            print("Invalid annotation string. Characters in " + str(LoneAnnotationCharacters) + " may only appear in isolation.")
        elif len(annotationString) > 0:
            isValidated = True
        # An empty string falls through silently, which replays the chunk

    # Amend and write annotations to csv. A single .loc assignment writes into
    # the frame itself; chained indexing may assign to a temporary copy.
    AnnotatedChunks.loc[i, 'annotation'] = annotationString
    AnnotatedChunks.to_csv(OutputFile)
peterf@1 119