chime-home-dataset-annotation-and-baseline-evaluation-code: annotation_scripts/annotationkit_play

annotate annotation_scripts/annotationkit_play_chunks.py @ 5:b523456082ca tip

Update path to dataset and reflect modified chunk naming convention.

author	peterf
date	Mon, 01 Feb 2016 21:35:27 +0000
parents	f079d2de4aa2
children

rev	line source
#!/usr/bin/python

#
# annotationkit_play_chunks.py:
#    Play excerpts from a list of audio files as specified by protocol CSV file
#    and obtain annotations using text interface
#
# See annotationkit_play_chunks_wrapper.sh for usage example
#
# Author: Peter Foster
# (c) 2015 Peter Foster
#
# Workflow:
#   1. Read the protocol CSV (one row per chunk; columns 'audiofile' and
#      'framestart' are used below).
#   2. Create the annotation CSV on first use, with an empty 'annotation'
#      column sharing the protocol's index.
#   3. For every chunk that has no annotation yet: play it, prompt for a label
#      string, validate it, and write the annotation CSV back immediately so
#      that an interrupted session can be resumed.
#
# NOTE(review): this script targets Python 2 (raw_input, scikits.audiolab).
#

from scikits.audiolab import Sndfile
from scikits.audiolab import play
from pandas import DataFrame
import argparse
import os
import sys
from threading import Thread

#Duration of each chunk in seconds
chunkDuration = 4
#Expected sample rate
sampleRate = 48000

#Annotations containing characters outside this set will not validate
PermittedAnnotationCharacters = set('cmfvpbosU')
#These characters may only appear by themselves
LoneAnnotationCharacters = set('sU')
#Help text shown whenever the entered string contains a non-permitted character
LabelMap = (
    "c: child speech\n"
    "m: adult male speech\n"
    "f: adult female speech\n"
    "v: video Game/TV\n"
    "p: percussive sounds, e.g. crash, bang, knock, footsteps\n"
    "b: broadband noise, e.g. household appliances\n"
    "o: other identifiable sounds\n"
    "s: silence / background noise only\n"
    "U: flag chunk (unidentifiable sounds, not sure how to label)"
)


def _read_chunk(audio_path, frame_start):
    """Read one chunk of audio from a sound file.

    Opens audio_path, checks that its sample rate equals sampleRate, seeks to
    frame_start and reads chunkDuration seconds worth of frames.

    Returns whatever Sndfile.read_frames returns for that span.
    Raises ValueError when the file's sample rate differs from sampleRate.
    """
    sf = Sndfile(audio_path, "r")
    try:
        if sf.samplerate != sampleRate:
            raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))
        sf.seek(frame_start, mode='r')
        return sf.read_frames(chunkDuration * sf.samplerate)
    finally:
        #Close the file even when the sample rate check fails
        sf.close()


def _play_and_prompt(frames):
    """Play frames in a background thread while reading an annotation string.

    Playback runs in a thread so that typing can begin immediately. While the
    chunk plays, sys.stdout is pointed at os.devnull to suppress any output
    originating from play(); the prompt is written to the real stdout.

    Returns the entered line with surrounding whitespace stripped. Does not
    return before playback has finished, so chunks never overlap.
    """
    devnull = open(os.devnull, 'w')
    old_stdout = sys.stdout
    sys.stdout = devnull
    try:
        playback = Thread(target=play, args=(frames.T, sampleRate))
        playback.start()
        try:
            old_stdout.write("Enter annotation string, or simply hit return to replay chunk. > ")
            #The prompt has no trailing newline, so flush to make sure it is shown
            old_stdout.flush()
            return raw_input().strip()
        finally:
            #Wait for playback thread to finish; we don't allow simultaneous playback
            playback.join()
    finally:
        #Recover old stdout (also on Ctrl-C / EOF) and release the devnull handle
        sys.stdout = old_stdout
        devnull.close()


parser = argparse.ArgumentParser()
#Both paths are mandatory; without them the script cannot do anything useful
parser.add_argument('--protocolfile', required=True, help="Path to annotation protocol file")
parser.add_argument('--annotationfile', required=True, help="Path to annotation file")
args = vars(parser.parse_args())

InputFile = args['protocolfile']
OutputFile = args['annotationfile']

#Explicit check instead of assert, so that it also runs under python -O
if not os.path.isfile(InputFile):
    parser.error("protocol file not found: %s" % InputFile)

AudioChunks = DataFrame.from_csv(InputFile)

if not os.path.isfile(OutputFile):
    #Initialise annotation file
    AnnotatedChunks = DataFrame(index=AudioChunks.index, columns=['annotation'])
    AnnotatedChunks.to_csv(OutputFile)

AnnotatedChunks = DataFrame.from_csv(OutputFile)

#Check index integrity; compare lengths first because the elementwise
#comparison is only meaningful for indexes of equal length
if len(AnnotatedChunks.index) != len(AudioChunks.index) or \
        not all(AnnotatedChunks.index == AudioChunks.index):
    raise ValueError("Index of annotation file does not match index of protocol file.")

#Audio playback, restricted to chunks which have not been annotated yet
for i in AnnotatedChunks.index[AnnotatedChunks['annotation'].isnull()]:
    frames = _read_chunk(AudioChunks.loc[i, 'audiofile'], AudioChunks.loc[i, 'framestart'])

    annotationString = ""
    isValidated = False
    while not isValidated:
        #NOTE(review): i+1 presumes an integer, zero-based index - confirm against protocol file
        print("Starting playback of chunk " + str(i+1) + " of " + str(len(AudioChunks)))

        annotationString = _play_and_prompt(frames)

        #Validate annotation string; an empty string validates nothing and
        #prints nothing, which simply replays the chunk
        if not PermittedAnnotationCharacters.issuperset(annotationString):
            #'?' and 'help' show the label map without the error message
            if annotationString not in {'?', 'help'}:
                print("Invalid annotation string.")
            print("Valid characters are: ")
            print("------------------------------")
            print(LabelMap)
            print("------------------------------")
        elif len(annotationString) > 1 and any([c in LoneAnnotationCharacters for c in annotationString]):
            print("Invalid annotation string. Characters in " + str(LoneAnnotationCharacters) + " may only appear in isolation.")
        elif len(annotationString) > 0:
            isValidated = True

    #Amend and write annotations to csv; a single .loc assignment writes into
    #the frame itself rather than into a possibly temporary column object
    AnnotatedChunks.loc[i, 'annotation'] = annotationString
    AnnotatedChunks.to_csv(OutputFile)

Mercurial > hg > chime-home-dataset-annotation-and-baseline-evaluation-code

annotate annotation_scripts/annotationkit_play_chunks.py @ 5:b523456082ca tip