view annotation_scripts/annotationkit_play_chunks.py @ 5:b523456082ca tip

Update path to dataset and reflect modified chunk naming convention.
author peterf
date Mon, 01 Feb 2016 21:35:27 +0000
parents f079d2de4aa2
children
line wrap: on
line source
#!/usr/bin/python

#
# annotationkit_play_chunks.py:
#    Play excerpts from a list of audio files as specified by protocol CSV file
#    and obtain annotations using text interface
#
#    See annotationkit_play_chunks_wrapper.sh for usage example
#
# Author: Peter Foster
# (c) 2015 Peter Foster
#

from scikits.audiolab import Sndfile
from scikits.audiolab import play
from pandas import DataFrame
import argparse
import os
import sys
from threading import Thread

# Length of every excerpt that is played back, in seconds
chunkDuration = 4
# Sample rate every audio file is expected to have
sampleRate = 48000

# An annotation validates only if all of its characters are drawn from this set
PermittedAnnotationCharacters = set("cmfvpbosU")
# Labels that must be entered on their own, never combined with other labels
LoneAnnotationCharacters = set("sU")
# Help text shown to the annotator: one line per label character, no trailing newline
LabelMap = "\n".join([
    "c:   child speech",
    "m:   adult male speech",
    "f:   adult female speech",
    "v:   video Game/TV",
    "p:   percussive sounds, e.g. crash, bang, knock, footsteps",
    "b:   broadband noise, e.g. household appliances",
    "o:   other identifiable sounds",
    "s:   silence / background noise only",
    "U:   flag chunk (unidentifiable sounds, not sure how to label)",
])

# Command-line interface. Both paths are mandatory: declaring them as required
# makes argparse stop with a usage message when one is missing, instead of
# letting None reach the os.path.isfile() checks further down.
parser = argparse.ArgumentParser()
parser.add_argument('--protocolfile', required=True, help="Path to annotation protocol file")
parser.add_argument('--annotationfile', required=True, help="Path to annotation file")
args = vars(parser.parse_args())

# Protocol CSV listing the chunks to annotate, e.g.
#   /import/c4dm-scratch/peterf/audex/results/exploratory/annotation_protocol.csv
InputFile = args['protocolfile']
# Annotation CSV that is created on first use and updated afterwards, e.g.
#   /import/c4dm-scratch/peterf/audex/results/exploratory/annotations.csv
OutputFile = args['annotationfile']

# Validate the protocol path with an explicit exception rather than an assert:
# asserts are removed under "python -O", which would let a bad path through.
if not os.path.isfile(InputFile):
    raise IOError("Annotation protocol file not found: %s" % InputFile)

# Table of chunks to annotate, one row per chunk
AudioChunks = DataFrame.from_csv(InputFile)

if not os.path.isfile(OutputFile):
    #Initialise annotation file with one empty annotation per chunk
    AnnotatedChunks = DataFrame(index=AudioChunks.index, columns=['annotation'])
    AnnotatedChunks.to_csv(OutputFile)

# Always load the annotations from disk, whether the file was just created or
# already existed from an earlier session
AnnotatedChunks = DataFrame.from_csv(OutputFile)

#Check index integrity: the annotation file must describe exactly the chunks of
#the protocol file. Index.equals() also copes with indexes of different length,
#where an element-wise == comparison does not yield a usable boolean array.
if not AnnotatedChunks.index.equals(AudioChunks.index):
    raise ValueError("Index of annotation file %s does not match index of protocol file %s"
                     % (OutputFile, InputFile))

#Audio playback
#
# For every chunk that has no annotation yet: read the excerpt, play it while
# prompting for an annotation string, repeat until the string validates, then
# store the annotation and rewrite the annotation file.
#
# os.devnull is opened once for the whole session (and closed by the with
# block) instead of once per prompt.
with open(os.devnull, 'w') as devnull:
    for i in AnnotatedChunks.index[AnnotatedChunks['annotation'].isnull()]:
        sf = Sndfile(AudioChunks['audiofile'].ix[i], "r")
        try:
            if sf.samplerate != sampleRate:
                raise ValueError("wanted sample rate %g - got %g." % (sampleRate, sf.samplerate))
            sf.seek(AudioChunks['framestart'].ix[i], mode='r')
            frames = sf.read_frames(chunkDuration * sf.samplerate)
        finally:
            # Release the audio file even if the sample rate check or the read fails
            sf.close()

        annotationString = ""
        isValidated = False
        while not isValidated:
            print("Starting playback of chunk  " + str(i+1) + " of " + str(len(AudioChunks)))

            #Play chunk in background thread; this allows annotation to begin immediately
            playbackThread = Thread(target=play, args=(frames.T, sampleRate))

            #We set stdout to devnull, to suppress any output originating from play() function
            old_stdout = sys.stdout
            sys.stdout = devnull
            try:
                playbackThread.start()
                try:
                    old_stdout.write("Enter annotation string, or simply hit return to replay chunk. > ")
                    # The prompt has no trailing newline, so push it out explicitly
                    old_stdout.flush()
                    annotationString = raw_input().strip()
                finally:
                    #Wait for playback thread to finish; we don't allow simultaneous playback
                    playbackThread.join()
            finally:
                #Recover old stdout, so we can use print() - also when input was interrupted
                sys.stdout = old_stdout

            #Validate annotation string
            if not PermittedAnnotationCharacters.issuperset(annotationString):
                # '?' and 'help' are requests for the label list, not mistakes
                if annotationString not in {'?', 'help'}:
                    print("Invalid annotation string.")
                print("Valid characters are: ")
                print("------------------------------")
                print(LabelMap)
                print("------------------------------")
            elif len(annotationString) > 1 and not LoneAnnotationCharacters.isdisjoint(annotationString):
                print("Invalid annotation string. Characters in " + str(LoneAnnotationCharacters) + " may only appear in isolation.")
            elif annotationString:
                isValidated = True
            # An empty string falls through all branches: the chunk is replayed

        #Amend and write annotations to csv after every chunk, so progress is kept.
        #A single .loc call sets the value on the frame itself; assigning through
        #frame['annotation'].ix[i] may operate on a temporary copy.
        AnnotatedChunks.loc[i, 'annotation'] = annotationString
        AnnotatedChunks.to_csv(OutputFile)