annotate util.py @ 1:04f1e3463466 tip master

Implement maxpooling and unpooling aspect
author Dan Stowell <danstowell@users.sourceforge.net>
date Wed, 13 Jan 2016 09:56:16 +0000
parents 73317239d6d1
children
rev   line source
danstowell@0 1
danstowell@0 2 # utility functions
danstowell@0 3
danstowell@0 4 import numpy as np
danstowell@0 5 from numpy import float32
danstowell@0 6
danstowell@0 7 import os, errno
danstowell@0 8 from scikits.audiolab import Sndfile
danstowell@0 9 from scikits.audiolab import Format
danstowell@0 10
danstowell@0 11 from matplotlib.mlab import specgram
danstowell@0 12
danstowell@0 13 from userconfig import *
danstowell@0 14
danstowell@0 15 ########################################################
danstowell@0 16
def standard_specgram(signal):
    """Return the spectrogram matrix of `signal`, made using the audio-layer config.

    Runs ``specgram`` with a Hamming window of ``audioframe_len`` samples and a
    hop of ``audioframe_stride`` samples (expressed to ``specgram`` as an
    overlap of ``audioframe_len - audioframe_stride``), then keeps
    ``specbinnum`` rows of the result starting at row ``specbinlow``.
    The returned array is float32.
    """
    overlap = audioframe_len - audioframe_stride
    analysis_window = np.hamming(audioframe_len)
    # specgram returns a tuple; element 0 is the spectrum matrix itself.
    spectrum = specgram(
        signal,
        NFFT=audioframe_len,
        noverlap=overlap,
        window=analysis_window,
    )[0]
    binstop = specbinlow + specbinnum
    return np.array(spectrum[specbinlow:binstop, :], dtype=float32)
danstowell@0 20
def load_soundfile(inwavpath, startpossecs, maxdursecs=None):
    """Loads audio data, optionally limiting to a specified start position and duration.

    The file must be SINGLE-CHANNEL and its sample rate must equal
    ``srate * wavdownsample``. The audio is downsampled by keeping every
    ``wavdownsample``-th sample of each block read.

    Parameters:
        inwavpath: path of the sound file, opened read-only with ``Sndfile``.
        startpossecs: start position in seconds; a value > 0 seeks before reading.
        maxdursecs: maximum duration to read, in seconds. ``None`` is treated
            as 9999 seconds.

    Returns:
        A 1-D float32 numpy array of the downsampled samples. Data is read in
        whole, non-overlapping blocks of 4096 file samples, so the result can
        overshoot ``maxdursecs`` by up to one block, and a final block that
        cannot be read in full is discarded.

    Raises:
        ValueError: if the block size is not a multiple of ``wavdownsample``,
            if the file is not mono, or if its sample rate is not
            ``srate * wavdownsample``.
    """
    framelen = 4096  # number of file samples read per block
    if (framelen % wavdownsample) != 0:
        raise ValueError("framelen needs to be a multiple of wavdownsample: %i, %i" % (framelen, wavdownsample))
    chunklen = framelen // wavdownsample  # samples per block after downsampling
    if maxdursecs is None:
        maxdursecs = 9999

    sf = Sndfile(inwavpath, "r")
    try:
        if sf.channels != 1:
            raise ValueError("Sound file %s has multiple channels (%i) - mono required." % (inwavpath, sf.channels))
        expected_rate = srate * wavdownsample
        if sf.samplerate != expected_rate:
            # Report the rate the file is actually required to have.
            raise ValueError("Sample rate mismatch: we expect %g, file has %g" % (expected_rate, sf.samplerate))
        timemax_spls = int(maxdursecs * sf.samplerate)
        if startpossecs > 0:
            # note: raises IOError if beyond the end
            # NOTE(review): offset is cast to int on the assumption that seek
            # wants a whole frame index - confirm against the audiolab API.
            sf.seek(int(startpossecs * sf.samplerate))

        # Collect blocks in a list and join once at the end; growing an array
        # with hstack on every iteration copies all earlier data each time.
        chunks = []
        splsread = 0
        framesread = 0
        while True:
            try:
                chunk = sf.read_frames(framelen)[::wavdownsample]
            except RuntimeError:
                # Treated as "no more complete blocks available".
                break
            splsread += framelen
            framesread += 1
            if framesread % 25000 == 0:
                print("Read %i frames" % framesread)
            if len(chunk) != chunklen:
                print("Not read sufficient samples - returning")
                break
            chunks.append(np.array(chunk, dtype=np.float32))
            if splsread >= timemax_spls:
                break
    finally:
        # Always release the file handle, including on the error paths above.
        sf.close()

    if not chunks:
        return np.array([], dtype=np.float32)
    return np.concatenate(chunks)
danstowell@0 63
def mkdir_p(path):
    """Create directory `path` together with any missing parent directories.

    If `path` already exists as a directory this is a no-op. Any other
    OSError (including `path` existing but not being a directory) is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as err:  # Python >2.5
        already_a_dir = (err.errno == errno.EEXIST) and os.path.isdir(path)
        if not already_a_dir:
            raise
danstowell@0 71