autoencoder-specgram: util.py annotate

annotate util.py @ 1:04f1e3463466 tip master

Implement maxpooling and unpooling aspect

author	Dan Stowell <danstowell@users.sourceforge.net>
date	Wed, 13 Jan 2016 09:56:16 +0000
parents	73317239d6d1
children

rev	line source
danstowell@0	1
danstowell@0	2 # utility functions
danstowell@0	3
danstowell@0	4 import numpy as np
danstowell@0	5 from numpy import float32
danstowell@0	6
danstowell@0	7 import os, errno
danstowell@0	8 from scikits.audiolab import Sndfile
danstowell@0	9 from scikits.audiolab import Format
danstowell@0	10
danstowell@0	11 from matplotlib.mlab import specgram
danstowell@0	12
danstowell@0	13 from userconfig import *
danstowell@0	14
danstowell@0	15 ########################################################
danstowell@0	16
def standard_specgram(signal):
    """Return the spectrogram matrix of `signal`, made using the audio-layer config.

    The frame length, frame stride and frequency-bin range are read from the
    module-level settings `audioframe_len`, `audioframe_stride`, `specbinlow`
    and `specbinnum` (expected to be supplied by `userconfig`).

    Returns a float32 array holding rows `specbinlow` up to (but excluding)
    `specbinlow + specbinnum` of the first item returned by `specgram`.
    """
    # Consecutive frames share everything except one stride's worth of samples.
    overlap = audioframe_len - audioframe_stride
    analysis = specgram(
        signal,
        NFFT=audioframe_len,
        noverlap=overlap,
        window=np.hamming(audioframe_len),
    )
    # Only the first returned item is used; keep just the configured band of rows.
    spectrum = analysis[0]
    band = spectrum[specbinlow:specbinlow + specbinnum, :]
    return np.array(band, dtype=float32)
danstowell@0	20
def load_soundfile(inwavpath, startpossecs, maxdursecs=None):
    """Loads audio data, optionally limiting to a specified start position and duration.

    The file must be SINGLE-CHANNEL and its sample rate must equal
    `srate * wavdownsample`. It is read in frames of 4096 samples; each frame
    is decimated by keeping every `wavdownsample`-th sample (no filtering is
    done here) and the decimated frames are joined together.

    Args:
        inwavpath: path of the sound file to read.
        startpossecs: position (in seconds) to start reading from; values
            that are not greater than zero mean "read from the beginning".
        maxdursecs: maximum duration (in seconds) to read. `None` is treated
            as 9999 seconds. The limit is checked once per frame, so the
            result can exceed it by up to one frame.

    Returns:
        A 1-D float32 numpy array of the decimated samples. Only complete
        frames are included; reading stops at the first short or failed read.

    Raises:
        ValueError: if the frame/hop size is not a multiple of
            `wavdownsample`, the file is not mono, or the sample rate does
            not match.
        IOError: presumably raised by the sound-file library when seeking
            beyond the end of the file (per the original author's note).
    """
    framelen = 4096
    hopspls = framelen
    # With hop == frame length this is zero, i.e. no samples are carried over
    # from one frame to the next.
    unhopspls = framelen - hopspls
    if (framelen % wavdownsample) != 0:
        raise ValueError("framelen needs to be a multiple of wavdownsample: %i, %i" % (framelen, wavdownsample))
    if (hopspls % wavdownsample) != 0:
        raise ValueError("hopspls needs to be a multiple of wavdownsample: %i, %i" % (hopspls , wavdownsample))
    if maxdursecs is None:
        maxdursecs = 9999
    # Exact because divisibility was verified above.
    chunklen = framelen // wavdownsample

    sf = Sndfile(inwavpath, "r")
    # try/finally so the file handle is released even when validation or
    # seeking raises.
    try:
        if sf.channels != 1:
            raise ValueError("Sound file %s has multiple channels (%i) - mono required." % (inwavpath, sf.channels))
        expected_rate = srate * wavdownsample
        if sf.samplerate != expected_rate:
            # Report the rate actually required of the file (before decimation).
            raise ValueError("Sample rate mismatch: we expect %g, file has %g" % (expected_rate, sf.samplerate))
        timemax_spls = int(maxdursecs * sf.samplerate)
        if startpossecs > 0:
            # The offset is a sample count, so truncate fractional positions.
            sf.seek(int(startpossecs * sf.samplerate)) # note: returns IOError if beyond the end

        chunks = []  # joined once at the end instead of re-copying per frame
        chunk = None
        splsread = 0  # counted in file samples, i.e. before decimation
        framesread = 0
        while True:
            try:
                if splsread == 0:
                    chunk = sf.read_frames(framelen)[::wavdownsample]
                    splsread += framelen
                else:
                    chunk = np.hstack((chunk[:unhopspls], sf.read_frames(hopspls)[::wavdownsample]))
                    splsread += hopspls
            except RuntimeError:
                # A failed read is treated as end of data: keep what we have.
                break
            framesread += 1
            if framesread % 25000 == 0:
                print("Read %i frames" % framesread)
            if len(chunk) != chunklen:
                print("Not read sufficient samples - returning")
                break
            chunk = np.array(chunk, dtype=np.float32)
            chunks.append(chunk)
            if splsread >= timemax_spls:
                break
    finally:
        sf.close()

    if not chunks:
        return np.array([], dtype=np.float32)
    return np.concatenate(chunks)
danstowell@0	63
def mkdir_p(path):
    """Create directory `path` together with any missing parent directories.

    An already-existing directory at `path` is not an error. Any other
    OSError (including `path` existing but not being a directory) is
    re-raised unchanged.
    """
    try:
        os.makedirs(path)
    except OSError as err:  # "as" syntax needs Python > 2.5
        already_a_dir = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_dir:
            raise
danstowell@0	71

Mercurial > hg > autoencoder-specgram

annotate util.py @ 1:04f1e3463466 tip master