Mercurial > hg > autoencoder-specgram
annotate userconfig.py @ 1:04f1e3463466 tip master
Implement maxpooling and unpooling aspect
| author | Dan Stowell <danstowell@users.sourceforge.net> |
|---|---|
| date | Wed, 13 Jan 2016 09:56:16 +0000 |
| parents | 73317239d6d1 |
| children |
| rev | line source |
|---|---|
| danstowell@0 | 1 |
| danstowell@0 | 2 # Configuration options that you might like to change |
| danstowell@0 | 3 |
| danstowell@0 | 4 example_is_audio = True # if False, generates simple sparse data for probing; else loads an audio file |
| danstowell@0 | 5 examplegram_startindex = 550 # just choosing which bit to plot |
| danstowell@0 | 6 |
| danstowell@0 | 7 #examplewavpath = "~/birdsong/linhart2015mar/concatall/perfolder/PC1101-rep-day2.wav" |
| danstowell@0 | 8 examplewavpath = "509.WAV" |
| danstowell@0 | 9 examplewavpath = "renneschiffchaff20130320bout1filt.wav" |
| danstowell@0 | 10 |
| danstowell@0 | 11 srate = 22050. |
| danstowell@0 | 12 wavdownsample = 2 # e.g. 44.1 kHz audio, downsampled by a factor of 2, gets loaded as 22.05 kHz. For no downsampling, set this ratio to 1. |
| danstowell@0 | 13 |
| danstowell@0 | 14 audioframe_len = 128 |
| danstowell@0 | 15 audioframe_stride = 64 |
| danstowell@0 | 16 |
| danstowell@0 | 17 specbinlow = 10 |
| danstowell@0 | 18 specbinnum = 32 |
| danstowell@0 | 19 |
| danstowell@0 | 20 featframe_len = 9 |
| danstowell@0 | 21 featframe_stride = 16 |
| danstowell@0 | 22 numfilters = 6 |
| danstowell@0 | 23 minibatchsize = 16 |
| danstowell@0 | 24 numtimebins = 160 # 128 # 48 # NOTE: this size really needs to be compatible with the downsampling (maxpooling) steps, if you use them. |
| danstowell@0 | 25 |
| danstowell@0 | 26 |
| danstowell@0 | 27 ########################################################### |
| danstowell@0 | 28 # Below, we calculate some other things based on the config |
| danstowell@0 | 29 |
| danstowell@0 | 30 import os |
| danstowell@0 | 31 examplewavpath = os.path.expanduser(examplewavpath) |
| danstowell@0 | 32 |
| danstowell@0 | 33 |
| danstowell@0 | 34 hopsize_secs = audioframe_stride / float(srate) |
| danstowell@0 | 35 print("Specgram frame hop size: %.3g ms" % (hopsize_secs * 1000)) |
| danstowell@0 | 36 specgramlen_secs = hopsize_secs * numtimebins |
| danstowell@0 | 37 print("Specgram duration: %.3g s" % specgramlen_secs) |
| danstowell@0 | 38 |
