annotate bindings/python/pyadb.py @ 770:c54bc2ffbf92 tip

update tags
author convert-repo
date Fri, 16 Dec 2011 11:34:01 +0000
parents b9dbe4611dde
children
rev   line source
map01bf@625 1 #!/usr/bin/env python
map01bf@625 2 # encoding: utf-8
map01bf@625 3 """
map01bf@625 4 pyadb.py
map01bf@625 5
map01bf@625 6 public access and class structure for python audioDb api bindings.
map01bf@625 7
map01bf@628 8
map01bf@628 9
map01bf@625 10 Created by Benjamin Fields on 2009-09-22.
map01bf@625 11 Copyright (c) 2009 Goldsmith University of London.
map01bf@625 12 """
map01bf@625 13
map01bf@625 14 import sys
map01bf@625 15 import os, os.path
map01bf@625 16 import unittest
map01bf@625 17 import _pyadb
map01bf@625 18
# Bit masks for the flags word of the audioDB header.  The C API does not
# export these definitions to the bindings, so they are duplicated here by
# hand; they are and-ed against the raw flags value reported by the status
# call to derive the boolean database attributes.
ADB_HEADER_FLAG_L2NORM = 0x1
ADB_HEADER_FLAG_POWER = 0x4
ADB_HEADER_FLAG_TIMES = 0x20
ADB_HEADER_FLAG_REFERENCES = 0x40
map01bf@625 23
class Usage(Exception):
    """error to indicate that a method has been called with incorrect args"""

    def __init__(self, msg):
        # Forward the message to Exception so that str(err) and tracebacks
        # show it; the .msg attribute is kept for existing callers.
        Exception.__init__(self, msg)
        self.msg = msg
map01bf@625 28
class Pyadb(object):
    """Pyadb class. Allows for creation, access, insertion and query of an audioDB vector matching database.

    Query behaviour is controlled through the ``configQuery`` dict on the
    instance; see ``configCheck`` for the accepted keys and value types.
    """

    # Accepted configQuery keys mapped to the type each value must have.
    validConfigTerms = {
        "seqLength": int, "seqStart": int, "exhaustive": bool,
        "falsePositives": bool, "accumulation": str, "distance": str,
        "npoints": int, "ntracks": int, "includeKeys": list,
        "excludeKeys": list, "radius": float, "absThres": float,
        "relThres": float, "durRatio": float, "hopSize": int, "resFmt": str,
    }

    def __init__(self, path, mode='w', datasize=0, ntracks=0, datadim=0):
        """
        Open the database at ``path``, creating it if the path does not exist.

        ``mode`` must be 'r' or 'w' and is only forwarded when an existing
        database is opened.  ``datasize``, ``ntracks`` and ``datadim`` are only
        forwarded when a new database is created.  Per the original note, by
        default the db will hold 20000 tracks, be 2GB in size and determine
        datadim from the first inserted feature (presumably that is how the
        underlying library treats the 0 defaults -- confirm against the C API).

        Raises ValueError if ``mode`` is neither 'r' nor 'w'.
        """
        self.path = path
        self.configQuery = {}
        if mode not in ('w', 'r'):
            raise ValueError("if specified, mode must be either 'r' or 'w'.")
        if os.path.exists(path):
            self._db = _pyadb._pyadb_open(path, mode)
        else:
            self._db = _pyadb._pyadb_create(path, datasize, ntracks, datadim)
        self._updateDBAttributes()

    def insert(self, featFile=None, powerFile=None, timesFile=None, featData=None, powerData=None, timesData=None, key=None):
        """
        Insert features into the database.

        Features are given either as a path to a binary fftExtract style
        feature file (``featFile``) or directly as a numpy array
        (``featData``).  If both are given the file is used.  If power and/or
        timing is engaged in the database header, it must be provided via the
        same means as the feature or a Usage exception is raised.  Power files
        should be of the same binary type as features.  Times files should
        hold, as ascii numbers one per line, the time in seconds from the
        beginning of the file to each segment start.

        When providing data directly, a 2-D ``featData`` has its shape
        unpacked as (number of vectors, number of dimensions); a 1-D array of
        length n is treated as n vectors of dimension 1.  Supplying
        ``timesData`` sends times along even if the database header does not
        have times enabled yet.

        Raises Usage for missing features, power or times, and RuntimeError
        when the underlying insertion call reports failure.
        """
        # While python style normally advocates leaping before looking, these
        # checks are necessary as it is very difficult to ascertain why the
        # insertion failed once it has been called.
        fromFile = bool(featFile)
        if not fromFile and featData is None:
            raise Usage("Must provide some feature data!")

        # Power and times have to arrive through the same channel as the
        # features, so only the matching argument of each pair is inspected.
        if fromFile:
            power, times = powerFile, timesFile
        else:
            power, times = powerData, timesData
        if self.hasPower and power is None:
            raise Usage("The db you are attempting an insert on (%s) expects power and you either "
                        "haven't provided any or have done so in the wrong format." % self.path)
        if self.hasTimes and times is None:
            raise Usage("The db you are attempting an insert on (%s) expects times and you either "
                        "haven't provided any or have done so in the wrong format." % self.path)

        args = {"db": self._db}
        if key:
            args["key"] = str(key)

        if fromFile:
            args["features"] = featFile
            if self.hasPower:
                args["power"] = powerFile
            if self.hasTimes:
                args["times"] = timesFile
            # NOTE(review): this call is treated as failed on a falsy return,
            # whereas the array call below is treated as failed on a non-zero
            # return -- confirm both conventions against the C extension.
            if not _pyadb._pyadb_insertFromFile(**args):
                raise RuntimeError("Insertion from file failed for an unknown reason.")
            self._updateDBAttributes()
            return

        features = featData
        if len(features.shape) == 1:
            # Promote a flat array to a column so the shape unpacks below.
            features = features.reshape((features.shape[0], 1))
        args["nVect"], args["nDim"] = features.shape
        args["features"] = features.flatten()
        if self.hasPower:
            args["power"] = powerData.flatten()
        # Explicitly supplied times are passed on even when the header flag
        # is not set; the instance attribute is only refreshed from the
        # database after a successful insert.
        if self.hasTimes or timesData is not None:
            args["times"] = timesData.flatten()

        ok = _pyadb._pyadb_insertFromArray(**args)
        if ok != 0:
            raise RuntimeError("Direct data insertion failed for an unknown reason. err code = %i" % ok)
        self._updateDBAttributes()

    def configCheck(self, scrub=False):
        """
        Validate self.configQuery against Pyadb.validConfigTerms.

        Every key must appear in validConfigTerms and every value must be an
        instance of the type listed there.  If scrub is False the dict is left
        untouched and False is returned at the first problem.  If scrub is
        True, unexpected keys are removed, wrongly typed values are cast by
        calling the expected type on them (a failing cast propagates its
        exception), and True is returned.  True is also returned when nothing
        needed correcting.

        Members of the include/exclude key lists and the string switches are
        not verified here, but when they are converted in the C api call; if
        malformed, an error is raised from there.

        Valid keys and values in configQuery:
            seqLength      : int sequence length
            seqStart       : int offset from start for key
            exhaustive     : boolean - True for exhaustive (False by default)
            falsePositives : boolean - True to keep fps (False by default)
            accumulation   : "db" | "track" | "one2one" ("db" by default)
            distance       : "dot" | "eucNorm" | "euclidean" | "kullback" ("dot" by default)
            npoints        : int number of points per track
            ntracks        : int max number of results returned in db accumulation mode
            includeKeys    : list of strings to include (use all by default)
            excludeKeys    : list of strings to exclude (none by default)
            radius         : double of nnRadius (1.0 default, overrides npoints if specified)
            absThres       : double absolute power threshold (db must have power)
            relThres       : double relative power threshold (db must have power)
            durRatio       : double time expansion/compression ratio
            hopSize        : int hopsize (1 by default)
            resFmt         : "list" | "dict" ("dict" by default)
        """
        # Iterate over a snapshot of the keys: scrubbing deletes entries.
        for key in list(self.configQuery):
            if key not in Pyadb.validConfigTerms:
                if not scrub:
                    return False
                print("scrubbing %s from query config." % str(key))
                del self.configQuery[key]
                # The entry is gone, so there is no value left to type check.
                continue
            expected = Pyadb.validConfigTerms[key]
            if not isinstance(self.configQuery[key], expected):
                if not scrub:
                    return False
                self.configQuery[key] = expected(self.configQuery[key])
        return True

    def _ensureValidConfig(self, strictConfig):
        """Validate configQuery; raise ValueError if strict, otherwise scrub it in place."""
        if self.configCheck():
            return
        if strictConfig:
            raise ValueError("configQuery dict contains unsupported terms and strict configure mode is on.\n"
                             "Only keys found in Pyadb.validConfigTerms may be defined")
        print("configQuery dict contains unsupported terms and strict configure mode is off.\n"
              "Only keys found in Pyadb.validConfigTerms should be defined. Removing invalid terms and proceeding...")
        self.configCheck(scrub=True)

    def query(self, key=None, featData=None, strictConfig=True):
        """
        Query the database by key.

        Query parameters are taken from self.configQuery; for details consult
        the doc string of the configCheck method.  Exactly one of ``key`` and
        ``featData`` must be given; querying by ``featData`` is not implemented
        here (see query_data).

        Raises ValueError if configQuery is invalid and strictConfig is True,
        Usage if both or neither of key/featData are given, and
        NotImplementedError for a featData query.  Returns a Pyadb.Result.
        """
        self._ensureValidConfig(strictConfig)
        haveData = featData is not None
        if bool(key) == haveData:
            raise Usage("query require either key or featData to be defined, you have defined both or neither.")
        if haveData:
            raise NotImplementedError("direct data query not yet implemented. Sorry.")
        result = _pyadb._pyadb_queryFromKey(self._db, key, **self.configQuery)
        return Pyadb.Result(result, self.configQuery)

    def query_data(self, featData=None, powerData=None, timesData=None, strictConfig=True):
        """
        Query the database using numpy arrays.

        Required data: featData; optional data: powerData, timesData.  Query
        parameters are taken from self.configQuery; for details consult the
        doc string of the configCheck method.

        Raises ValueError if configQuery is invalid and strictConfig is True,
        and Usage if featData is missing.  Returns a Pyadb.Result.
        """
        self._ensureValidConfig(strictConfig)
        if featData is None:
            raise Usage("query requires featData to be defined.")
        # Work on a copy so the per-call arrays do not leak into configQuery.
        cq = self.configQuery.copy()
        if powerData is not None:
            cq['power'] = powerData
        if timesData is not None:
            cq['times'] = timesData
        result = _pyadb._pyadb_queryFromData(self._db, featData, **cq)
        return Pyadb.Result(result, self.configQuery)

    def status(self):
        '''update attributes and return them as a dict'''
        self._updateDBAttributes()
        return {"numFiles": self.numFiles,
                "dims": self.dims,
                "dudCount": self.dudCount,
                "nullCount": self.nullCount,
                "length": self.length,
                "data_region_size": self.data_region_size,
                "l2Normed": self.l2Normed,
                "hasPower": self.hasPower,
                "hasTimes": self.hasTimes,
                "usesRefs": self.usesRefs}

    ###internal methods###
    def _updateDBAttributes(self):
        '''run _pyadb_status to fill/update the database level flags and info'''
        (self.numFiles,
         self.dims,
         self.dudCount,
         self.nullCount,
         rawFlags,
         self.length,
         self.data_region_size) = _pyadb._pyadb_status(self._db)
        # Decode the header flag word into one boolean per feature.
        self.l2Normed = bool(rawFlags & ADB_HEADER_FLAG_L2NORM)
        self.hasPower = bool(rawFlags & ADB_HEADER_FLAG_POWER)
        self.hasTimes = bool(rawFlags & ADB_HEADER_FLAG_TIMES)
        self.usesRefs = bool(rawFlags & ADB_HEADER_FLAG_REFERENCES)

    class Result(object):
        """Wrapper around the raw data returned by a query.

        ``type`` holds the "resFmt" entry of the config in force at query
        time, or "dict" when that entry is absent.
        """

        def __init__(self, rawData, currentConfig):
            self.rawData = rawData
            self.type = currentConfig.get("resFmt", "dict")

        def __str__(self):
            return str(self.rawData)

        def __repr__(self):
            return repr(self.rawData)

    def liszt(self):
        '''run _pyadb_liszt to get a list of database keys

        Prints an error and returns 0 if no database handle is set.
        '''
        if self._db is None:
            print("Error in liszt(): ADB database not defined")
            return 0
        return _pyadb._pyadb_liszt(self._db)

    def retrieve_datum(self, key, **args):
        '''run _pyadb_retrieveDatum to retrieve data by key:
        features=True, to get features
        powers=True, to get Powers
        times=True, to get Times

        Prints an error and returns 0 if no database handle is set.
        '''
        if self._db is None:
            print("Error in retrieve_datum(): ADB database not defined")
            return 0
        return _pyadb._pyadb_retrieveDatum(self._db, key=key, **args)
mas01mc@748 241
class untitledTests(unittest.TestCase):
    """Placeholder test case; it defines no test methods yet."""

    def setUp(self):
        # Nothing to prepare until real tests are added.
        pass
map01bf@625 245
map01bf@625 246
# Run the module's unit tests when it is executed as a script.
if __name__ == '__main__':
    unittest.main()