annotate bindings/python/pyadb.py @ 744:124ae047b968

Added python bindings for liszt (LIst SiZes and Timepoints of each database entry)
author mas01mc
date Thu, 11 Nov 2010 05:34:47 +0000
parents 63a75a2b5fa6
children e5f96902afaf 9b75573be3b9
rev   line source
map01bf@625 1 #!/usr/bin/env python
map01bf@625 2 # encoding: utf-8
map01bf@625 3 """
map01bf@625 4 pyadb.py
map01bf@625 5
map01bf@625 6 public access and class structure for python audioDb api bindings.
map01bf@625 7
map01bf@628 8
map01bf@628 9
map01bf@625 10 Created by Benjamin Fields on 2009-09-22.
map01bf@625 11 Copyright (c) 2009 Goldsmith University of London.
map01bf@625 12 """
map01bf@625 13
map01bf@625 14 import sys
map01bf@625 15 import os, os.path
map01bf@625 16 import unittest
map01bf@625 17 import _pyadb
map01bf@625 18
# Bit masks for the flags word returned by _pyadb._pyadb_status().
# The audioDB API does not export these definitions, so they are
# hard-coded here; this is the only way to get at them from Python.
ADB_HEADER_FLAG_L2NORM = 1 << 0      # 0x01
ADB_HEADER_FLAG_POWER = 1 << 2       # 0x04
ADB_HEADER_FLAG_TIMES = 1 << 5       # 0x20
ADB_HEADER_FLAG_REFERENCES = 1 << 6  # 0x40
map01bf@625 23
class Usage(Exception):
    """Raised when a method is called with an incorrect combination of arguments."""

    def __init__(self, msg):
        """Store the human-readable explanation in ``self.msg``."""
        Exception.__init__(self, msg)
        self.msg = msg
map01bf@625 28
class Pyadb(object):
    """Pyadb class.  Allows for creation, access, insertion and query of an audioDB vector matching database."""

    # Keys accepted in self.configQuery and the Python type each value must have.
    validConfigTerms = {"seqLength": int, "seqStart": int, "exhaustive": bool,
                        "falsePositives": bool, "accumulation": str, "distance": str, "npoints": int,
                        "ntracks": int, "includeKeys": list, "excludeKeys": list, "radius": float, "absThres": float,
                        "relThres": float, "durRatio": float, "hopSize": int, "resFmt": str}

    def __init__(self, path, mode='w', datasize=0, ntracks=0, datadim=0):
        """
        Open the database at ``path``, creating it if the file does not exist.

        ``mode`` must be 'r' or 'w' and is only used when an existing file is
        opened.  ``datasize``, ``ntracks`` and ``datadim`` are only used when a
        new database is created and are passed straight to the C binding; per
        the original author, the defaults give a db holding 20000 tracks, 2GB
        in size, with datadim determined from the first inserted feature.

        Raises ValueError if ``mode`` is neither 'r' nor 'w'.
        """
        self.path = path
        self.configQuery = {}
        if mode not in ('w', 'r'):
            raise ValueError("if specified, mode must be either 'r' or 'w'.")
        if os.path.exists(path):
            self._db = _pyadb._pyadb_open(path, mode)
        else:
            self._db = _pyadb._pyadb_create(path, datasize, ntracks, datadim)
        self._updateDBAttributes()

    def insert(self, featFile=None, powerFile=None, timesFile=None, featData=None, powerData=None, timesData=None, key=None):
        """
        Insert features into the database.

        Features are given either as a path to a binary fftExtract style
        feature file (``featFile``) or directly as a numpy array
        (``featData``); a file path takes precedence if both are given.  If
        power and/or timing is engaged in the database header, it must be
        provided via the same means as the features or a Usage exception is
        raised.  Power files should be of the same binary type as features.
        Times files should be the ascii number length of time in seconds from
        the beginning of the file to segment start, one per line.

        When providing data directly, the array shape is read as
        (number of vectors, number of dimensions); a 1-D array of length n is
        treated as shape (n, 1).
        NOTE(review): the previous docstring stated the shape as
        (dimensions, vectors), which disagrees with how the code unpacks it --
        confirm against the C binding.

        ``key``, if given, is converted to ``str`` and used as the track key.

        Raises Usage on a bad argument combination and RuntimeError if the
        underlying insertion reports failure.
        """
        # While python style normally advocates leaping before looking, these
        # checks are necessary as it is very difficult to ascertain why the
        # insertion failed once it has been called.
        # `is None` is used throughout: `== None` on a numpy array compares
        # elementwise and has no single truth value.
        if self.hasPower and ((featFile and powerFile is None) or
                              (featData is not None and powerData is None)):
            raise Usage("The db you are attempting an insert on (%s) expects power and you either "
                        "haven't provided any or have done so in the wrong format." % self.path)
        if self.hasTimes and ((featFile and timesFile is None) or
                              (featData is not None and timesData is None)):
            raise Usage("The db you are attempting an insert on (%s) expects times and you either "
                        "haven't provided any or have done so in the wrong format." % self.path)

        args = {"db": self._db}
        if featFile:
            args["features"] = featFile
        elif featData is not None:
            args["features"] = featData
        else:
            raise Usage("Must provide some feature data!")
        if self.hasPower:
            # Which power source applies depends only on how the features were
            # supplied, never on the feature values themselves.
            args["power"] = powerFile if featFile else powerData
        if self.hasTimes and featFile:
            args["times"] = timesFile
        # NOTE(review): as in the original code, timesData is required for a
        # direct insert into a timed db but is not forwarded to the C call --
        # confirm whether _pyadb_insertFromArray accepts a "times" argument.
        if key:
            args["key"] = str(key)

        if featFile:
            if not _pyadb._pyadb_insertFromFile(**args):
                raise RuntimeError("Insertion from file failed for an unknown reason.")
            self._updateDBAttributes()
            return

        features = args["features"]
        if len(features.shape) == 1:
            features = features.reshape((features.shape[0], 1))
        args["nVect"], args["nDim"] = features.shape
        args["features"] = features.flatten()
        if self.hasPower and powerData is not None:
            # The C call receives a flat array whatever the input rank was.
            args["power"] = args["power"].flatten()
        ok = _pyadb._pyadb_insertFromArray(**args)
        if ok != 0:
            raise RuntimeError("Direct data insertion failed for an unknown reason. err code = %i" % ok)
        self._updateDBAttributes()
        return

    def configCheck(self, scrub=False):
        """
        Validate self.configQuery against Pyadb.validConfigTerms.

        For each key encountered, confirm it is in validConfigTerms and that
        its value has the expected type.  If scrub is False, leave unexpected
        keys and values alone and return False on the first problem.  If scrub
        is True, remove unexpected keys, attempt to cast wrongly typed values
        to the expected type (a failing cast raises whatever the type
        constructor raises) and continue.  Return True once self.configQuery
        only contains expected keys with correctly typed values.

        Note that the members of the include/exclude key lists and the string
        switches are not verified here, but rather when they are converted to
        const char * in the C api call; if malformed, an error will be raised
        from there.

        Valid keys and values in configQuery:
            seqLength      : int sequence length
            seqStart       : int offset from start for key
            exhaustive     : boolean - True for exhaustive (False by default)
            falsePositives : boolean - True to keep fps (False by default)
            accumulation   : ["db"|"track"|"one2one"] ("db" by default)
            distance       : ["dot"|"eucNorm"|"euclidean"] ("dot" by default)
            npoints        : int number of points per track
            ntracks        : max number of results returned in db accu mode
            includeKeys    : list of strings to include (use all by default)
            excludeKeys    : list of strings to exclude (none by default)
            radius         : double of nnRadius (1.0 default, overrides npoints if specified)
            absThres       : double absolute power threshold (db must have power)
            relThres       : double relative power threshold (db must have power)
            durRatio       : double time expansion/compression ratio
            hopSize        : int hopsize (1 by default)
            resFmt         : ["list"|"dict"] ("dict" by default)
        """
        # Iterate over a snapshot of the keys: scrubbing deletes entries.
        for key in list(self.configQuery.keys()):
            if key not in Pyadb.validConfigTerms:
                if not scrub:
                    return False
                print("scrubbing %s from query config." % str(key))
                del self.configQuery[key]
                # The key is gone, so there is no value left to type check.
                continue
            expected = Pyadb.validConfigTerms[key]
            if not isinstance(self.configQuery[key], expected):
                if not scrub:
                    return False
                self.configQuery[key] = expected(self.configQuery[key])
        return True

    def query(self, key=None, featData=None, strictConfig=True):
        """
        Query the database using the parameters in self.configQuery.

        Exactly one of ``key`` (the key of a track already in the db) or
        ``featData`` must be given; querying by direct data is not implemented
        yet.  If self.configQuery fails configCheck() and ``strictConfig`` is
        True a ValueError is raised, otherwise the config is scrubbed and the
        query proceeds.  For the query parameters consult the doc string of
        the configCheck method.

        Returns a Pyadb.Result wrapping the raw result of the C call.
        Raises ValueError, Usage or NotImplementedError as described above.
        """
        if not self.configCheck():
            if strictConfig:
                raise ValueError("configQuery dict contains unsupported terms and strict configure mode is on.\n"
                                 "Only keys found in Pyadb.validConfigTerms may be defined")
            print("configQuery dict contains unsupported terms and strict configure mode is off.\n"
                  "Only keys found in Pyadb.validConfigTerms should be defined.  Removing invalid terms and proceeding...")
            self.configCheck(scrub=True)
        haveKey = bool(key)
        # Identity test: an array-like featData has no usable truth value.
        haveData = featData is not None
        if haveKey == haveData:
            raise Usage("query require either key or featData to be defined, you have defined both or neither.")
        if haveData:
            raise NotImplementedError("direct data query not yet implemented.  Sorry.")
        result = _pyadb._pyadb_queryFromKey(self._db, key, **self.configQuery)
        return Pyadb.Result(result, self.configQuery)

    def status(self):
        '''update attributes and return them as a dict'''
        self._updateDBAttributes()
        return {"numFiles": self.numFiles,
                "dims": self.dims,
                "dudCount": self.dudCount,
                "nullCount": self.nullCount,
                "length": self.length,
                "data_region_size": self.data_region_size,
                "l2Normed": self.l2Normed,
                "hasPower": self.hasPower,
                "hasTimes": self.hasTimes,
                "usesRefs": self.usesRefs}

    ###internal methods###
    def _updateDBAttributes(self):
        '''run _pyadb_status to fill/update the database level flags and info'''
        (self.numFiles,
         self.dims,
         self.dudCount,
         self.nullCount,
         rawFlags,
         self.length,
         self.data_region_size) = _pyadb._pyadb_status(self._db)
        # Decode the header flag word into one boolean attribute per flag.
        self.l2Normed = bool(rawFlags & ADB_HEADER_FLAG_L2NORM)
        self.hasPower = bool(rawFlags & ADB_HEADER_FLAG_POWER)
        self.hasTimes = bool(rawFlags & ADB_HEADER_FLAG_TIMES)
        self.usesRefs = bool(rawFlags & ADB_HEADER_FLAG_REFERENCES)
        return

    class Result(object):
        """Thin wrapper around the raw data returned by a query."""

        def __init__(self, rawData, currentConfig):
            """Keep ``rawData`` and record the result format from ``currentConfig["resFmt"]`` ("dict" if unset)."""
            self.rawData = rawData
            if "resFmt" in currentConfig:
                self.type = currentConfig["resFmt"]
            else:
                self.type = "dict"

        def __str__(self):
            return str(self.rawData)

        def __repr__(self):
            return repr(self.rawData)
map01bf@625 194
class untitledTests(unittest.TestCase):
    """Placeholder test case; it defines no tests."""

    def setUp(self):
        """Nothing to prepare."""
        return None
map01bf@625 198
map01bf@625 199
# Run the unit tests when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()