map01bf@625
|
1 #!/usr/bin/env python
|
map01bf@625
|
2 # encoding: utf-8
|
map01bf@625
|
3 """
|
map01bf@625
|
4 pyadb.py
|
map01bf@625
|
5
|
map01bf@625
|
6 public access and class structure for python audioDb api bindings.
|
map01bf@625
|
7
|
map01bf@628
|
8
|
map01bf@628
|
9
|
map01bf@625
|
10 Created by Benjamin Fields on 2009-09-22.
|
map01bf@625
|
11 Copyright (c) 2009 Goldsmith University of London.
|
map01bf@625
|
12 """
|
map01bf@625
|
13
|
map01bf@625
|
14 import sys
|
map01bf@625
|
15 import os, os.path
|
map01bf@625
|
16 import unittest
|
map01bf@625
|
17 import _pyadb
|
map01bf@625
|
18
|
map01bf@717
|
# Bit masks for the raw header-flags word returned by _pyadb._pyadb_status().
# They are duplicated here by hand because the underlying API does not export
# them, so hard-coding is the only way to get at them from Python.
ADB_HEADER_FLAG_L2NORM = 0x1
ADB_HEADER_FLAG_POWER = 0x4
ADB_HEADER_FLAG_TIMES = 0x20
ADB_HEADER_FLAG_REFERENCES = 0x40
|
map01bf@625
|
23
|
map01bf@625
|
class Usage(Exception):
    """Error to indicate that a method has been called with incorrect args.

    The message passed to the constructor is stored on ``msg`` and is also
    handed to ``Exception`` so that it appears in ``str(err)``.
    """

    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg
|
map01bf@625
|
28
|
map01bf@628
|
class Pyadb(object):
    """Pyadb class.

    Allows for creation, access, insertion and query of an audioDB vector
    matching database.  Query parameters live in the ``configQuery`` dict of
    each instance; see ``configCheck`` for the accepted keys.
    """

    # Accepted configQuery keys mapped to the type each value must have.
    validConfigTerms = {
        "seqLength": int,
        "seqStart": int,
        "exhaustive": bool,
        "falsePositives": bool,
        "accumulation": str,
        "distance": str,
        "npoints": int,
        "ntracks": int,
        "includeKeys": list,
        "excludeKeys": list,
        "radius": float,
        "absThres": float,
        "relThres": float,
        "durRatio": float,
        "hopSize": int,
        "resFmt": str,
    }

    def __init__(self, path, mode='w', datasize=0, ntracks=0, datadim=0):
        """Open the database at ``path``, creating it if it does not exist.

        path     -- location of the database file.
        mode     -- 'r' or 'w'; only used when an existing file is opened.
        datasize, ntracks, datadim -- only used when a new database is
                    created; they are handed unchanged to _pyadb_create.
                    NOTE(review): the original documentation states that by
                    default the db holds 20000 tracks, is 2GB in size and
                    takes datadim from the first inserted feature; presumably
                    the zero defaults select those library defaults - confirm.

        Raises ValueError if mode is neither 'r' nor 'w'.
        """
        self.path = path
        self.configQuery = {}
        if mode not in ('w', 'r'):
            raise ValueError("if specified, mode must be either\'r\' or \'w\'.")
        if os.path.exists(path):
            self._db = _pyadb._pyadb_open(path, mode)
        else:
            self._db = _pyadb._pyadb_create(path, datasize, ntracks, datadim)
        self._updateDBAttributes()

    def insert(self, featFile=None, powerFile=None, timesFile=None,
               featData=None, powerData=None, timesData=None, key=None):
        """Insert features into the database.

        Can be done with data provided directly or by giving a path to a
        binary fftExtract style feature file.  When both featFile and
        featData are given, featFile wins.  If power and/or timing is engaged
        in the database header, it must be provided (via the same means as
        the feature) or a Usage exception will be raised.  Power files should
        be of the same binary type as features.  Times files should be the
        ascii number length of time in seconds from the beginning of the file
        to segment start, one per line.

        If providing data directly, featData should be a numpy array with
        shape (number of vectors, number of dimensions); a one dimensional
        array is treated as a column of one-dimensional vectors.  powerData
        and timesData are flattened before being handed to the C layer.

        key -- optional key for the inserted track; converted with str().

        Raises Usage on missing/inconsistent arguments and RuntimeError if
        the underlying insertion reports failure.
        """
        # While python style normally advocates leaping before looking, these
        # checks are necessary as it is very difficult to ascertain why the
        # insertion failed once it has been called.
        fromFile = bool(featFile)
        # 'is None' rather than '== None': comparing a numpy array with ==
        # is element-wise and has no single truth value.
        if not fromFile and featData is None:
            raise Usage("Must provide some feature data!")
        power = powerFile if fromFile else powerData
        times = timesFile if fromFile else timesData
        if self.hasPower and power is None:
            raise Usage("The db you are attempting an insert on (%s) expects power and you either "
                        "haven't provided any or have done so in the wrong format." % self.path)
        if self.hasTimes and times is None:
            raise Usage("The db you are attempting an insert on (%s) expects times and you either "
                        "haven't provided any or have done so in the wrong format." % self.path)

        args = {"db": self._db}
        args["features"] = featFile if fromFile else featData
        if self.hasPower:
            args["power"] = power
        # Times handed over as an array are forwarded even when the header
        # flag is not set yet.  The instance attribute itself is only
        # refreshed from the database after a successful insert.
        if self.hasTimes or timesData is not None:
            args["times"] = times
        if key:
            args["key"] = str(key)

        if fromFile:
            # _pyadb_insertFromFile signals success with a true value ...
            if not _pyadb._pyadb_insertFromFile(**args):
                raise RuntimeError("Insertion from file failed for an unknown reason.")
            self._updateDBAttributes()
            return

        features = args["features"]
        if len(features.shape) == 1:
            features = features.reshape((features.shape[0], 1))
        args["nVect"], args["nDim"] = features.shape
        args["features"] = features.flatten()
        for name in ("power", "times"):
            if name in args:
                args[name] = args[name].flatten()
        # ... whereas _pyadb_insertFromArray returns 0 on success.
        ok = _pyadb._pyadb_insertFromArray(**args)
        if not (ok == 0):
            raise RuntimeError("Direct data insertion failed for an unknown reason. err code = %i" % ok)
        self._updateDBAttributes()

    def configCheck(self, scrub=False):
        """Validate self.configQuery against Pyadb.validConfigTerms.

        Every key must appear in validConfigTerms and its value must be an
        instance of the type listed there.

        scrub=False -- leave the dict untouched and return False at the first
                       unexpected key or wrongly typed value.
        scrub=True  -- remove unexpected keys (printing a notice for each),
                       replace wrongly typed values by a cast to the expected
                       type and return True.  An exception raised by such a
                       cast is not caught here.

        Returns True when the dict only holds expected keys with correctly
        typed values (after scrubbing, if requested).

        The members of the include/exclude key lists and the string switches
        are not verified here but when they are converted to const char * in
        the C api call; if malformed, an error is raised from there.

        Valid keys and values in configQuery:
            seqLength      : int sequence length
            seqStart       : int offset from start for key
            exhaustive     : boolean - True for exhaustive (False by default)
            falsePositives : boolean - True to keep fps (False by default)
            accumulation   : ["db"|"track"|"one2one"] ("db" by default)
            distance       : ["dot"|"eucNorm"|"euclidean"|"kullback"]
                             ("dot" by default)
            npoints        : int number of points per track
            ntracks        : max number of results returned in db accu mode
            includeKeys    : list of strings to include (use all by default)
            excludeKeys    : list of strings to exclude (none by default)
            radius         : double of nnRadius (1.0 default, overrides
                             npoints if specified)
            absThres       : double absolute power threshold (db must have
                             power)
            relThres       : double relative power threshold (db must have
                             power)
            durRatio       : double time expansion/compression ratio
            hopSize        : int hopsize (1 by default)
            resFmt         : ["list"|"dict"] ("dict" by default)
        """
        # Walk over a snapshot of the keys: scrubbing deletes entries.
        for key in list(self.configQuery.keys()):
            if key not in Pyadb.validConfigTerms:
                if not scrub:
                    return False
                print("scrubbing %s from query config." % str(key))
                del self.configQuery[key]
                continue  # the entry is gone, there is nothing to type check
            expected = Pyadb.validConfigTerms[key]
            if not isinstance(self.configQuery[key], expected):
                if not scrub:
                    return False
                self.configQuery[key] = expected(self.configQuery[key])
        return True

    def _ensureValidConfig(self, strictConfig):
        """Check configQuery; raise ValueError if strict, else scrub it."""
        if self.configCheck():
            return
        if strictConfig:
            raise ValueError("configQuery dict contains unsupported terms and strict configure mode is on.\n"
                             "Only keys found in Pyadb.validConfigTerms may be defined")
        print("configQuery dict contains unsupported terms and strict configure mode is off.\n"
              "Only keys found in Pyadb.validConfigTerms should be defined. Removing invalid terms and proceeding...")
        self.configCheck(scrub=True)

    def query(self, key=None, featData=None, strictConfig=True):
        """Query the database by the key of an already inserted track.

        Query parameters are taken from self.configQuery; for details consult
        the doc string of the configCheck method.  With strictConfig an
        invalid configuration raises ValueError, otherwise it is scrubbed and
        the query proceeds.

        Exactly one of key and featData has to be given, else Usage is
        raised.  Passing featData raises NotImplementedError here; array
        queries go through query_data.

        Returns a Pyadb.Result.
        """
        self._ensureValidConfig(strictConfig)
        hasKey = bool(key)
        hasData = featData is not None
        if hasKey == hasData:
            raise Usage("query require either key or featData to be defined, you have defined both or neither.")
        if hasData:
            raise NotImplementedError("direct data query not yet implemented. Sorry.")
        result = _pyadb._pyadb_queryFromKey(self._db, key, **self.configQuery)
        return Pyadb.Result(result, self.configQuery)

    def query_data(self, featData=None, powerData=None, timesData=None, strictConfig=True):
        """Query the database using numpy arrays.

        Required data: featData; optional data: powerData, timesData.  Query
        parameters are taken from self.configQuery; for details consult the
        doc string of the configCheck method.  With strictConfig an invalid
        configuration raises ValueError, otherwise it is scrubbed first.

        Raises Usage when featData is missing.  Returns a Pyadb.Result.
        """
        self._ensureValidConfig(strictConfig)
        if featData is None:
            raise Usage("query requires featData to be defined.")
        # power/times travel with the other keyword arguments; a copy is used
        # so the arrays never end up in self.configQuery.
        cq = self.configQuery.copy()
        if powerData is not None:
            cq['power'] = powerData
        if timesData is not None:
            cq['times'] = timesData
        result = _pyadb._pyadb_queryFromData(self._db, featData, **cq)
        return Pyadb.Result(result, self.configQuery)

    def status(self):
        '''update attributes and return them as a dict'''
        self._updateDBAttributes()
        return {"numFiles": self.numFiles,
                "dims": self.dims,
                "dudCount": self.dudCount,
                "nullCount": self.nullCount,
                "length": self.length,
                "data_region_size": self.data_region_size,
                "l2Normed": self.l2Normed,
                "hasPower": self.hasPower,
                "hasTimes": self.hasTimes,
                "usesRefs": self.usesRefs}

    ###internal methods###
    def _updateDBAttributes(self):
        '''run _pyadb_status to fill/update the database level flags and info'''
        (self.numFiles,
         self.dims,
         self.dudCount,
         self.nullCount,
         rawFlags,
         self.length,
         self.data_region_size) = _pyadb._pyadb_status(self._db)
        # Decode the header flag word into one boolean per feature.
        self.l2Normed = bool(rawFlags & ADB_HEADER_FLAG_L2NORM)
        self.hasPower = bool(rawFlags & ADB_HEADER_FLAG_POWER)
        self.hasTimes = bool(rawFlags & ADB_HEADER_FLAG_TIMES)
        self.usesRefs = bool(rawFlags & ADB_HEADER_FLAG_REFERENCES)

    class Result(object):
        """Wrapper around the raw data returned by a query.

        ``rawData`` is stored untouched; ``type`` is the "resFmt" entry of
        the configuration the query ran with, or "dict" when it has none.
        """

        def __init__(self, rawData, currentConfig):
            self.rawData = rawData
            if "resFmt" in currentConfig:
                self.type = currentConfig["resFmt"]
            else:
                self.type = "dict"

        def __str__(self):
            return str(self.rawData)

        def __repr__(self):
            return repr(self.rawData)

    def liszt(self):
        '''run _pyadb_liszt to get a list of database keys

        Prints an error and returns 0 when no database handle is set.'''
        if self._db is not None:
            return _pyadb._pyadb_liszt(self._db)
        print("Error in liszt(): ADB database not defined")
        return 0

    def retrieve_datum(self, key, **args):
        '''run _pyadb_retrieveDatum to retrieve data by key:
        features=True, to get features
        powers=True, to get Powers
        times=True, to get Times

        Prints an error and returns 0 when no database handle is set.
        '''
        if self._db is not None:
            return _pyadb._pyadb_retrieveDatum(self._db, key=key, **args)
        print("Error in retrieve_datum(): ADB database not defined")
        return 0
|
mas01mc@748
|
241
|
map01bf@625
|
class untitledTests(unittest.TestCase):
    """Placeholder test case; it defines no tests yet."""

    def setUp(self):
        # Nothing to prepare while there are no tests.
        pass
|
map01bf@625
|
245
|
map01bf@625
|
246
|
map01bf@625
|
# Run the unit tests of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
|