map01bf@625
|
1 #!/usr/bin/env python
|
map01bf@625
|
2 # encoding: utf-8
|
map01bf@625
|
3 """
|
map01bf@625
|
4 pyadb.py
|
map01bf@625
|
5
|
map01bf@625
|
6 public access and class structure for python audioDb api bindings.
|
map01bf@625
|
7
|
map01bf@628
|
8
|
map01bf@628
|
9
|
map01bf@625
|
10 Created by Benjamin Fields on 2009-09-22.
|
map01bf@625
|
11 Copyright (c) 2009 Goldsmith University of London.
|
map01bf@625
|
12 """
|
map01bf@625
|
13
|
map01bf@625
|
14 import sys
|
map01bf@625
|
15 import os, os.path
|
map01bf@625
|
16 import unittest
|
map01bf@625
|
17 import _pyadb
|
map01bf@625
|
18
|
map01bf@717
|
# Bit masks for the flags word of the database header.  They are repeated
# here by hand because the audioDB api does not export them, so there is no
# other way for the bindings to get at them.
ADB_HEADER_FLAG_L2NORM = 1 << 0
ADB_HEADER_FLAG_POWER = 1 << 2
ADB_HEADER_FLAG_TIMES = 1 << 5
ADB_HEADER_FLAG_REFERENCES = 1 << 6
|
map01bf@625
|
23
|
map01bf@625
|
class Usage(Exception):
    """Signals that a method was called with incorrect arguments.

    The explanatory text handed to the constructor is stored on ``msg``.
    """

    def __init__(self, msg):
        self.msg = msg
|
map01bf@625
|
28
|
map01bf@628
|
class Pyadb(object):
    """Pyadb class.  Allows for creation, access, insertion and query of an
    audioDB vector matching database.

    All database work is delegated to the ``_pyadb`` extension module; this
    class keeps the database handle, caches the header information as
    attributes and validates arguments before they reach the C layer.
    """

    # Keys accepted in ``configQuery`` mapped to the type each value must have.
    validConfigTerms = {
        "seqLength": int,
        "seqStart": int,
        "exhaustive": bool,
        "falsePositives": bool,
        "accumulation": str,
        "distance": str,
        "npoints": int,
        "ntracks": int,
        "includeKeys": list,
        "excludeKeys": list,
        "radius": float,
        "absThres": float,
        "relThres": float,
        "durRatio": float,
        "hopSize": int,
        "resFmt": str,
    }

    def __init__(self, path, mode='w', datasize=0, ntracks=0, datadim=0):
        """Open the database at ``path``, creating it if it does not exist.

        path     -- location of the database file.
        mode     -- 'r' or 'w'; passed to ``_pyadb_open`` when ``path``
                    already exists.
        datasize, ntracks, datadim
                 -- passed to ``_pyadb_create`` when ``path`` does not exist.
                    The original notes state that by default the db will hold
                    20000 tracks, be 2GB in size and determine datadim from
                    the first inserted feature; presumably the C layer maps
                    the 0 defaults to those values -- TODO confirm.

        Raises ValueError if ``mode`` is neither 'r' nor 'w'.
        """
        self.path = path
        self.configQuery = {}
        if mode not in ('r', 'w'):
            raise ValueError("if specified, mode must be either'r' or 'w'.")
        if os.path.exists(path):
            self._db = _pyadb._pyadb_open(path, mode)
        else:
            self._db = _pyadb._pyadb_create(path, datasize, ntracks, datadim)
        self._updateDBAttributes()

    def insert(self, featFile=None, powerFile=None, timesFile=None,
               featData=None, powerData=None, timesData=None, key=None):
        """Insert features into the database.

        Features can be given either as a path to a binary fftExtract style
        feature file (``featFile``) or directly as a numpy array
        (``featData``).  When both are given the file is used.  If power
        and/or timing is engaged in the database header, it must be provided
        via the same means as the feature or a Usage exception is raised.
        Power files should be of the same binary type as features.  Times
        files should be the ascii number length of time in seconds from the
        beginning of the file to segment start, one per line.

        When data is given directly, a one dimensional ``featData`` is
        reshaped to a single column and the resulting two dimensional shape
        is unpacked as ``(nVect, nDim)``.
        NOTE(review): earlier documentation described the expected shape as
        (number of Dimensions, number of Vectors); confirm which orientation
        the C layer expects.

        Passing ``timesData`` causes times to be handed to the C layer even
        if the header does not currently advertise times.

        key -- optional identifier for the inserted track; converted with
               ``str``.

        Raises Usage when no features are given or required power/times are
        missing, and RuntimeError when the underlying insert reports failure.
        """
        # While python style normally advocates leaping before looking, these
        # checks are necessary as it is very difficult to ascertain why the
        # insertion failed once it has been called.
        fromFile = bool(featFile)
        if not fromFile and featData is None:
            raise Usage("Must provide some feature data!")

        # Side information has to arrive by the same route as the features.
        if fromFile:
            power, times = powerFile, timesFile
        else:
            power, times = powerData, timesData

        if self.hasPower and power is None:
            raise Usage("The db you are attempting an insert on (%s) expects "
                        "power and you either haven't provided any or have "
                        "done so in the wrong format." % self.path)
        if self.hasTimes and times is None:
            raise Usage("The db you are attempting an insert on (%s) expects "
                        "times and you either haven't provided any or have "
                        "done so in the wrong format." % self.path)

        args = {"db": self._db}
        args["features"] = featFile if fromFile else featData
        if self.hasPower:
            args["power"] = power
        # Explicit timesData switches times on for this insert.  The decision
        # is kept local so a failed insert cannot leave self.hasTimes stale.
        wantTimes = self.hasTimes or timesData is not None
        if wantTimes and times is not None:
            args["times"] = times
        if key:
            args["key"] = str(key)

        if fromFile:
            # NOTE(review): a falsy return is treated as failure here while
            # the array path below treats a non-zero return as failure;
            # confirm against the C binding.
            if not _pyadb._pyadb_insertFromFile(**args):
                raise RuntimeError("Insertion from file failed for an unknown reason.")
            self._updateDBAttributes()
            return

        features = args["features"]
        if len(features.shape) == 1:
            features = features.reshape((features.shape[0], 1))
        args["nVect"], args["nDim"] = features.shape
        # The C layer receives flat buffers.
        args["features"] = features.flatten()
        for name in ("power", "times"):
            if name in args:
                args[name] = args[name].flatten()

        # Debug trace kept from the original implementation.
        print("args: " + str(args))
        ok = _pyadb._pyadb_insertFromArray(**args)
        if ok != 0:
            raise RuntimeError("Direct data insertion failed for an unknown reason. err code = %i" % ok)
        self._updateDBAttributes()

    def configCheck(self, scrub=False):
        """Validate ``self.configQuery`` against ``Pyadb.validConfigTerms``.

        Each key must appear in ``validConfigTerms`` and its value must be an
        instance of the listed type.  If ``scrub`` is False the dict is left
        untouched and False is returned at the first problem.  If ``scrub``
        is True, unexpected keys are removed, badly typed values are cast to
        the expected type and checking continues.  Returns True once the
        dict only holds expected keys with correctly typed values.  A cast
        that fails raises whatever the type constructor raises.

        The members of the include/exclude key lists and the string switches
        are not verified here but when they are converted to const char * in
        the C api call; if malformed, an error is raised from there.

        Valid keys and values in configQuery:
            seqLength      : int sequence length
            seqStart       : int offset from start for key
            exhaustive     : boolean - True for exhaustive (False by default)
            falsePositives : boolean - True to keep fps (False by default)
            accumulation   : "db" | "track" | "one2one" ("db" by default)
            distance       : "dot" | "eucNorm" | "euclidean" ("dot" by default)
            npoints        : int number of points per track
            ntracks        : max number of results returned in db accu mode
            includeKeys    : list of strings to include (use all by default)
            excludeKeys    : list of strings to exclude (none by default)
            radius         : double of nnRadius (1.0 default, overrides
                             npoints if specified)
            absThres       : double absolute power threshold (db must have
                             power)
            relThres       : double relative power threshold (db must have
                             power)
            durRatio       : double time expansion/compression ratio
            hopSize        : int hopsize (1 by default)
            resFmt         : "list" | "dict" ("dict" by default)
        """
        # Walk a snapshot of the keys so entries can be removed on the way.
        for key in list(self.configQuery):
            expected = Pyadb.validConfigTerms.get(key)
            if expected is None:
                if not scrub:
                    return False
                print("scrubbing %s from query config." % str(key))
                del self.configQuery[key]
                # The entry is gone, so there is nothing left to type check.
                continue
            if not isinstance(self.configQuery[key], expected):
                if not scrub:
                    return False
                self.configQuery[key] = expected(self.configQuery[key])
        return True

    def query(self, key=None, featData=None, strictConfig=True):
        """Query the database using the parameters in ``self.configQuery``.

        Exactly one of ``key`` and ``featData`` must be given; querying by
        ``featData`` is not implemented yet.  For the accepted configuration
        entries consult the doc string of the configCheck method.

        strictConfig -- when True an invalid configQuery raises ValueError;
                        when False the config is scrubbed and the query
                        proceeds.

        Returns a Pyadb.Result wrapping what ``_pyadb_queryFromKey`` returns.
        Raises Usage when both or neither of key/featData are given and
        NotImplementedError for a featData query.
        """
        if not self.configCheck():
            if strictConfig:
                raise ValueError("configQuery dict contains unsupported terms and strict configure mode is on.\n"
                                 "Only keys found in Pyadb.validConfigTerms may be defined")
            print("configQuery dict contains unsupported terms and strict configure mode is off.\n"
                  "Only keys found in Pyadb.validConfigTerms should be defined.  Removing invalid terms and proceeding...")
            self.configCheck(scrub=True)
        hasKey = bool(key)
        # Identity test: the truth value of a numpy array is ambiguous.
        hasData = featData is not None
        if hasKey == hasData:
            raise Usage("query require either key or featData to be defined, you have defined both or neither.")
        if hasData:
            raise NotImplementedError("direct data query not yet implemented.  Sorry.")
        result = _pyadb._pyadb_queryFromKey(self._db, key, **self.configQuery)
        return Pyadb.Result(result, self.configQuery)

    def status(self):
        """Update the database attributes and return them as a dict."""
        self._updateDBAttributes()
        return {"numFiles": self.numFiles,
                "dims": self.dims,
                "dudCount": self.dudCount,
                "nullCount": self.nullCount,
                "length": self.length,
                "data_region_size": self.data_region_size,
                "l2Normed": self.l2Normed,
                "hasPower": self.hasPower,
                "hasTimes": self.hasTimes,
                "usesRefs": self.usesRefs}

    ###internal methods###
    def _updateDBAttributes(self):
        """Run _pyadb_status to fill/update the database level flags and info."""
        (self.numFiles,
         self.dims,
         self.dudCount,
         self.nullCount,
         rawFlags,
         self.length,
         self.data_region_size) = _pyadb._pyadb_status(self._db)
        # Decode the header flags word into one boolean per feature.
        self.l2Normed = bool(rawFlags & ADB_HEADER_FLAG_L2NORM)
        self.hasPower = bool(rawFlags & ADB_HEADER_FLAG_POWER)
        self.hasTimes = bool(rawFlags & ADB_HEADER_FLAG_TIMES)
        self.usesRefs = bool(rawFlags & ADB_HEADER_FLAG_REFERENCES)

    class Result(object):
        """Holds the raw data of a query and the result format in force."""

        def __init__(self, rawData, currentConfig):
            """Store ``rawData``; ``type`` is the config's "resFmt" entry,
            or "dict" when the config has none."""
            self.rawData = rawData
            if "resFmt" in currentConfig:
                self.type = currentConfig["resFmt"]
            else:
                self.type = "dict"

        def __str__(self):
            return str(self.rawData)

        def __repr__(self):
            return repr(self.rawData)

    def liszt(self):
        """Run _pyadb_liszt to get a list of database keys.

        Prints an error and returns 0 when no database handle is set.
        """
        if self._db is not None:
            return _pyadb._pyadb_liszt(self._db)
        print("Error in liszt(): ADB database not defined")
        return 0

    def retrieve_datum(self, key, **args):
        """Run _pyadb_retrieveDatum to retrieve data by key.

        Keyword arguments are forwarded to the C layer:
            features=True, to get features
            powers=True, to get Powers
            times=True, to get Times

        Prints an error and returns 0 when no database handle is set.
        """
        if self._db is not None:
            return _pyadb._pyadb_retrieveDatum(self._db, key=key, **args)
        print("Error in retrieve_datum(): ADB database not defined")
        return 0
|
mas01mc@748
|
221
|
map01bf@625
|
class untitledTests(unittest.TestCase):
    """Placeholder test case; it defines no tests."""

    def setUp(self):
        """Nothing to prepare."""
        return None
|
map01bf@625
|
225
|
map01bf@625
|
226
|
map01bf@625
|
# Hand control to the unittest runner when the module is run as a script.
if "__main__" == __name__:
    unittest.main()
|