map01bf@625
|
1 #!/usr/bin/env python
|
map01bf@625
|
2 # encoding: utf-8
|
map01bf@625
|
3 """
|
map01bf@625
|
4 pyadb.py
|
map01bf@625
|
5
|
map01bf@625
|
6 public access and class structure for python audioDb api bindings.
|
map01bf@625
|
7
|
map01bf@628
|
8
|
map01bf@628
|
9
|
map01bf@625
|
10 Created by Benjamin Fields on 2009-09-22.
|
map01bf@625
|
11 Copyright (c) 2009 Goldsmith University of London.
|
map01bf@625
|
12 """
|
map01bf@625
|
13
|
map01bf@625
|
14 import sys
|
map01bf@625
|
15 import os, os.path
|
map01bf@625
|
16 import unittest
|
map01bf@625
|
17 import _pyadb
|
map01bf@625
|
18
|
# Bit masks for the flags field of the audioDB database header.
# The audioDB API does not export these definitions, so they are duplicated
# here by hand; that is the only way to get at them from Python.
# NOTE(review): presumably these must mirror the values used by the audioDB
# C library -- confirm against its headers if the library is upgraded.
ADB_HEADER_FLAG_L2NORM = 0x1
ADB_HEADER_FLAG_POWER = 0x4
ADB_HEADER_FLAG_TIMES = 0x20
ADB_HEADER_FLAG_REFERENCES = 0x40
map01bf@625
|
23
|
class Usage(Exception):
    """Error indicating that a method has been called with incorrect args.

    Attributes:
        msg: the human readable description passed to the constructor.
    """

    def __init__(self, msg):
        # Hand the message to the base class explicitly so that ``args`` and
        # ``str()`` carry it, in addition to the ``msg`` attribute.
        Exception.__init__(self, msg)
        self.msg = msg
map01bf@625
|
28
|
class Pyadb(object):
    """Pyadb class. Allows for creation, access, insertion and query of an
    audioDB vector matching database.

    All database work is delegated to the ``_pyadb`` extension module; this
    class keeps the database handle (``_db``), the query configuration
    (``configQuery``) and a cached copy of the database status attributes.
    """

    # Keys allowed in ``configQuery`` mapped to the type each value must have.
    validConfigTerms = {
        "seqLength": int,
        "seqStart": int,
        "exhaustive": bool,
        "falsePositives": bool,
        "accumulation": str,
        "distance": str,
        "npoints": int,
        "ntracks": int,
        "includeKeys": list,
        "excludeKeys": list,
        "radius": float,
        "absThres": float,
        "relThres": float,
        "durRatio": float,
        "hopSize": int,
        "resFmt": str,
    }

    def __init__(self, path, mode='w', datasize=0, ntracks=0, datadim=0):
        """Open the database at ``path`` if it exists, otherwise create it.

        Args:
            path: location of the database file.
            mode: 'r' or 'w'; only passed on when an existing file is opened.
            datasize, ntracks, datadim: passed to ``_pyadb._pyadb_create``
                when the file does not exist yet. According to the original
                author, with the default zeros the db will hold 20000 tracks,
                be 2GB in size and determine datadim from the first inserted
                feature (that is decided by the underlying library, not
                here).

        Raises:
            ValueError: if ``mode`` is neither 'r' nor 'w'.
        """
        self.path = path
        self.configQuery = {}
        if mode not in ('w', 'r'):
            raise ValueError("if specified, mode must be either 'r' or 'w'.")
        if os.path.exists(path):
            self._db = _pyadb._pyadb_open(path, mode)
        else:
            self._db = _pyadb._pyadb_create(path, datasize, ntracks, datadim)
        self._updateDBAttributes()

    def insert(self, featFile=None, powerFile=None, timesFile=None, featData=None, powerData=None, timesData=None, key=None):
        """Insert features into the database.

        Features come either from ``featFile`` (a path; per the original
        documentation a binary fftExtract style feature file) or directly
        from ``featData``. If both are given the file is used. If power
        and/or timing is engaged in the database header, it must be provided
        via the same means as the feature, or a Usage exception is raised.
        Per the original documentation, power files should be of the same
        binary type as features, and times files should hold the ascii
        number of seconds from the beginning of the file to each segment
        start, one per line.

        Args:
            featFile, powerFile, timesFile: paths used for file insertion.
            featData: array-like object with ``shape``, ``reshape`` and
                ``flatten`` (e.g. a numpy array). A 2-D shape is read as
                (number of vectors, number of dimensions); a 1-D array of
                length n is reshaped to (n, 1).
            powerData, timesData: only checked for presence.
                NOTE(review): they are not forwarded to
                ``_pyadb._pyadb_insertFromArray`` (the original left this
                as ``pass``) -- confirm what the binding expects.
            key: optional key for the inserted track, converted with str().

        Raises:
            Usage: if no feature is given, or required power/times is missing.
            RuntimeError: if the underlying insertion reports failure.
        """
        # While python style normally advocates leaping before looking, these
        # checks are necessary as it is very difficult to ascertain why the
        # insertion failed once it has been called.
        fromFile = bool(featFile)
        # Identity test: truth-testing or `!= None` on an array is ambiguous.
        fromData = featData is not None
        if self.hasPower and ((fromFile and powerFile is None) or (fromData and powerData is None)):
            raise Usage("The db you are attempting an insert on (%s) expects power and you either "
                        "haven't provided any or have done so in the wrong format." % self.path)
        if self.hasTimes and ((fromFile and timesFile is None) or (fromData and timesData is None)):
            raise Usage("The db you are attempting an insert on (%s) expects times and you either "
                        "haven't provided any or have done so in the wrong format." % self.path)

        args = {"db": self._db}
        if fromFile:
            args["features"] = featFile
            if self.hasPower:
                args["power"] = powerFile
            if self.hasTimes:
                args["times"] = timesFile
        elif fromData:
            args["features"] = featData
        else:
            raise Usage("Must provide some feature data!")
        if key:
            args["key"] = str(key)

        if fromFile:
            # The file binding signals failure with a false return value.
            if not _pyadb._pyadb_insertFromFile(**args):
                raise RuntimeError("Insertion from file failed for an unknown reason.")
        else:
            features = args["features"]
            if len(features.shape) == 1:
                features = features.reshape((features.shape[0], 1))
            args["nVect"], args["nDim"] = features.shape
            args["features"] = features.flatten()
            # The array binding returns an error code; 0 means success.
            ok = _pyadb._pyadb_insertFromArray(**args)
            if ok != 0:
                raise RuntimeError("Direct data insertion failed for an unknown reason. err code = %i" % ok)
        self._updateDBAttributes()

    def configCheck(self, scrub=False):
        """Validate ``self.configQuery`` against ``Pyadb.validConfigTerms``.

        Every key must appear in validConfigTerms and its value must be an
        instance of the type listed there. If scrub is False, unexpected
        keys and values are left alone and False is returned at the first
        problem. If scrub is True, unexpected keys are removed, wrongly
        typed values are cast to the expected type, and True is returned.
        If configQuery only contains expected keys with correctly typed
        values, True is returned.

        Per the original documentation, the members of the include/exclude
        key lists and the string switches are not verified here, but when
        the C api call converts them; malformed values raise from there.

        Valid keys and values in configQuery (descriptions and defaults as
        given by the original author):
            seqLength      : int sequence length
            seqStart       : int offset from start for key
            exhaustive     : bool - True for exhaustive (False by default)
            falsePositives : bool - True to keep fps (False by default)
            accumulation   : "db" | "track" | "one2one" ("db" by default)
            distance       : "dot" | "eucNorm" | "euclidean" ("dot" by default)
            npoints        : int number of points per track
            ntracks        : int max number of results returned in db accu mode
            includeKeys    : list of strings to include (use all by default)
            excludeKeys    : list of strings to exclude (none by default)
            radius         : float nnRadius (1.0 default, overrides npoints)
            absThres       : float absolute power threshold (db must have power)
            relThres       : float relative power threshold (db must have power)
            durRatio       : float time expansion/compression ratio
            hopSize        : int hopsize (1 by default)
            resFmt         : "list" | "dict" ("dict" by default)

        Raises:
            Whatever the target type's constructor raises when a cast
            attempted in scrub mode fails (e.g. ValueError from int("x")).
        """
        # Iterate over a snapshot of the keys: scrubbing deletes entries.
        for key in list(self.configQuery):
            if key not in Pyadb.validConfigTerms:
                if not scrub:
                    return False
                print("scrubbing %s from query config." % str(key))
                del self.configQuery[key]
                # The entry is gone, so there is nothing left to type check.
                continue
            expected = Pyadb.validConfigTerms[key]
            if not isinstance(self.configQuery[key], expected):
                if not scrub:
                    return False
                self.configQuery[key] = expected(self.configQuery[key])
        return True

    def query(self, key=None, featData=None, strictConfig=True):
        """Query the database using the parameters in ``self.configQuery``.

        For details on the parameters consult the doc string of the
        configCheck method.

        Args:
            key: key of the track to query with.
            featData: feature data to query with (not yet implemented).
            strictConfig: if True an invalid configQuery raises ValueError;
                if False it is scrubbed with configCheck(scrub=True) and
                the query proceeds.

        Returns:
            A Pyadb.Result wrapping the value returned by
            ``_pyadb._pyadb_queryFromKey``.

        Raises:
            ValueError: invalid configQuery while strictConfig is True.
            Usage: if both or neither of key and featData are given.
            NotImplementedError: if featData is given.
        """
        if not self.configCheck():
            if strictConfig:
                raise ValueError("configQuery dict contains unsupported terms and strict configure mode is on.\n"
                                 "Only keys found in Pyadb.validConfigTerms may be defined")
            print("configQuery dict contains unsupported terms and strict configure mode is off.\n"
                  "Only keys found in Pyadb.validConfigTerms should be defined. "
                  "Removing invalid terms and proceeding...")
            self.configCheck(scrub=True)
        # Identity test: truth-testing an array with several items is ambiguous.
        hasData = featData is not None
        if bool(key) == hasData:
            raise Usage("query require either key or featData to be defined, you have defined both or neither.")
        if hasData:
            raise NotImplementedError("direct data query not yet implemented. Sorry.")
        result = _pyadb._pyadb_queryFromKey(self._db, key, **self.configQuery)
        return Pyadb.Result(result, self.configQuery)

    def status(self):
        """Update the database attributes and return them as a dict."""
        self._updateDBAttributes()
        return {
            "numFiles": self.numFiles,
            "dims": self.dims,
            "dudCount": self.dudCount,
            "nullCount": self.nullCount,
            "length": self.length,
            "data_region_size": self.data_region_size,
            "l2Normed": self.l2Normed,
            "hasPower": self.hasPower,
            "hasTimes": self.hasTimes,
            "usesRefs": self.usesRefs,
        }

    ###internal methods###
    def _updateDBAttributes(self):
        """Run _pyadb_status to fill/update the database level flags and info.

        Stores the seven values returned by ``_pyadb._pyadb_status`` and
        decodes the raw flags word into the boolean attributes l2Normed,
        hasPower, hasTimes and usesRefs using the ADB_HEADER_FLAG_* masks.
        """
        (self.numFiles,
         self.dims,
         self.dudCount,
         self.nullCount,
         rawFlags,
         self.length,
         self.data_region_size) = _pyadb._pyadb_status(self._db)
        self.l2Normed = bool(rawFlags & ADB_HEADER_FLAG_L2NORM)
        self.hasPower = bool(rawFlags & ADB_HEADER_FLAG_POWER)
        self.hasTimes = bool(rawFlags & ADB_HEADER_FLAG_TIMES)
        self.usesRefs = bool(rawFlags & ADB_HEADER_FLAG_REFERENCES)

    class Result(object):
        """Container for the raw data returned by a query.

        Attributes:
            rawData: the value returned by the query call, kept untouched.
            type: the "resFmt" entry of the config used for the query, or
                "dict" when the config has no such entry.
        """

        def __init__(self, rawData, currentConfig):
            self.rawData = rawData
            if "resFmt" in currentConfig:
                self.type = currentConfig["resFmt"]
            else:
                self.type = "dict"

        def __str__(self):
            return str(self.rawData)

        def __repr__(self):
            return repr(self.rawData)
map01bf@625
|
189
|
class untitledTests(unittest.TestCase):
    """Placeholder test case; no tests are defined yet."""

    def setUp(self):
        # Nothing to prepare yet.
        pass
map01bf@625
|
193
|
map01bf@625
|
194
|
# Run the unit tests of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()