Mercurial > hg > audiodb
comparison QueryADB.py @ 354:4871a3ed9e36 pre_api
Added serialized feature query Web Service, server-side only, example Python script: QueryADB.py -v
author | mas01mc |
---|---|
date | Fri, 07 Nov 2008 12:48:26 +0000 |
parents | |
children | f9d86b1db21c |
comparison
equal
deleted
inserted
replaced
347:0d02bcd74a40 | 354:4871a3ed9e36 |
---|---|
1 #!/usr/bin/python | |
2 | |
3 # Python code/library to query the audioDB via the SOAP web interface. | |
4 # by Malcolm Slaney, August/September 2008 | |
5 # malcolm@ieee.org | |
6 | |
7 import sys, socket | |
8 from xml.dom.minidom import parseString | |
9 | |
# Module-wide settings shared by the query functions in this file.
debug = False            # when True, SendXMLCommand echoes the request and the reply
dbName = 'tutorial.adb'  # database name substituted into each SOAP template

# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
# Alternative host, kept for reference: 'research-hm3.corp.sk1.yahoo.com'
serverHost = 'localhost'
serverPort = 14475
19 | |
20 # Start the server on serverHost with | |
21 # ./audioDB -s 14475 | |
22 | |
23 # Here are the templates used for the different kinds of queries. We'll fill in the | |
24 # desired parameters as we go. | |
# SOAP envelope for the adb:liszt request, filled in by RunListQuery.
# The single %s placeholder is the database name; lisztOffset and lisztLength
# are fixed at 0 and 100000.
LIST_TEMPLATE = """
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
<adb:liszt>
<dbName>%s</dbName>
<lisztOffset>0</lisztOffset>
<lisztLength>100000</lisztLength>
</adb:liszt>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""
42 | |
# SOAP envelope for the adb:sequenceQueryByKey request, filled in by
# RunSequenceQuery.  The eight %s placeholders are, in order: dbName,
# featureFileName, queryType, queryPoint, pointNN, trackNN, sequenceLength
# and radius.  absolute-threshold, usingQueryPoint and lsh-exact are fixed.
SEQUENCE_TEMPLATE = """
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
<adb:sequenceQueryByKey>
<dbName>%s</dbName>
<featureFileName>%s</featureFileName>
<queryType>%s</queryType>
<trackFileName></trackFileName>
<timesFileName></timesFileName>
<queryPoint>%s</queryPoint>
<pointNN>%s</pointNN> <!-- Number of nearest neighbors to retrieve -->
<trackNN>%s</trackNN> <!-- Number of tracks to retrieve -->
<sequenceLength>%s</sequenceLength> <!-- Number of frames in a shingle -->
<radius>%s</radius> <!-- Distance radius to search -->
<absolute-threshold>-4.0</absolute-threshold>
<usingQueryPoint>1</usingQueryPoint>
<lsh-exact>0</lsh-exact>
</adb:sequenceQueryByKey>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""
70 | |
# SOAP envelope for the adb:status request, filled in by RunStatusQuery.
# The single %s placeholder is the database name.
STATUS_TEMPLATE = """
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
<adb:status>
<dbName>%s</dbName>
</adb:status>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""
86 | |
# SOAP envelope for the adb:shingleQuery request, filled in by RunShingleQuery.
# The twelve %s placeholders are, in order: dbName, dim, the query vector
# (RunShingleQuery passes a run of <v> elements), the power vector (a run of
# <p> elements), queryPos, pointNN, trackNN, radius, absolute-threshold,
# relative-threshold, exhaustive and lsh-exact.  queryType is fixed at 32 and
# sequenceLength at 1.
SHINGLE_QUERY_TEMPLATE = """
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
<adb:shingleQuery>
<dbName>%s</dbName>
<qVector>
<dim>%s</dim>
%s
%s
</qVector>
<keyList></keyList>
<timesFileName></timesFileName>
<queryType>32</queryType>
<queryPos>%s</queryPos>
<pointNN>%s</pointNN>
<trackNN>%s</trackNN>
<sequenceLength>1</sequenceLength>
<radius>%s</radius>
<absolute-threshold>%s</absolute-threshold>
<relative-threshold>%s</relative-threshold>
<exhaustive>%s</exhaustive>
<lsh-exact>%s</lsh-exact>
</adb:shingleQuery>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""
119 | |
120 ############### List Query - Show the files in the database ########### | |
121 # Return a list of (key identifier, frame length) pairs. | |
def RunListQuery():
    """Send the list (liszt) request for dbName and return the server's reply.

    LIST_TEMPLATE is filled with the module-level dbName and passed to
    SendXMLCommand; whatever SendXMLCommand returns is handed back unparsed.
    """
    return SendXMLCommand(LIST_TEMPLATE % dbName)
128 | |
129 # Construct a list from the two DOM entries passed. Used with the map routine to | |
130 # assemble the output. | |
def ParseListConstruct(f, l):
    """Return a two-item list built from a pair of DOM elements.

    For each of f and l, the data of the element's first child is encoded
    to latin-1; the result is [text of f, text of l].
    """
    return [element.firstChild.data.encode('latin-1') for element in (f, l)]
134 | |
def ParseListXML(response):
    """Parse a list-query reply into a list of [key, length] entries.

    response is the XML text returned by RunListQuery.  Every <Rkey> element
    is paired, in document order, with the <Rlen> element at the same
    position, and each pair is converted by ParseListConstruct.

    Raises ValueError when the reply holds a different number of <Rkey> and
    <Rlen> elements, since the pairing would then be meaningless.
    """
    dom = parseString(response)
    try:
        fileElements = dom.getElementsByTagName('Rkey')
        lenElements = dom.getElementsByTagName('Rlen')
        if len(fileElements) != len(lenElements):
            raise ValueError(
                "Malformed list response: %d Rkey elements but %d Rlen elements"
                % (len(fileElements), len(lenElements)))
        # Build the list eagerly so nothing refers to the DOM after unlink().
        return [ParseListConstruct(f, l)
                for f, l in zip(fileElements, lenElements)]
    finally:
        # Release the DOM tree, as ParseShingleXML does.
        dom.unlink()
142 | |
143 ############### Status Query - Show the status of the database ########### | |
144 # Return a dictionary with the status fields | |
def GetDomElement(dom, field):
    """Return the latin-1 encoded text of the first <field> element in dom.

    Returns "" when the document has no such element, and also when the
    first match has no child node (for example <dim/> or <dim></dim>).
    """
    els = dom.getElementsByTagName(field)
    if not els:
        return ""
    child = els[0].firstChild
    if child is None:
        # An empty element has no text node to read.
        return ""
    return child.data.encode('latin-1')
151 | |
def RunStatusQuery():
    """Send the status request for dbName and return the reply as a dict.

    The dict has the keys 'numFiles', 'dim', 'length', 'dudCount' and
    'nullCount'; each value is whatever GetDomElement returns for the
    element of that name in the server's reply.
    """
    reply = SendXMLCommand(STATUS_TEMPLATE % dbName)
    dom = parseString(reply)
    fieldNames = ('numFiles', 'dim', 'length', 'dudCount', 'nullCount')
    return dict((name, GetDomElement(dom, name)) for name in fieldNames)
166 | |
167 | |
168 | |
169 ############### Shingle/Matrix Query - Show the data closest to shingle range ########### | |
170 # | |
171 # Encode features as a matrix with dim columns, and (optionally) powers as a matrix with one column | |
172 # dim # number of columns | |
173 # f1,1 f1,2...f1,dim # first row | |
174 # f2,1 f2,2...f2,dim | |
175 # ... | |
176 # fN,1 fN,2...fN,dim # last row | |
177 # p1,1 # first row's power | |
178 # p2,1 | |
179 # ... | |
180 # pN,1 # last row's power | |
181 # | |
def RunShingleQuery():
    """Send a fixed example shingle query and print the matches.

    The query is hard-coded: two rows of three features with one power value
    per row.  The filled-in request is printed unconditionally, then sent
    with SendXMLCommand, and the reply is summarised by ParseShingleXML.
    """
    parameters = (
        dbName,
        '3',       # featureDim: number of columns in the feature matrix
        '<v>1.0</v><v>0.5</v><v>0.25</v><v>0.5</v><v>1.5</v><v>1.0</v>',  # two rows of features
        '<p>-1.0</p><p>-1.0</p>',  # one power feature per row
        '0',       # queryPos: where in the feature sequence to start the shingle query
        '10',      # pointNN: how many near points to return per track
        '10',      # trackNN: how many near tracks to return
        '1.0',     # search radius
        '-4.5',    # absolute silence threshold in Bels (query and database shingles)
        '0',       # relative silence threshold in Bels between features, 0 = ignore
        '0',       # exhaustive: 1 = query using all subsequences of length sequenceLength
        '0',       # lshExact: if using an index, compute exact distances after LSH retrieval
    )
    message = SHINGLE_QUERY_TEMPLATE % parameters
    print(message)
    ParseShingleXML(SendXMLCommand(message))
201 | |
202 | |
203 ############### Sequence Query - Show the data closest to one query ########### | |
def RunSequenceQuery(argv):
    """Send a sequence query and print the matches.

    argv is the full command line (sys.argv).  When it carries query
    arguments they are read as argv[2] = feature file key, argv[3] = query
    position and argv[4] = sequence length, and radius 0.5 is used.  With
    no query arguments a built-in example query is sent instead
    ('tmp/3.chr', position 110, length 20, radius 0.4).

    If some but not all three query arguments are given, the usage line is
    printed and the process exits with status 1.
    """
    if len(argv) > 2:
        if len(argv) < 5:
            # The original indexed argv[3] and argv[4] after checking only
            # len(argv) > 2, so a partial command line raised IndexError.
            print("Syntax: " + argv[0] + " -q feature_file pos len")
            sys.exit(1)
        dbKey = argv[2]
        qPos = argv[3]
        seqLen = argv[4]
        queryRadius = '0.5'
    else:
        dbKey = 'tmp/3.chr'
        qPos = '110'
        seqLen = '20'
        queryRadius = '0.4'

    # These are the same in both cases.
    qType = '32'  # nSequence
    pointNN = '10'
    trackNN = '5'

    message = SEQUENCE_TEMPLATE % (dbName, dbKey, qType, qPos, pointNN,
                                   trackNN, seqLen, queryRadius)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
228 | |
def _ElementTexts(dom, tag):
    """Return the latin-1 encoded text of every <tag> element in dom.

    An element with no child node contributes "" instead of raising.
    """
    texts = []
    for node in dom.getElementsByTagName(tag):
        child = node.firstChild
        if child is None:
            texts.append("")
        else:
            texts.append(child.data.encode('latin-1'))
    return texts


def ParseShingleXML(response):
    """Print a summary of the matches in a shingle/sequence query reply.

    response is the XML text returned by SendXMLCommand.  The texts of the
    <Rlist>, <Dist> and <Spos> elements are collected in document order and
    one line "position distance result" is printed per <Rlist> entry.  A
    blank line is printed whenever the result value differs from the
    previous entry's.

    See http://diveintopython.org/xml_processing/parsing_xml.html

    Raises ValueError when the reply holds fewer <Dist> or <Spos> elements
    than <Rlist> elements.
    """
    dom = parseString(response)
    try:
        resultList = _ElementTexts(dom, 'Rlist')
        distanceList = _ElementTexts(dom, 'Dist')
        positionList = _ElementTexts(dom, 'Spos')
    finally:
        # Everything needed has been copied out, so release the DOM even if
        # extraction failed.
        dom.unlink()

    if len(distanceList) < len(resultList) or len(positionList) < len(resultList):
        raise ValueError(
            "Malformed query response: %d Rlist, %d Dist and %d Spos elements"
            % (len(resultList), len(distanceList), len(positionList)))

    # Print out a summary of the most similar results.  zip() stops at the
    # end of resultList, which the check above makes the shortest list.
    previous = None
    rows = zip(positionList, distanceList, resultList)
    for index, (position, distance, result) in enumerate(rows):
        if index > 0 and result != previous:
            print("")
        print("%s %s %s" % (position, distance, result))
        previous = result
259 | |
260 ############### XML and Network Utilities ########### | |
261 # Send one XML SOAP command to the server. Get back the response. | |
262 | |
def SendXMLCommand(message):
    """Send one XML SOAP command to the server and return the reply body.

    Opens a TCP connection to (serverHost, serverPort), sends message, and
    reads until the server closes the connection.  The reply is split at the
    first blank line ("\\r\\n\\r\\n"); the part after it is returned.  If the
    reply contains no such separator, the whole reply is returned.

    When the module-level debug flag is set, the request and the returned
    body are printed.  If the server sends nothing at all, a message is
    printed and the process exits.
    """
    if debug:
        print(message)
        print("")

    # Create a socket
    sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Connect to server
        sSock.connect((serverHost, serverPort))

        # sendall() keeps sending until the whole message is out; plain
        # send() may transmit only part of it.
        sSock.sendall(message)

        # Now loop, getting all the data we can get from the socket.
        chunks = []
        while True:
            c = sSock.recv(4096)
            if not c:
                break
            chunks.append(c)
    finally:
        # Close the socket on every path, including errors.
        sSock.close()

    data = "".join(chunks)
    if data == "":
        print("No response from the audioDB server")
        # NOTE(review): exit status 0 is kept from the original even though
        # this is a failure path - confirm whether callers rely on it.
        sys.exit(0)

    # Split off the HTTP header and the data.  partition() never fails, so a
    # reply without a header separator no longer raises on unpacking.
    header, separator, body = data.partition("\r\n\r\n")
    if separator:
        response = body
    else:
        response = data
    if debug:
        print("%s %s" % ('Client received: ', response))

    return response
294 | |
295 | |
296 ############### Main Program - Figure out which query we want ########### | |
297 | |
298 # Argument processing scheme described at: http://docs.python.org/lib/module-getopt.html | |
299 | |
300 import sys | |
if __name__ == "__main__":
    # Dispatch on the first command-line argument:
    #   -s | status   print the database status fields
    #   -q | query    run a sequence query (see RunSequenceQuery)
    #   -l | list     print the key and length of every entry in the database
    #   -v | vector   run the example shingle query
    if len(sys.argv) == 1:
        print("Syntax: " + sys.argv[0] + " -q feature_file pos len")
        sys.exit(1)

    queryType = sys.argv[1]
    if queryType in ('-s', 'status'):
        for k, v in RunStatusQuery().items():
            print("%s %s" % (k, v))
    elif queryType in ('-q', 'query'):
        RunSequenceQuery(sys.argv)
    elif queryType in ('-l', 'list'):
        for (f, l) in ParseListXML(RunListQuery()):
            print("%s\t%s" % (f, l))
    elif queryType in ('-v', 'vector'):
        RunShingleQuery()
    else:
        # Previously an unrecognised option fell through and did nothing.
        print("Unknown query type: " + queryType)
        print("Syntax: " + sys.argv[0] + " -q feature_file pos len")
        sys.exit(1)
322 | |
323 |