comparison QueryADB.py @ 354:4871a3ed9e36 pre_api

Added serialized feature query Web Service, server-side only, example Python script: QueryADB.py -v
author mas01mc
date Fri, 07 Nov 2008 12:48:26 +0000
parents
children f9d86b1db21c
comparison
equal deleted inserted replaced
347:0d02bcd74a40 354:4871a3ed9e36
1 #!/usr/bin/python
2
3 # Python code/library to query the audioDB via the SOAP web interface.
4 # by Malcolm Slaney, August/September 2008
5 # malcolm@ieee.org
6
7 import sys, socket
8 from xml.dom.minidom import parseString
9
# Module-wide settings read by the query functions below.
# (No `global` statement is needed here: names assigned at module level are
# already global.)
debug = False            # when True, SendXMLCommand echoes each request and response
dbName = 'tutorial.adb'  # database name substituted into every query template

# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
#serverHost = 'research-hm3.corp.sk1.yahoo.com'
serverHost = 'localhost'
serverPort = 14475
19
20 # Start the server on serverHost with
21 # ./audioDB -s 14475
22
# Here are the templates used for the different kinds of queries. We'll fill in the
# desired parameters as we go.
#
# Every request shares the same SOAP envelope, so it is written once and wrapped
# around each request body. The pieces are joined with '+' (not '%') so that the
# '%s' placeholders inside the bodies are left for the Run*Query functions.
_SOAP_PROLOGUE = """
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
"""

_SOAP_EPILOGUE = """</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""


def _soap_envelope(body):
    """Wrap one request body (ending in a newline) in the shared SOAP envelope."""
    return _SOAP_PROLOGUE + body + _SOAP_EPILOGUE


# Placeholders: dbName
LIST_TEMPLATE = _soap_envelope("""<adb:liszt>
<dbName>%s</dbName>
<lisztOffset>0</lisztOffset>
<lisztLength>100000</lisztLength>
</adb:liszt>
""")

# Placeholders: dbName, featureFileName, queryType, queryPoint, pointNN,
# trackNN, sequenceLength, radius
SEQUENCE_TEMPLATE = _soap_envelope("""<adb:sequenceQueryByKey>
<dbName>%s</dbName>
<featureFileName>%s</featureFileName>
<queryType>%s</queryType>
<trackFileName></trackFileName>
<timesFileName></timesFileName>
<queryPoint>%s</queryPoint>
<pointNN>%s</pointNN> <!-- Number of nearest neighbors to retrieve -->
<trackNN>%s</trackNN> <!-- Number of tracks to retrieve -->
<sequenceLength>%s</sequenceLength> <!-- Number of frames in a shingle -->
<radius>%s</radius> <!-- Distance radius to search -->
<absolute-threshold>-4.0</absolute-threshold>
<usingQueryPoint>1</usingQueryPoint>
<lsh-exact>0</lsh-exact>
</adb:sequenceQueryByKey>
""")

# Placeholders: dbName
STATUS_TEMPLATE = _soap_envelope("""<adb:status>
<dbName>%s</dbName>
</adb:status>
""")

# Placeholders: dbName, dim, feature <v> elements, power <p> elements, queryPos,
# pointNN, trackNN, radius, absolute-threshold, relative-threshold, exhaustive,
# lsh-exact
SHINGLE_QUERY_TEMPLATE = _soap_envelope("""<adb:shingleQuery>
<dbName>%s</dbName>
<qVector>
<dim>%s</dim>
%s
%s
</qVector>
<keyList></keyList>
<timesFileName></timesFileName>
<queryType>32</queryType>
<queryPos>%s</queryPos>
<pointNN>%s</pointNN>
<trackNN>%s</trackNN>
<sequenceLength>1</sequenceLength>
<radius>%s</radius>
<absolute-threshold>%s</absolute-threshold>
<relative-threshold>%s</relative-threshold>
<exhaustive>%s</exhaustive>
<lsh-exact>%s</lsh-exact>
</adb:shingleQuery>
""")
119
############### List Query - Show the files in the database ###########
def RunListQuery():
    """Send the list ("liszt") request for the database named by `dbName`.

    Returns the XML body of the server's reply exactly as SendXMLCommand
    hands it back. The reply is NOT parsed here; pass it to ParseListXML to
    obtain the [key identifier, frame length] pairs.
    """
    # Local import so this function brings its own dependency into scope.
    from xml.sax.saxutils import escape

    # Escape &, < and > so an unusual database name cannot corrupt the envelope.
    message = LIST_TEMPLATE % escape(dbName)
    return SendXMLCommand(message)
128
def ParseListConstruct(f, l):
    """Build one [key, length] entry from a pair of DOM elements.

    f -- an 'Rkey' element, l -- the matching 'Rlen' element.
    Returns a two-item list holding the text content of each element. An
    element with no content contributes "" instead of raising.
    """
    def text_of(element):
        child = element.firstChild
        if child is None:
            # Empty element such as <Rkey></Rkey>: nothing to read.
            return ""
        text = child.data
        if not isinstance(text, str):
            # Python 2: minidom yields unicode; convert to a byte string with
            # Latin-1 as this script always has. On Python 3 the text is
            # already str and is returned unchanged.
            text = text.encode('latin-1')
        return text

    return [text_of(f), text_of(l)]


def ParseListXML(response):
    """Parse the XML reply to a list query.

    response -- XML text as returned by RunListQuery.
    Returns a list of [key, length] lists (both items are strings), pairing
    the 'Rkey' and 'Rlen' elements in document order. If the two counts
    differ, the unmatched trailing elements are ignored.
    """
    dom = parseString(response)
    try:
        fileElements = dom.getElementsByTagName('Rkey')
        lenElements = dom.getElementsByTagName('Rlen')
        # zip + comprehension always yields a real list and never feeds None
        # to ParseListConstruct, unlike map() over two sequences.
        return [ParseListConstruct(f, l)
                for f, l in zip(fileElements, lenElements)]
    finally:
        # Break the DOM's internal reference cycles; only plain strings
        # leave this function.
        dom.unlink()
142
143 ############### Status Query - Show the status of the database ###########
144 # Return a dictionary with the status fields
def GetDomElement(dom, field):
    """Return the text of the first element named `field` in `dom`.

    dom   -- a parsed minidom document (or node) to search.
    field -- tag name to look for.
    Returns "" when no such element exists or when the first match is empty.
    """
    els = dom.getElementsByTagName(field)
    if not els or els[0].firstChild is None:
        # Missing tag, or an empty one such as <dim></dim>.
        return ""
    text = els[0].firstChild.data
    if not isinstance(text, str):
        # Python 2: minidom yields unicode; convert to a byte string with
        # Latin-1 as before. On Python 3 the text is already str.
        text = text.encode('latin-1')
    return text
151
def RunStatusQuery():
    """Query the server for the status of the database named by `dbName`.

    Returns a dictionary with the keys 'numFiles', 'dim', 'length',
    'dudCount' and 'nullCount'. Each value is the text of the like-named
    element in the reply, or "" when GetDomElement finds none.
    """
    # Local import so this function brings its own dependency into scope.
    from xml.sax.saxutils import escape

    # Escape &, < and > so an unusual database name cannot corrupt the envelope.
    message = STATUS_TEMPLATE % escape(dbName)
    response = SendXMLCommand(message)

    dom = parseString(response)
    try:
        status = {}
        for field in ('numFiles', 'dim', 'length', 'dudCount', 'nullCount'):
            status[field] = GetDomElement(dom, field)
    finally:
        # Release the DOM's reference cycles, as ParseShingleXML does.
        dom.unlink()
    return status
166
167
168
############### Shingle/Matrix Query - Show the data closest to shingle range ###########
#
# Encode features as a matrix with dim columns, and (optionally) powers as a matrix with one column
# dim # number of columns
# f1,1 f1,2...f1,dim # first row
# f2,1 f2,2...f2,dim
# ...
# fN,1 fN,2...fN,dim # last row
# p1,1 # first row's power
# p2,1
# ...
# pN,1 # last row's power
#
def RunShingleQuery():
    """Send a fixed example shingle query and print the matches.

    The query vector, power vector and all search parameters are hard-coded
    below. The request XML is printed, sent with SendXMLCommand, and the
    reply is summarised on stdout by ParseShingleXML. Returns None.
    """
    # Local import so this function brings its own dependency into scope.
    from xml.sax.saxutils import escape

    featureDim = '3'
    queryVector = '<v>1.0</v><v>0.5</v><v>0.25</v><v>0.5</v><v>1.5</v><v>1.0</v>'  # two rows of features
    powerVector = '<p>-1.0</p><p>-1.0</p>'  # one power feature per row
    queryPos = '0'  # where in the feature sequence to start the shingle query
    pointNN = '10'  # how many near points to return per track
    trackNN = '10'  # how many near tracks to return
    radius = '1.0'  # search radius
    absoluteThreshold = '-4.5'  # absolute silence threshold in Bels (query and database shingles)
    relativeThreshold = '0'  # relative silence threshold in Bels between features, 0 = ignore
    exhaustive = '0'  # 1 = perform query using all subsequences of features of length sequenceLength
    lshExact = '0'  # if using an index then compute exact distances after LSH retrieval

    # queryVector and powerVector are XML fragments and must go in verbatim;
    # only the database name is escaped.
    message = SHINGLE_QUERY_TEMPLATE % (
        escape(dbName), featureDim, queryVector, powerVector, queryPos,
        pointNN, trackNN, radius, absoluteThreshold, relativeThreshold,
        exhaustive, lshExact)
    # The request is always shown for this query, independent of `debug`.
    print(message)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
201
202
############### Sequence Query - Show the data closest to one query ###########
def RunSequenceQuery(argv):
    """Run a sequence query and print the matches.

    argv -- the full command line (normally sys.argv). When it carries
            arguments beyond the query type, argv[2] is the feature file
            key, argv[3] the query position and argv[4] the sequence
            length. With no extra arguments a built-in example query is
            used instead.

    Prints the usage line and exits with status 1 when a key is given but
    the position or length is missing. Otherwise sends the request with
    SendXMLCommand and summarises the reply on stdout via ParseShingleXML.
    Returns None.
    """
    # Local import so this function brings its own dependency into scope.
    from xml.sax.saxutils import escape

    if len(argv) > 2:
        if len(argv) < 5:
            # A key alone is not enough: argv[3] and argv[4] are required too.
            # Report it cleanly rather than failing with an IndexError.
            print("Syntax: " + argv[0] + " -q feature_file pos len")
            sys.exit(1)
        dbKey = argv[2]
        qPos = argv[3]
        seqLen = argv[4]
        queryRadius = '0.5'
    else:
        # Built-in example query.
        dbKey = 'tmp/3.chr'
        qPos = '110'
        seqLen = '20'
        queryRadius = '0.4'

    # These parameters are the same for both cases.
    qType = '32'  # nSequence
    pointNN = '10'
    trackNN = '5'

    # Values that may come from the command line are escaped so that &, <
    # and > cannot break the XML request.
    message = SEQUENCE_TEMPLATE % (
        escape(dbName), escape(dbKey), qType, escape(qPos), pointNN, trackNN,
        escape(seqLen), queryRadius)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
228
def ParseShingleXML(response):
    """Print a summary of the matches contained in a query reply.

    response -- XML text of the server's reply.

    Reads the text of every 'Rlist', 'Dist' and 'Spos' element and prints
    one line per match in the form "<Spos> <Dist> <Rlist>". A blank line is
    printed whenever the Rlist value differs from the previous match's.
    Empty elements contribute "". If the three element counts differ, only
    the entries present in all three are printed. Returns None.
    """
    # See http://diveintopython.org/xml_processing/parsing_xml.html
    dom = parseString(response)

    def texts(tag):
        # Text content of every <tag> element, in document order.
        values = []
        for node in dom.getElementsByTagName(tag):
            child = node.firstChild
            if child is None:
                # Empty element: nothing to read.
                values.append("")
                continue
            text = child.data
            if not isinstance(text, str):
                # Python 2: minidom yields unicode; convert to a byte string
                # with Latin-1 as before. On Python 3 it is already str.
                text = text.encode('latin-1')
            values.append(text)
        return values

    try:
        resultList = texts('Rlist')
        distanceList = texts('Dist')
        positionList = texts('Spos')
    finally:
        # Only plain strings are needed from here on; free the DOM even if
        # extraction failed.
        dom.unlink()

    # Print out a summary of the most similar results
    previous = None
    rows = zip(positionList, distanceList, resultList)
    for index, (position, distance, result) in enumerate(rows):
        if index > 0 and result != previous:
            print("")
        print("%s %s %s" % (position, distance, result))
        previous = result
259
############### XML and Network Utilities ###########
# Send one XML SOAP command to the server. Get back the response.

def SendXMLCommand(message):
    """Send one XML SOAP request to the audioDB server and return the reply body.

    message -- the complete request text.

    Opens a TCP connection to (serverHost, serverPort), sends the request,
    and reads until the server closes the connection. Returns everything
    after the first blank line ("\\r\\n\\r\\n") of the reply, i.e. the part
    following the HTTP header; if the reply has no such separator the whole
    reply is returned. When `debug` is true, the request and the returned
    body are echoed to stdout.

    Prints a message and exits the program with status 1 if the server
    sends nothing back. Socket errors propagate to the caller.
    """
    if debug:
        print(message)
        print("")

    # Sockets carry bytes. A Python 2 str is already bytes and is left
    # alone; text strings are encoded as UTF-8, matching the encoding the
    # request envelopes declare.
    payload = message
    if not isinstance(payload, bytes):
        payload = payload.encode('utf-8')

    # Create a socket
    sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Connect to server
        sSock.connect((serverHost, serverPort))

        # sendall keeps writing until the whole request has gone out;
        # plain send() may stop after transmitting only part of it.
        sSock.sendall(payload)

        # Now loop, while getting all the data we can get from the socket.
        chunks = []
        while True:
            c = sSock.recv(4096)
            if not c:
                # Empty read: the server closed the connection.
                break
            chunks.append(c)
    finally:
        # Always release the socket, including on errors.
        sSock.close()

    data = b"".join(chunks)
    if not data:
        print("No response from the audioDB server")
        # This is a failure, so report a non-zero exit status.
        sys.exit(1)
    if not isinstance(data, str):
        # Python 3: turn the received bytes into text for splitting/printing.
        data = data.decode('utf-8')

    # Split off the HTTP header and the data
    header, separator, response = data.partition("\r\n\r\n")
    if not separator:
        # No header present: treat the entire reply as the body.
        response = header
    if debug:
        print('Client received:  ' + response)

    return response
294
295
############### Main Program - Figure out which query we want ###########

# The query type is dispatched by hand on sys.argv[1]. For a fuller argument
# processing scheme see: http://docs.python.org/lib/module-getopt.html

import sys
if __name__ == "__main__":
    syntax = ("Syntax: " + sys.argv[0] + " -q feature_file pos len\n" +
              "        " + sys.argv[0] + " -s | -l | -v")
    if len(sys.argv) == 1:
        print(syntax)
        sys.exit(1)

    queryType = sys.argv[1]
    if queryType in ('-s', 'status'):
        # Database status: one "field value" line per status entry.
        response = RunStatusQuery()
        for k, v in response.items():
            print("%s %s" % (k, v))
    elif queryType in ('-q', 'query'):
        # Sequence query; the remaining arguments are read by RunSequenceQuery.
        RunSequenceQuery(sys.argv)
    elif queryType in ('-l', 'list'):
        # Database listing: one "key<TAB>length" line per entry.
        response = RunListQuery()
        results = ParseListXML(response)
        for (f, l) in results:
            print("%s\t%s" % (f, l))
    elif queryType in ('-v', 'vector'):
        # Example shingle (feature vector) query; prints its own results.
        RunShingleQuery()
    else:
        # Say so instead of silently doing nothing on an unknown option.
        print("Unknown query type: " + queryType)
        print(syntax)
        sys.exit(1)
312 elif queryType == '-q' or queryType == 'query':
313 RunSequenceQuery(sys.argv)
314 elif queryType == '-l' or queryType == 'list':
315 response = RunListQuery()
316 # print response
317 results = ParseListXML(response)
318 for (f,l) in results:
319 print "%s\t%s" % (f,l)
320 elif queryType == '-v' or queryType == 'vector':
321 response = RunShingleQuery()
322
323