annotate QueryADB.py @ 770:c54bc2ffbf92 tip

update tags
author convert-repo
date Fri, 16 Dec 2011 11:34:01 +0000
parents a30948382f56
children
rev   line source
mas01mc@354 1 #!/usr/bin/python
mas01mc@354 2
mas01mc@354 3 # Python code/library to query the audioDB via the SOAP web interface.
mas01mc@354 4 # by Malcolm Slaney, August/September 2008
mas01mc@354 5 # malcolm@ieee.org
mas01mc@354 6
mas01mc@354 7 import sys, socket
mas01mc@354 8 from xml.dom.minidom import parseString
mas01mc@354 9
# Module-wide settings read by the query helpers below.
debug = False            # when True, SendXMLCommand echoes each request and response
dbName = 'tutorial.adb'  # substituted into the <dbName> element of every request

# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
#serverHost = 'research-hm3.corp.sk1.yahoo.com'
serverHost = 'localhost'
serverPort = 14475
mas01mc@354 19
mas01mc@354 20 # Start the server on serverHost with
mas01mc@354 21 # ./audioDB -s 14475
mas01mc@354 22
# Here are the templates used for the different kinds of queries.  We'll fill
# in the desired parameters as we go: every %s is substituted positionally
# with the % operator, so the order of the placeholders matters.
#
# All requests share the same SOAP envelope, so it is written once and wrapped
# around each operation's body.  The XML declaration must be the very first
# thing in the document, hence the backslash after the opening quotes (it
# suppresses the newline a triple-quoted string would otherwise start with).
_SOAP_HEADER = """\
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
"""

_SOAP_FOOTER = """\
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""

# adb:liszt -- one placeholder: dbName.
LIST_TEMPLATE = _SOAP_HEADER + """\
<adb:liszt>
<dbName>%s</dbName>
<lisztOffset>0</lisztOffset>
<lisztLength>100000</lisztLength>
</adb:liszt>
""" + _SOAP_FOOTER

# adb:sequenceQueryByKey -- eight placeholders: dbName, featureFileName,
# queryType, queryPoint, pointNN, trackNN, sequenceLength, radius.
SEQUENCE_TEMPLATE = _SOAP_HEADER + """\
<adb:sequenceQueryByKey>
<dbName>%s</dbName>
<featureFileName>%s</featureFileName>
<queryType>%s</queryType>
<trackFileName></trackFileName>
<timesFileName></timesFileName>
<queryPoint>%s</queryPoint>
<pointNN>%s</pointNN> <!-- Number of nearest neighbors to retrieve -->
<trackNN>%s</trackNN> <!-- Number of tracks to retrieve -->
<sequenceLength>%s</sequenceLength> <!-- Number of frames in a shingle -->
<radius>%s</radius> <!-- Distance radius to search -->
<absolute-threshold>-4.0</absolute-threshold>
<usingQueryPoint>1</usingQueryPoint>
<lsh-exact>0</lsh-exact>
</adb:sequenceQueryByKey>
""" + _SOAP_FOOTER

# adb:status -- one placeholder: dbName.
STATUS_TEMPLATE = _SOAP_HEADER + """\
<adb:status>
<dbName>%s</dbName>
</adb:status>
""" + _SOAP_FOOTER

# adb:shingleQuery -- twelve placeholders: dbName, dim, the <v> feature
# elements, the <p> power elements, queryPos, pointNN, trackNN, radius,
# absolute-threshold, relative-threshold, exhaustive, lsh-exact.
SHINGLE_QUERY_TEMPLATE = _SOAP_HEADER + """\
<adb:shingleQuery>
<dbName>%s</dbName>
<qVector>
<dim>%s</dim>
%s
%s
</qVector>
<keyList></keyList>
<timesFileName></timesFileName>
<queryType>32</queryType>
<queryPos>%s</queryPos>
<pointNN>%s</pointNN>
<trackNN>%s</trackNN>
<sequenceLength>1</sequenceLength>
<radius>%s</radius>
<absolute-threshold>%s</absolute-threshold>
<relative-threshold>%s</relative-threshold>
<exhaustive>%s</exhaustive>
<lsh-exact>%s</lsh-exact>
</adb:shingleQuery>
""" + _SOAP_FOOTER

# adb:query -- sixteen placeholders: dbName, qKey, keyList, timesFileName,
# powerFileName, qType, qPos, pointNN, segNN, segLen, radius,
# absolute-threshold, relative-threshold, exhaustive, lsh-exact,
# no-unit-norming.
FEATURE_QUERY_TEMPLATE = _SOAP_HEADER + """\
<adb:query>
<dbName>%s</dbName>
<qKey>%s</qKey>
<keyList>%s</keyList>
<timesFileName>%s</timesFileName>
<powerFileName>%s</powerFileName>
<qType>%s</qType>
<qPos>%s</qPos>
<pointNN>%s</pointNN>
<segNN>%s</segNN>
<segLen>%s</segLen>
<radius>%s</radius>
<absolute-threshold>%s</absolute-threshold>
<relative-threshold>%s</relative-threshold>
<exhaustive>%s</exhaustive>
<lsh-exact>%s</lsh-exact>
<no-unit-norming>%s</no-unit-norming>
</adb:query>
""" + _SOAP_FOOTER
mas01mc@474 152
############### List Query - Show the files in the database ###########
def RunListQuery():
    """Send the list request for the module-level dbName and return the reply.

    The value returned is whatever SendXMLCommand hands back, i.e. the
    unparsed response body.  Pass it to ParseListXML to obtain the
    [key identifier, frame length] pairs.
    """
    return SendXMLCommand(LIST_TEMPLATE % dbName)
mas01mc@354 161
# Construct a list from the two DOM entries passed. Used by ParseListXML to
# assemble the output.
def ParseListConstruct(f, l):
    """Return [text of f, text of l] for two DOM elements.

    f and l are DOM element nodes (ParseListXML passes an 'Rkey' and an 'Rlen'
    element).  Each text is returned as a native string: on Python 2 the
    unicode text is encoded as latin-1, exactly as before.  An element with no
    children contributes '' instead of raising AttributeError.
    """
    def text_of(node):
        child = node.firstChild
        if child is None:
            # <Rkey/> or <Rkey></Rkey>: there is no text node to read.
            return ''
        data = child.data
        # Only Python 2 unicode objects need converting to a byte string.
        return data if isinstance(data, str) else data.encode('latin-1')

    return [text_of(f), text_of(l)]
mas01mc@354 167
def ParseListXML(response):
    """Parse a list-query reply into a list of [key, length] entries.

    response is the XML text returned by RunListQuery.  The 'Rkey' and 'Rlen'
    elements are paired up in document order and each pair is converted with
    ParseListConstruct.  Pairing stops at the shorter of the two sequences.
    A real list is returned on both Python 2 and Python 3.
    """
    dom = parseString(response)
    try:
        fileElements = dom.getElementsByTagName('Rkey')
        lenElements = dom.getElementsByTagName('Rlen')
        return [ParseListConstruct(keyNode, lenNode)
                for keyNode, lenNode in zip(fileElements, lenElements)]
    finally:
        # Release the DOM explicitly, as ParseShingleXML does.
        dom.unlink()
mas01mc@354 175
############### Status Query - Show the status of the database ###########
# RunStatusQuery returns a dictionary with the status fields; GetDomElement
# pulls a single field out of the parsed reply.
def GetDomElement(dom, field):
    """Return the text of the first element named field, or "" if none.

    dom is a parsed minidom document (or element).  The text is returned as a
    native string: on Python 2 the unicode text is encoded as latin-1, exactly
    as before.  "" is returned both when no such element exists and when the
    element exists but has no children.
    """
    els = dom.getElementsByTagName(field)
    if not els:
        return ""
    child = els[0].firstChild
    if child is None:
        # Element is present but empty; previously this raised AttributeError.
        return ""
    data = child.data
    # Only Python 2 unicode objects need converting to a byte string.
    return data if isinstance(data, str) else data.encode('latin-1')
mas01mc@354 184
def RunStatusQuery():
    """Query the database status and return it as a dictionary.

    Sends STATUS_TEMPLATE filled with the module-level dbName, parses the
    reply, and returns a dict with the keys 'numFiles', 'dim', 'length',
    'dudCount' and 'nullCount'.  Each value is whatever GetDomElement returns
    for the element of the same name.
    """
    reply = SendXMLCommand(STATUS_TEMPLATE % dbName)
    doc = parseString(reply)
    fieldNames = ('numFiles', 'dim', 'length', 'dudCount', 'nullCount')
    status = {}
    for fieldName in fieldNames:
        status[fieldName] = GetDomElement(doc, fieldName)
    return status
mas01mc@354 199
mas01mc@354 200
mas01mc@354 201
############### Shingle/Matrix Query - Show the data closest to shingle range ###########
#
# Encode features as a matrix with dim columns, and (optionally) powers as a matrix with one column
# dim                  # number of columns
# f1,1 f1,2...f1,dim   # first row
# f2,1 f2,2...f2,dim
# ...
# fN,1 fN,2...fN,dim   # last row
# p1,1                 # first row's power
# p2,1
# ...
# pN,1                 # last row's power
#
def RunShingleQuery():
    """Send a fixed example shingle query and print the matches.

    All parameters are hard-coded below.  The filled-in request is printed,
    sent with SendXMLCommand, and the reply is handed to ParseShingleXML,
    which prints the results.  Nothing is returned.
    """
    # Values in the order SHINGLE_QUERY_TEMPLATE expects them.
    parameters = (
        dbName,
        '3',                                      # featureDim
        '<v>1.0</v><v>0.5</v><v>0.25</v>'
        '<v>0.5</v><v>1.5</v><v>1.0</v>',         # two rows of features
        '<p>-1.0</p><p>-1.0</p>',                 # one power feature per row
        '0',     # queryPos: where in the feature sequence to start the shingle query
        '10',    # pointNN: how many near points to return per track
        '10',    # trackNN: how many near tracks to return
        '1.0',   # search radius
        '-4.5',  # absolute silence threshold in Bels (query and database shingles)
        '0',     # relative silence threshold in Bels between features, 0 = ignore
        '0',     # exhaustive: 1 = query using all subsequences of length sequenceLength
        '0',     # lshExact: if using an index, compute exact distances after LSH retrieval
    )
    request = SHINGLE_QUERY_TEMPLATE % parameters
    print(request)
    ParseShingleXML(SendXMLCommand(request))
mas01mc@354 234
mas01mc@354 235
############### Sequence Query - Show the data closest to one query ###########
def RunSequenceQuery(argv):
    """Run a sequence query by key and print the matches.

    argv is a command-line style list.  With more than two entries it must
    hold the key at argv[2], the query position at argv[3] and the sequence
    length at argv[4]; the radius is then '2'.  With two or fewer entries a
    built-in example query is used instead (key 'tmp/3.chr', position '110',
    length '20', radius '0.4').

    The reply is passed to ParseShingleXML, which prints the results.

    Raises ValueError if arguments are supplied but position or length is
    missing (previously this surfaced as a bare IndexError).
    """
    if len(argv) > 2:
        if len(argv) < 5:
            raise ValueError(
                "sequence query needs a key, a position and a length; got %r"
                % (list(argv[2:]),))
        dbKey, qPos, seqLen = argv[2], argv[3], argv[4]
        queryRadius = '2'
    else:
        dbKey, qPos, seqLen = 'tmp/3.chr', '110', '20'
        queryRadius = '0.4'

    # These are the same whether or not arguments were given.
    qType = '32'    # nSequence
    pointNN = '10'
    trackNN = '5'

    message = SEQUENCE_TEMPLATE % (dbName, dbKey, qType, qPos, pointNN,
                                   trackNN, seqLen, queryRadius)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@354 261
############### Feature Query - Show the data closest to a feature file ###########
def RunQuery(argv):
    """Run a feature-file query and print the matches.

    argv is a command-line style list.  With more than two entries it must
    hold the feature file at argv[2], the power file at argv[3], the query
    position at argv[4] and the sequence length at argv[5]; pointNN is then
    '20'.  With two or fewer entries a built-in example query is used
    ('foo.chr12', 'foo.power', position '0', length '10', pointNN '3').

    FEATURE_QUERY_TEMPLATE is filled as follows:
        dbName             <- module-level dbName
        qKey               <- feature file
        keyList            <- "" (empty)
        timesFileName      <- "" (empty)
        powerFileName      <- power file
        qType              <- '32'
        qPos               <- query position
        pointNN            <- pointNN
        segNN              <- trackNN ('5')
        segLen             <- sequence length
        radius             <- '0.1'
        absolute-threshold <- '0.0'
        relative-threshold <- '0.0'
        exhaustive         <- '0'
        lsh-exact          <- '1'
        no-unit-norming    <- '0'

    The request is printed, sent with SendXMLCommand, and the reply is passed
    to ParseShingleXML, which prints the results.

    Raises ValueError if arguments are supplied but any of the four required
    values is missing (previously this surfaced as a bare IndexError).
    """
    if len(argv) > 2:
        if len(argv) < 6:
            raise ValueError(
                "feature query needs a feature file, a power file, a position "
                "and a length; got %r" % (list(argv[2:]),))
        featureFile, powerFile, qPos, seqLen = argv[2], argv[3], argv[4], argv[5]
        pointNN = '20'
    else:
        featureFile, powerFile, qPos, seqLen = 'foo.chr12', 'foo.power', '0', '10'
        pointNN = '3'

    # These are the same whether or not arguments were given.
    qType = '32'    # nSequence
    trackNN = '5'
    queryRadius = '0.1'

    message = FEATURE_QUERY_TEMPLATE % (
        dbName, featureFile, "", "", powerFile, qType, qPos, pointNN, trackNN,
        seqLen, queryRadius, '0.0', '0.0', '0', '1', '0')

    print(message)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@474 306
def ParseShingleXML(response):
    """Print a summary of the matches contained in a query reply.

    response is the XML text of the reply.  The texts of all 'Rlist', 'Dist'
    and 'Spos' elements are collected in document order and printed one match
    per line as "<Spos> <Dist> <Rlist>".  A blank line is printed whenever the
    Rlist value differs from the previous line's, which groups consecutive
    matches that share the same value.

    Elements without text are treated as ''.  If the three element counts
    differ, only as many lines as the shortest allows are printed.  Nothing is
    returned.
    """
    # Grab all the responses
    # See http://diveintopython.org/xml_processing/parsing_xml.html
    dom = parseString(response)
    try:
        def texts(tagName):
            collected = []
            for node in dom.getElementsByTagName(tagName):
                child = node.firstChild
                data = child.data if child is not None else ''
                # Only Python 2 unicode objects need converting (latin-1, as
                # before); native strings are used unchanged.
                if not isinstance(data, str):
                    data = data.encode('latin-1')
                collected.append(data)
            return collected

        resultList = texts('Rlist')
        distanceList = texts('Dist')
        positionList = texts('Spos')

        # Print out a summary of the most similar results
        previous = None
        for position, distance, result in zip(positionList, distanceList,
                                              resultList):
            if previous is not None and result != previous:
                print("")
            print("%s %s %s" % (position, distance, result))
            previous = result
    finally:
        # Release the DOM even if parsing the contents failed part-way.
        dom.unlink()
mas01mc@354 337
############### XML and Network Utilities ###########
# Send one XML SOAP command to the server. Get back the response.

def SendXMLCommand(message):
    """Send one request to serverHost:serverPort and return the reply body.

    message is the complete request text.  A new TCP connection is opened for
    every call; the whole message is written, then the reply is read until the
    server closes the connection.

    Returns the part of the reply after the first blank line ("\\r\\n\\r\\n"),
    i.e. with the HTTP header split off.  If the reply contains no blank line
    it is returned whole.  The value is a byte string (plain str on Python 2).

    When the module-level debug flag is set, the request and the reply body
    are printed.

    Raises SystemExit (status 1) after printing a message if the server sends
    nothing back.  Socket errors propagate to the caller; the socket is closed
    in every case.
    """
    if debug:
        print(message)
        print("")

    # Sockets carry bytes.  On Python 2 a str already is bytes; text strings
    # are encoded to match the encoding="UTF-8" declared in the templates.
    if not isinstance(message, bytes):
        message = message.encode('utf-8')

    #Create a socket
    sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        #Connect to server
        sSock.connect((serverHost, serverPort))

        #Send messages (sendall keeps writing until everything has been sent)
        sSock.sendall(message)

        # Now loop, while getting all the data we can get from the socket.
        chunks = []
        while True:
            chunk = sSock.recv(4096)
            if not chunk:
                # An empty read means the server closed the connection.
                break
            chunks.append(chunk)
    finally:
        sSock.close()

    data = b"".join(chunks)
    if not data:
        print("No response from the audioDB server")
        sys.exit(1)

    # Split off the HTTP header and the data
    header, separator, response = data.partition(b"\r\n\r\n")
    if not separator:
        # No header/body boundary found: treat the whole reply as the body.
        response = data

    if debug:
        shown = response if isinstance(response, str) else response.decode('utf-8', 'replace')
        print('Client received:  ' + shown)

    return response
mas01mc@354 372
mas01mc@354 373
mas01mc@354 374 ############### Main Program - Figure out which query we want ###########
mas01mc@354 375
mas01mc@354 376 # Argument processing scheme described at: http://docs.python.org/lib/module-getopt.html
mas01mc@354 377
mas01mc@354 378 import sys
if __name__ == "__main__":
    # Command-line entry point: the first argument selects the query, the
    # remaining ones are read by the selected Run*Query function.
    #   -s | status   print the database status fields
    #   -q | query    sequence query by key
    #   -f | feature  feature-file query
    #   -l | list     list the database contents
    #   -v | vector   built-in example shingle query
    usage = "Syntax: " + sys.argv[0] + " -{s,q,f,l,v} feature_file [power_file] pos len"
    if len(sys.argv) == 1:
        print(usage)
        sys.exit(1)

    queryType = sys.argv[1]
    if queryType in ('-s', 'status'):
        for k, v in RunStatusQuery().items():
            print("%s %s" % (k, v))
    elif queryType in ('-q', 'query'):
        RunSequenceQuery(sys.argv)
    elif queryType in ('-f', 'feature'):
        RunQuery(sys.argv)
    elif queryType in ('-l', 'list'):
        for (f, l) in ParseListXML(RunListQuery()):
            print("%s\t%s" % (f, l))
    elif queryType in ('-v', 'vector'):
        RunShingleQuery()
    else:
        # An unrecognised option used to fall through silently with status 0.
        print(usage)
        sys.exit(1)
mas01mc@354 402
mas01mc@354 403