annotate QueryADB.py @ 601:82d23418d867

Fix some fd leaks in the command-line binary. Strictly speaking, they're not really leaks, because the only codepath that suffers from these leaks exits immediately afterwards. On the other hand, this fix makes valgrind on e.g. tests/0025 happier, going from 5 errors to none.
author mas01cr
date Fri, 14 Aug 2009 16:39:32 +0000
parents a30948382f56
children
rev   line source
mas01mc@354 1 #!/usr/bin/python
mas01mc@354 2
mas01mc@354 3 # Python code/library to query the audioDB via the SOAP web interface.
mas01mc@354 4 # by Malcolm Slaney, August/September 2008
mas01mc@354 5 # malcolm@ieee.org
mas01mc@354 6
mas01mc@354 7 import sys, socket
mas01mc@354 8 from xml.dom.minidom import parseString
mas01mc@354 9
# Module-wide settings.  A plain assignment at module level already creates a
# module global, so no `global` statement is needed here.
debug = False            # when True, SendXMLCommand prints each request and reply
dbName = 'tutorial.adb'  # database name substituted into every request template

# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
#serverHost = 'research-hm3.corp.sk1.yahoo.com'
serverHost = 'localhost'  # host SendXMLCommand connects to
serverPort = 14475        # TCP port SendXMLCommand connects to
mas01mc@354 19
mas01mc@354 20 # Start the server on serverHost with
mas01mc@354 21 # ./audioDB -s 14475
mas01mc@354 22
mas01mc@354 23 # Here are the templates used for the different kinds of queries. We'll fill in the
mas01mc@354 24 # desired parameters as we go.
# All requests share one SOAP envelope; only the element inside
# <SOAP-ENV:Body> differs, so the envelope is written once.
#
# The XML declaration is the very first thing in every template: XML only
# allows it at the start of the document, and a strict parser rejects a
# declaration that is preceded by whitespace.
_SOAP_HEAD = """<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
"""

_SOAP_TAIL = """</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""

# adb:liszt request.  One %s slot: dbName.
LIST_TEMPLATE = _SOAP_HEAD + """<adb:liszt>
<dbName>%s</dbName>
<lisztOffset>0</lisztOffset>
<lisztLength>100000</lisztLength>
</adb:liszt>
""" + _SOAP_TAIL

# adb:sequenceQueryByKey request.  Eight %s slots, in order: dbName,
# featureFileName, queryType, queryPoint, pointNN, trackNN, sequenceLength,
# radius.
SEQUENCE_TEMPLATE = _SOAP_HEAD + """<adb:sequenceQueryByKey>
<dbName>%s</dbName>
<featureFileName>%s</featureFileName>
<queryType>%s</queryType>
<trackFileName></trackFileName>
<timesFileName></timesFileName>
<queryPoint>%s</queryPoint>
<pointNN>%s</pointNN> <!-- Number of nearest neighbors to retrieve -->
<trackNN>%s</trackNN> <!-- Number of tracks to retrieve -->
<sequenceLength>%s</sequenceLength> <!-- Number of frames in a shingle -->
<radius>%s</radius> <!-- Distance radius to search -->
<absolute-threshold>-4.0</absolute-threshold>
<usingQueryPoint>1</usingQueryPoint>
<lsh-exact>0</lsh-exact>
</adb:sequenceQueryByKey>
""" + _SOAP_TAIL

# adb:status request.  One %s slot: dbName.
STATUS_TEMPLATE = _SOAP_HEAD + """<adb:status>
<dbName>%s</dbName>
</adb:status>
""" + _SOAP_TAIL

# adb:shingleQuery request.  Twelve %s slots, in order: dbName, dim, the two
# pieces of qVector content (RunShingleQuery passes <v> elements, then <p>
# elements), queryPos, pointNN, trackNN, radius, absolute-threshold,
# relative-threshold, exhaustive, lsh-exact.
SHINGLE_QUERY_TEMPLATE = _SOAP_HEAD + """<adb:shingleQuery>
<dbName>%s</dbName>
<qVector>
<dim>%s</dim>
%s
%s
</qVector>
<keyList></keyList>
<timesFileName></timesFileName>
<queryType>32</queryType>
<queryPos>%s</queryPos>
<pointNN>%s</pointNN>
<trackNN>%s</trackNN>
<sequenceLength>1</sequenceLength>
<radius>%s</radius>
<absolute-threshold>%s</absolute-threshold>
<relative-threshold>%s</relative-threshold>
<exhaustive>%s</exhaustive>
<lsh-exact>%s</lsh-exact>
</adb:shingleQuery>
""" + _SOAP_TAIL

# adb:query request.  Sixteen %s slots, in order: dbName, qKey, keyList,
# timesFileName, powerFileName, qType, qPos, pointNN, segNN, segLen, radius,
# absolute-threshold, relative-threshold, exhaustive, lsh-exact,
# no-unit-norming.
FEATURE_QUERY_TEMPLATE = _SOAP_HEAD + """<adb:query>
<dbName>%s</dbName>
<qKey>%s</qKey>
<keyList>%s</keyList>
<timesFileName>%s</timesFileName>
<powerFileName>%s</powerFileName>
<qType>%s</qType>
<qPos>%s</qPos>
<pointNN>%s</pointNN>
<segNN>%s</segNN>
<segLen>%s</segLen>
<radius>%s</radius>
<absolute-threshold>%s</absolute-threshold>
<relative-threshold>%s</relative-threshold>
<exhaustive>%s</exhaustive>
<lsh-exact>%s</lsh-exact>
<no-unit-norming>%s</no-unit-norming>
</adb:query>
""" + _SOAP_TAIL
mas01mc@474 152
mas01mc@354 153 ############### List Query - Show the files in the database ###########
mas01mc@354 154 # RunListQuery returns the raw reply; ParseListXML turns it into a list of (key identifier, frame length) pairs.
def RunListQuery():
    """Send the list request for the configured database.

    Fills LIST_TEMPLATE with the module-level dbName, passes it to
    SendXMLCommand and returns whatever that call returns (the undecoded
    reply body).
    """
    return SendXMLCommand(LIST_TEMPLATE % (dbName,))
mas01mc@354 161
mas01mc@354 162 # Construct a list from the two DOM entries passed. Used with the map routine to
mas01mc@354 163 # assemble the output.
def ParseListConstruct(f, l):
    """Build one [key, length] entry from a pair of DOM elements.

    f and l are DOM elements; each contributes the data of its first child
    node, encoded as latin-1.  An element with no children (for example
    ``<Rkey></Rkey>``) contributes an empty value instead of raising
    AttributeError.

    Returns a two-item list: [value of f, value of l].
    """
    def text_of(element):
        child = element.firstChild
        if child is None:
            # Empty element: nothing to read, but keep the same value type.
            return ''.encode('latin-1')
        return child.data.encode('latin-1')

    return [text_of(f), text_of(l)]
mas01mc@354 167
def ParseListXML(response):
    """Decode a list-query reply into a list of [key, length] entries.

    response is the XML text of the reply.  Every 'Rkey' element is paired, in
    document order, with the 'Rlen' element at the same position and handed to
    ParseListConstruct.

    Returns a real list (never a lazy iterator).  If the reply holds different
    numbers of 'Rkey' and 'Rlen' elements, the unpaired ones are ignored.
    """
    dom = parseString(response)
    try:
        keys = dom.getElementsByTagName('Rkey')
        lengths = dom.getElementsByTagName('Rlen')
        return [ParseListConstruct(key, length)
                for key, length in zip(keys, lengths)]
    finally:
        # Break the DOM's reference cycles, as ParseShingleXML does.
        dom.unlink()
mas01mc@354 175
mas01mc@354 176 ############### Status Query - Show the status of the database ###########
mas01mc@354 177 # Return a dictionary with the status fields
def GetDomElement(dom, field):
    """Return the text of the first element named *field* in *dom*.

    The value is the data of that element's first child node, encoded as
    latin-1.  Returns "" when no such element exists or when the element is
    empty (an empty element used to raise AttributeError).
    """
    els = dom.getElementsByTagName(field)
    if not els or els[0].firstChild is None:
        return ""
    return els[0].firstChild.data.encode('latin-1')
mas01mc@354 184
def RunStatusQuery():
    """Query the status of the configured database.

    Fills STATUS_TEMPLATE with the module-level dbName, sends it with
    SendXMLCommand and parses the reply.

    Returns a dict with the keys 'numFiles', 'dim', 'length', 'dudCount' and
    'nullCount'; each value is what GetDomElement returns for the element of
    the same name.
    """
    response = SendXMLCommand(STATUS_TEMPLATE % (dbName,))
    dom = parseString(response)
    try:
        status = {}
        for field in ('numFiles', 'dim', 'length', 'dudCount', 'nullCount'):
            status[field] = GetDomElement(dom, field)
        return status
    finally:
        # Break the DOM's reference cycles, as ParseShingleXML does.
        dom.unlink()
mas01mc@354 199
mas01mc@354 200
mas01mc@354 201
mas01mc@354 202 ############### Shingle/Matrix Query - Show the data closest to shingle range ###########
mas01mc@354 203 #
mas01mc@354 204 # Encode features as a matrix with dim columns, and (optionally) powers as a matrix with one column
mas01mc@354 205 # dim # number of columns
mas01mc@354 206 # f1,1 f1,2...f1,dim # first row
mas01mc@354 207 # f2,1 f2,2...f2,dim
mas01mc@354 208 # ...
mas01mc@354 209 # fN,1 fN,2...fN,dim # last row
mas01mc@354 210 # p1,1 # first row's power
mas01mc@354 211 # p2,1
mas01mc@354 212 # ...
mas01mc@354 213 # pN,1 # last row's power
mas01mc@354 214 #
def RunShingleQuery():
    """Send a fixed example shingle query and print the matches.

    Fills SHINGLE_QUERY_TEMPLATE with the hard-coded values below, prints the
    resulting request, sends it with SendXMLCommand and passes the reply to
    ParseShingleXML, which prints the results.  Returns None.
    """
    featureDim = '3'
    queryVector = '<v>1.0</v><v>0.5</v><v>0.25</v><v>0.5</v><v>1.5</v><v>1.0</v>'  # two rows of features
    powerVector = '<p>-1.0</p><p>-1.0</p>'  # one power feature per row
    queryPos = '0'               # where in the feature sequence to start the shingle query
    pointNN = '10'               # how many near points to return per track
    trackNN = '10'               # how many near tracks to return
    radius = '1.0'               # search radius
    absoluteThreshold = '-4.5'   # absolute silence threshold in Bels (query and database shingles)
    relativeThreshold = '0'      # relative silence threshold in Bels between features, 0 = ignore
    exhaustive = '0'             # 1 = perform query using all subsequences of features of length sequenceLength
    lshExact = '0'               # if using an index then compute exact distances after LSH retrieval

    message = SHINGLE_QUERY_TEMPLATE % (
        dbName, featureDim, queryVector, powerVector, queryPos, pointNN,
        trackNN, radius, absoluteThreshold, relativeThreshold, exhaustive,
        lshExact)
    # The request is echoed unconditionally (not only when debug is set).
    print(message)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@354 234
mas01mc@354 235
mas01mc@354 236 ############### Sequence Query - Show the data closest to one query ###########
def RunSequenceQuery(argv):
    """Run a sequence query by key and print the matches.

    argv is the full command line.  When it has more than two entries,
    argv[2] is the feature file name (the key), argv[3] the query position and
    argv[4] the sequence length, and a radius of '2' is used.  Otherwise a
    built-in example query ('tmp/3.chr', position '110', length '20', radius
    '0.4') is sent.

    The request is built from SEQUENCE_TEMPLATE, sent with SendXMLCommand and
    the reply is printed by ParseShingleXML.  Returns None.

    Raises IndexError if some, but not all, of the three arguments are given.
    """
    # Local import keeps this function self-contained.
    from xml.sax.saxutils import escape

    if len(argv) > 2:
        # Check up front for everything that is read below, so a short command
        # line fails with an explanation rather than "list index out of range".
        if len(argv) < 5:
            raise IndexError(
                "sequence query needs feature_file, pos and len arguments")
        dbKey, qPos, seqLen = argv[2], argv[3], argv[4]
        queryRadius = '2'
    else:
        dbKey, qPos, seqLen = 'tmp/3.chr', '110', '20'
        queryRadius = '0.4'
    qType = '32'  # nSequence
    pointNN = '10'
    trackNN = '5'

    fields = (dbName, dbKey, qType, qPos, pointNN, trackNN, seqLen, queryRadius)
    # Escape &, < and > so that e.g. a file name containing '&' still yields
    # well-formed XML.
    message = SEQUENCE_TEMPLATE % tuple([escape(field) for field in fields])
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@354 261
mas01mc@474 262 ############### Feature Query - Show the data closest to a feature-file query ###########
def RunQuery(argv):
    """Run a feature query and print the matches.

    argv is the full command line.  When it has more than two entries,
    argv[2] is the feature file, argv[3] the power file, argv[4] the query
    position and argv[5] the sequence length, and pointNN is '20'.  Otherwise
    a built-in example query ('foo.chr12', 'foo.power', position '0', length
    '10', pointNN '3') is sent.

    FEATURE_QUERY_TEMPLATE is filled, in slot order, with: dbName, qKey
    (feature file), keyList (empty), timesFileName (empty), powerFileName,
    qType, qPos, pointNN, segNN, segLen, radius, absolute-threshold,
    relative-threshold, exhaustive, lsh-exact, no-unit-norming.

    The request is printed, sent with SendXMLCommand, and the reply is printed
    by ParseShingleXML.  Returns None.

    Raises IndexError if some, but not all, of the four arguments are given.
    """
    # Local import keeps this function self-contained.
    from xml.sax.saxutils import escape

    if len(argv) > 2:
        # Check up front for everything that is read below, so a short command
        # line fails with an explanation rather than "list index out of range".
        if len(argv) < 6:
            raise IndexError(
                "feature query needs feature_file, power_file, pos and len "
                "arguments")
        featureFile, powerFile = argv[2], argv[3]
        qPos, seqLen = argv[4], argv[5]
        pointNN = '20'
    else:
        featureFile, powerFile = 'foo.chr12', 'foo.power'
        qPos, seqLen = '0', '10'
        pointNN = '3'
    qType = '32'  # nSequence
    trackNN = '5'
    queryRadius = '0.1'

    fields = (dbName, featureFile, "", "", powerFile, qType, qPos, pointNN,
              trackNN, seqLen, queryRadius, '0.0', '0.0', '0', '1', '0')
    # Escape &, < and > so that e.g. a file name containing '&' still yields
    # well-formed XML.
    message = FEATURE_QUERY_TEMPLATE % tuple([escape(field) for field in fields])

    # The request is echoed unconditionally (not only when debug is set).
    print(message)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@474 306
def ParseShingleXML(response):
    """Print a summary of the matches in a query reply.

    response is the XML text of the reply.  The 'Rlist' (result), 'Dist'
    (distance) and 'Spos' (position) elements are read in document order and
    printed one match per line as "position distance result".  A blank line is
    printed whenever the result differs from the one on the previous line.

    Empty elements are printed as empty fields.  If the three kinds of element
    occur in different numbers, only complete triples are printed.
    Returns None.
    """
    # See http://diveintopython.org/xml_processing/parsing_xml.html

    def texts(dom, tag):
        # Text of every element named `tag`, in document order.
        values = []
        for node in dom.getElementsByTagName(tag):
            child = node.firstChild
            if child is None:
                values.append('')
                continue
            text = child.data
            if str is bytes:
                # Python 2: print latin-1 byte strings, as this script always
                # has.  On Python 3 the text is printed as it is.
                text = text.encode('latin-1')
            values.append(text)
        return values

    dom = parseString(response)
    try:
        resultList = texts(dom, 'Rlist')
        distanceList = texts(dom, 'Dist')
        positionList = texts(dom, 'Spos')
    finally:
        # Only plain strings are kept, so the DOM can be released right away.
        dom.unlink()

    # Print out a summary of the most similar results
    previous = None
    for position, distance, result in zip(positionList, distanceList, resultList):
        if previous is not None and result != previous:
            print("")
        print("%s %s %s" % (position, distance, result))
        previous = result
mas01mc@354 337
mas01mc@354 338 ############### XML and Network Utilities ###########
mas01mc@354 339 # Send one XML SOAP command to the server. Get back the response.
mas01mc@354 340
def SendXMLCommand(message):
    """Send one XML SOAP command to the server and return the reply body.

    Connects to (serverHost, serverPort), sends *message* and reads until the
    server closes the connection.  The reply is split at the first blank line
    ("\\r\\n\\r\\n"): what precedes it is treated as the HTTP header and
    dropped, what follows is returned.  A reply without that separator is
    returned whole.

    When the module-level debug flag is set, the request and the reply body
    are printed.

    If the server sends nothing at all, a message is printed and the process
    exits via sys.exit(0).

    The socket is closed on every path, including errors and the exit above.
    """
    if debug:
        print(message)
        print("")

    # Sockets carry bytes.  A Python 2 str already is bytes; text is encoded
    # as UTF-8, the encoding the request templates declare.
    if not isinstance(message, bytes):
        message = message.encode('utf-8')

    sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sSock.connect((serverHost, serverPort))

        # sendall keeps writing until the whole request is out; send may stop
        # after a partial write.
        sSock.sendall(message)

        # Read until the peer closes the connection (recv returns nothing).
        chunks = []
        while True:
            chunk = sSock.recv(4096)
            if not chunk:
                break
            chunks.append(chunk)
    finally:
        sSock.close()

    data = b"".join(chunks)
    if not data:
        print("No response from the audioDB server")
        # NOTE(review): exits with status 0 even though the query failed;
        # kept as-is in case existing scripts rely on it.
        sys.exit(0)

    # Split off the HTTP header and the data
    header, separator, response = data.partition(b"\r\n\r\n")
    if not separator:
        response = data
    if debug:
        print('Client received:  %s' % (response,))

    return response
mas01mc@354 372
mas01mc@354 373
mas01mc@354 374 ############### Main Program - Figure out which query we want ###########
mas01mc@354 375
mas01mc@354 376 # Argument processing scheme described at: http://docs.python.org/lib/module-getopt.html
mas01mc@354 377
mas01mc@354 378 import sys
if __name__=="__main__":
    # Command-line entry point: the first argument selects the query, the
    # whole argument list is passed on to the query functions that take one.
    usage = "Syntax: " + sys.argv[0] + " -{s,q,f,l} feature_file [power_file] pos len"
    if len(sys.argv) == 1:
        print(usage)
        sys.exit(1)

    queryType = sys.argv[1]
    if queryType in ('-s', 'status'):
        response = RunStatusQuery()
        for k, v in response.items():
            print("%s %s" % (k, v))
    elif queryType in ('-q', 'query'):
        RunSequenceQuery(sys.argv)
    elif queryType in ('-f', 'feature'):
        RunQuery(sys.argv)
    elif queryType in ('-l', 'list'):
        response = RunListQuery()
        results = ParseListXML(response)
        for (f, l) in results:
            print("%s\t%s" % (f, l))
    elif queryType in ('-v', 'vector'):
        RunShingleQuery()
    else:
        # An unrecognised option used to do nothing and exit successfully.
        print(usage)
        sys.exit(1)
mas01mc@354 402
mas01mc@354 403