annotate QueryADB.py @ 369:6564be3109c5 gcc-4.3-cleanups

gcc-4.3 warning cleanups for lshlib.cpp (I do not believe that any of these changes contain significant copyrightable "intellectual property". However, to the extent that they do, the changes are hereby released into the Public Domain, and may therefore be used by anyone for any purpose without need for consideration of any kind.)
author mas01cr
date Wed, 12 Nov 2008 15:23:32 +0000
parents 4871a3ed9e36
children f9d86b1db21c
rev   line source
mas01mc@354 1 #!/usr/bin/python
mas01mc@354 2
mas01mc@354 3 # Python code/library to query the audioDB via the SOAP web interface.
mas01mc@354 4 # by Malcolm Slaney, August/September 2008
mas01mc@354 5 # malcolm@ieee.org
mas01mc@354 6
mas01mc@354 7 import sys, socket
mas01mc@354 8 from xml.dom.minidom import parseString
mas01mc@354 9
# Module-wide settings read by the query functions in this file.
debug = False            # when true, SendXMLCommand echoes each request and reply
dbName = 'tutorial.adb'  # database named in every SOAP request

# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
#serverHost = 'research-hm3.corp.sk1.yahoo.com'
serverHost, serverPort = 'localhost', 14475
mas01mc@354 19
mas01mc@354 20 # Start the server on serverHost with
mas01mc@354 21 # ./audioDB -s 14475
mas01mc@354 22
# Here are the templates used for the different kinds of queries. We'll fill in the
# desired parameters as we go.
#
# LIST_TEMPLATE takes one substitution: the database name.
# The backslash after the opening quotes keeps the XML declaration at the very
# start of the string; a newline in front of it makes the document ill-formed.
LIST_TEMPLATE = """\
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
<adb:liszt>
<dbName>%s</dbName>
<lisztOffset>0</lisztOffset>
<lisztLength>100000</lisztLength>
</adb:liszt>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""
mas01mc@354 42
# Request body for a sequence query by key.  Substitutions, in order:
# database name, feature file name (key), query type, query point, pointNN,
# trackNN, sequence length, radius.
# The backslash after the opening quotes keeps the XML declaration at the very
# start of the string; a newline in front of it makes the document ill-formed.
SEQUENCE_TEMPLATE = """\
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
<adb:sequenceQueryByKey>
<dbName>%s</dbName>
<featureFileName>%s</featureFileName>
<queryType>%s</queryType>
<trackFileName></trackFileName>
<timesFileName></timesFileName>
<queryPoint>%s</queryPoint>
<pointNN>%s</pointNN> <!-- Number of nearest neighbors to retrieve -->
<trackNN>%s</trackNN> <!-- Number of tracks to retrieve -->
<sequenceLength>%s</sequenceLength> <!-- Number of frames in a shingle -->
<radius>%s</radius> <!-- Distance radius to search -->
<absolute-threshold>-4.0</absolute-threshold>
<usingQueryPoint>1</usingQueryPoint>
<lsh-exact>0</lsh-exact>
</adb:sequenceQueryByKey>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""
mas01mc@354 70
# Request body for a status query.  Takes one substitution: the database name.
# The backslash after the opening quotes keeps the XML declaration at the very
# start of the string; a newline in front of it makes the document ill-formed.
STATUS_TEMPLATE = """\
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
<adb:status>
<dbName>%s</dbName>
</adb:status>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""
mas01mc@354 86
# Request body for a shingle (query-by-vector) query.  Substitutions, in order:
# database name, feature dimension, the <v> feature elements, the <p> power
# elements, query position, pointNN, trackNN, radius, absolute threshold,
# relative threshold, exhaustive flag, lsh-exact flag.
# The backslash after the opening quotes keeps the XML declaration at the very
# start of the string; a newline in front of it makes the document ill-formed.
SHINGLE_QUERY_TEMPLATE = """\
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
<adb:shingleQuery>
<dbName>%s</dbName>
<qVector>
<dim>%s</dim>
%s
%s
</qVector>
<keyList></keyList>
<timesFileName></timesFileName>
<queryType>32</queryType>
<queryPos>%s</queryPos>
<pointNN>%s</pointNN>
<trackNN>%s</trackNN>
<sequenceLength>1</sequenceLength>
<radius>%s</radius>
<absolute-threshold>%s</absolute-threshold>
<relative-threshold>%s</relative-threshold>
<exhaustive>%s</exhaustive>
<lsh-exact>%s</lsh-exact>
</adb:shingleQuery>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""
mas01mc@354 119
############### List Query - Show the files in the database ###########
# Fetch the server's reply to a list request.  The reply is returned unparsed;
# pass it to ParseListXML to get the (key identifier, frame length) pairs.
def RunListQuery():
    """Send a liszt request for dbName and return what SendXMLCommand returns."""
    return SendXMLCommand(LIST_TEMPLATE % dbName)
mas01mc@354 128
# Pair up one key element with one length element.  ParseListXML maps this
# routine over the two element lists to assemble its output.
def ParseListConstruct(f,l):
    """Return [key, length]: the latin-1 encoded text of each element's first child."""
    return [element.firstChild.data.encode('latin-1') for element in (f, l)]
mas01mc@354 134
def ParseListXML(response):
    """Parse a list-query reply into [key, length] entries.

    Collects every Rkey and Rlen element in the reply and combines them
    position by position with ParseListConstruct.
    """
    document = parseString(response)
    keys, lengths = [document.getElementsByTagName(tag) for tag in ('Rkey', 'Rlen')]
    return map(ParseListConstruct, keys, lengths)
mas01mc@354 142
mas01mc@354 143 ############### Status Query - Show the status of the database ###########
mas01mc@354 144 # Return a dictionary with the status fields
def GetDomElement(dom, field):
    """Return the text of the first <field> element in dom, or "" if there is none.

    dom   -- a parsed minidom document (or element) to search
    field -- tag name to look up

    The text is returned latin-1 encoded.  An empty string is returned both
    when no such element exists and when the element is present but empty.
    """
    els = dom.getElementsByTagName(field)
    if not els:
        return ""
    node = els[0].firstChild
    if node is None:
        # Present but empty (<dim></dim> or <dim/>): there is no child node
        # to read text from, so treat it the same as a missing field.
        return ""
    return node.data.encode('latin-1')
mas01mc@354 151
def RunStatusQuery():
    """Ask the server for the status of dbName and return it as a dictionary.

    The dictionary has the keys numFiles, dim, length, dudCount and nullCount;
    each value is whatever GetDomElement extracts for that tag from the reply.
    """
    dom = parseString(SendXMLCommand(STATUS_TEMPLATE % dbName))
    fields = ('numFiles', 'dim', 'length', 'dudCount', 'nullCount')
    return dict((name, GetDomElement(dom, name)) for name in fields)
mas01mc@354 166
mas01mc@354 167
mas01mc@354 168
mas01mc@354 169 ############### Shingle/Matrix Query - Show the data closest to shingle range ###########
mas01mc@354 170 #
mas01mc@354 171 # Encode features as a matrix with dim columns, and (optionally) powers as a matrix with one column
mas01mc@354 172 # dim # number of columns
mas01mc@354 173 # f1,1 f1,2...f1,dim # first row
mas01mc@354 174 # f2,1 f2,2...f2,dim
mas01mc@354 175 # ...
mas01mc@354 176 # fN,1 fN,2...fN,dim # last row
mas01mc@354 177 # p1,1 # first row's power
mas01mc@354 178 # p2,1
mas01mc@354 179 # ...
mas01mc@354 180 # pN,1 # last row's power
mas01mc@354 181 #
def RunShingleQuery():
    """Send a fixed example shingle query for dbName and print the matches.

    The request is echoed to stdout before it is sent.  The reply is handed to
    ParseShingleXML, which prints the results.  Nothing is returned.
    """
    substitutions = (
        dbName,
        '3',     # featureDim: number of columns in the feature matrix
        # queryVector: two rows of features
        '<v>1.0</v><v>0.5</v><v>0.25</v><v>0.5</v><v>1.5</v><v>1.0</v>',
        '<p>-1.0</p><p>-1.0</p>',  # powerVector: one power feature per row
        '0',     # queryPos: where in the feature sequence to start the shingle query
        '10',    # pointNN: how many near points to return per track
        '10',    # trackNN: how many near tracks to return
        '1.0',   # radius: search radius
        '-4.5',  # absoluteThreshold: absolute silence threshold in Bels (query and database shingles)
        '0',     # relativeThreshold: relative silence threshold in Bels between features, 0 = ignore
        '0',     # exhaustive: 1 = query using all subsequences of features of length sequenceLength
        '0',     # lshExact: if using an index then compute exact distances after LSH retrieval
    )
    message = SHINGLE_QUERY_TEMPLATE % substitutions
    print(message)
    ParseShingleXML(SendXMLCommand(message))
mas01mc@354 201
mas01mc@354 202
mas01mc@354 203 ############### Sequence Query - Show the data closest to one query ###########
def RunSequenceQuery(argv):
    """Run a sequence query by key and print the matches via ParseShingleXML.

    argv -- the full command line.  When it carries query arguments they are
            argv[2] = key (feature file name), argv[3] = query position and
            argv[4] = sequence length, and a radius of 0.5 is used.  With no
            query arguments a built-in example query (key 'tmp/3.chr',
            position 110, length 20, radius 0.4) is run instead.

    If only some of the three query arguments are given, a usage line is
    printed and the program exits with status 1, rather than failing with an
    IndexError part-way through.
    """
    # Local import: escape() is only needed here and the file does not import it.
    from xml.sax.saxutils import escape

    qType = '32'  # nSequence
    pointNN = '10'
    trackNN = '5'

    if len(argv) > 2:
        if len(argv) < 5:
            print("Syntax: " + argv[0] + " -q feature_file pos len")
            sys.exit(1)
        # These come straight from the command line, so escape XML
        # metacharacters (&, <, >) before splicing them into the request.
        dbKey = escape(argv[2])
        qPos = escape(argv[3])
        seqLen = escape(argv[4])
        queryRadius = '0.5'
    else:
        dbKey = 'tmp/3.chr'
        qPos = '110'
        seqLen = '20'
        queryRadius = '0.4'

    message = SEQUENCE_TEMPLATE % (dbName, dbKey, qType, qPos, pointNN,
                                   trackNN, seqLen, queryRadius)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@354 228
def ParseShingleXML(response):
    """Print a summary of the matches contained in a query reply.

    One line is printed per Rlist entry, in the form "position distance
    result", using the Spos, Dist and Rlist values at the same index.  A blank
    line is printed whenever the result name differs from the previous one.
    """
    # See http://diveintopython.org/xml_processing/parsing_xml.html
    dom = parseString(response)

    def collect(tag):
        # latin-1 encoded text of every <tag> element, in document order
        return [node.firstChild.data.encode('latin-1')
                for node in dom.getElementsByTagName(tag)]

    resultList = collect('Rlist')
    distanceList = collect('Dist')
    positionList = collect('Spos')

    # Print out a summary of the most similar results
    previous = None
    for i, name in enumerate(resultList):
        if i > 0 and name != previous:
            print("")
        print("%s %s %s" % (positionList[i], distanceList[i], name))
        previous = name

    dom.unlink()
mas01mc@354 259
mas01mc@354 260 ############### XML and Network Utilities ###########
mas01mc@354 261 # Send one XML SOAP command to the server. Get back the response.
mas01mc@354 262
def SendXMLCommand(message):
    """Send one SOAP request to the audioDB server and return the reply body.

    message -- the complete XML request text

    Connects to (serverHost, serverPort), sends the whole request, then reads
    until the server closes the connection.  Everything after the first blank
    line (CRLF CRLF) of the reply is returned; if the reply has no such header
    separator the whole reply is returned.

    If the server sends nothing at all, a message is printed and the program
    exits.  When the module-level debug flag is set, the request and the reply
    body are echoed to stdout.
    """
    if debug:
        print(message)
        print("")

    # Sockets carry bytes.  On Python 2 a str already is bytes, so this only
    # converts text (unicode) requests.
    if not isinstance(message, bytes):
        message = message.encode('utf-8')

    sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sSock.connect((serverHost, serverPort))
        # sendall() keeps writing until the whole request is out; send() may
        # stop after a partial write.
        sSock.sendall(message)

        # Collect the reply in pieces and join once, rather than growing a
        # string on every recv().
        chunks = []
        while True:
            chunk = sSock.recv(4096)
            if not chunk:
                break
            chunks.append(chunk)
    finally:
        # Always release the socket, including on errors and on the
        # "no response" exit below.
        sSock.close()

    data = b"".join(chunks)
    if not data:
        print("No response from the audioDB server")
        # NOTE(review): exit status 0 is kept for backward compatibility, even
        # though this is an error path.
        sys.exit(0)

    # Split off the HTTP header and the data
    header, separator, response = data.partition(b"\r\n\r\n")
    if not separator:
        # No header block in the reply; hand back everything we received
        # instead of failing on the split.
        response = data
    if debug:
        print('Client received:  %s' % response)

    return response
mas01mc@354 294
mas01mc@354 295
mas01mc@354 296 ############### Main Program - Figure out which query we want ###########
mas01mc@354 297
mas01mc@354 298 # Argument processing scheme described at: http://docs.python.org/lib/module-getopt.html
mas01mc@354 299
mas01mc@354 300 import sys
if __name__=="__main__":
    # Command-line entry point: the first argument selects the query to run.
    #   -s / status : print each status field and its value
    #   -q / query  : sequence query (see RunSequenceQuery for its arguments)
    #   -l / list   : print "key<TAB>length" for every entry in the database
    #   -v / vector : run the example shingle query
    cmdname = sys.argv[0]
    usage = "Syntax: " + cmdname + " -q feature_file pos len"
    if len(sys.argv) == 1:
        print(usage)
        sys.exit(1)

    queryType = sys.argv[1]
    if queryType in ('-s', 'status'):
        status = RunStatusQuery()
        for k, v in status.items():
            print("%s %s" % (k, v))
    elif queryType in ('-q', 'query'):
        RunSequenceQuery(sys.argv)
    elif queryType in ('-l', 'list'):
        results = ParseListXML(RunListQuery())
        for (f, l) in results:
            print("%s\t%s" % (f, l))
    elif queryType in ('-v', 'vector'):
        RunShingleQuery()
    else:
        # An unrecognised query type used to fall through silently with a
        # zero exit status; report it instead.
        print("Unknown query type: " + queryType +
              " (expected -s/status, -q/query, -l/list or -v/vector)")
        print(usage)
        sys.exit(1)
mas01mc@354 322
mas01mc@354 323