annotate QueryADB.py @ 352:a22b18005853 serializedQuery

serialized query now with power.
author mas01mc
date Thu, 06 Nov 2008 22:57:39 +0000
parents af3bc78e0f77
children 1662745233d9
rev   line source
mas01mc@349 1 #!/usr/bin/python
mas01mc@349 2
mas01mc@349 3 # Python code/library to query the audioDB via the SOAP web interface.
mas01mc@349 4 # by Malcolm Slaney, August/September 2008
mas01mc@349 5 # malcolm@ieee.org
mas01mc@349 6
mas01mc@349 7 import sys, socket
mas01mc@349 8 from xml.dom.minidom import parseString
mas01mc@349 9
# Module-wide settings read by the query functions below.  (A `global`
# statement at module level has no effect, so plain assignments are used.)
debug = False            # When True, SendXMLCommand echoes each request and response.
dbName = 'tutorial.adb'  # Database name substituted into the SOAP templates.

# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
#serverHost = 'research-hm3.corp.sk1.yahoo.com'
serverHost = 'localhost'  # Host the audioDB server is running on.
serverPort = 14475        # TCP port the audioDB server listens on.
mas01mc@349 19
mas01mc@349 20 # Start the server on serverHost with
mas01mc@349 21 # ./audioDB -s 14475
mas01mc@349 22
mas01mc@349 23 # Here are the templates used for the different kinds of queries. We'll fill in the
mas01mc@349 24 # desired parameters as we go.
# SOAP envelope for the "liszt" request (list the entries of a database).
# Placeholder, in order: dbName.  Offset and length are fixed at 0 / 100000.
# The XML declaration must be the very first thing in the document, so the
# literal starts right after the opening quotes (no leading newline).
LIST_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
  xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
  xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns:adb="http://tempuri.org/adb.xsd">
  <SOAP-ENV:Body>
    <adb:liszt>
      <dbName>%s</dbName>
      <lisztOffset>0</lisztOffset>
      <lisztLength>100000</lisztLength>
    </adb:liszt>
  </SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""
mas01mc@349 42
# SOAP envelope for the "sequenceQueryByKey" request.
# Placeholders, in order: dbName, featureFileName, queryType, queryPoint,
# pointNN, trackNN, sequenceLength, radius.  The remaining fields are fixed.
# The XML declaration must be the very first thing in the document, so the
# literal starts right after the opening quotes (no leading newline).
SEQUENCE_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
  xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
  xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns:adb="http://tempuri.org/adb.xsd">
  <SOAP-ENV:Body>
    <adb:sequenceQueryByKey>
      <dbName>%s</dbName>
      <featureFileName>%s</featureFileName>
      <queryType>%s</queryType>
      <trackFileName></trackFileName>
      <timesFileName></timesFileName>
      <queryPoint>%s</queryPoint>
      <pointNN>%s</pointNN> <!-- Number of nearest neighbors to retrieve -->
      <trackNN>%s</trackNN> <!-- Number of tracks to retrieve -->
      <sequenceLength>%s</sequenceLength> <!-- Number of frames in a shingle -->
      <radius>%s</radius> <!-- Distance radius to search -->
      <absolute-threshold>-4.0</absolute-threshold>
      <usingQueryPoint>1</usingQueryPoint>
      <lsh-exact>0</lsh-exact>
    </adb:sequenceQueryByKey>
  </SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""
mas01mc@349 70
# SOAP envelope for the "status" request.
# Placeholder, in order: dbName.
# The XML declaration must be the very first thing in the document, so the
# literal starts right after the opening quotes (no leading newline).
STATUS_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
  xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
  xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns:adb="http://tempuri.org/adb.xsd">
  <SOAP-ENV:Body>
    <adb:status>
      <dbName>%s</dbName>
    </adb:status>
  </SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""
mas01mc@349 86
# SOAP envelope for the "shingleQuery" request, which carries the query
# vector inline instead of naming a feature file.
# Placeholders, in order: dbName, dim, vector markup (e.g. a run of <v>
# elements), power markup (e.g. a <p> element), pointNN, trackNN, radius,
# absolute-threshold, lsh-exact.  queryType and sequenceLength are fixed.
# The XML declaration must be the very first thing in the document, so the
# literal starts right after the opening quotes (no leading newline).
SHINGLE_QUERY_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
  xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
  xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xmlns:xsd="http://www.w3.org/2001/XMLSchema"
  xmlns:adb="http://tempuri.org/adb.xsd">
  <SOAP-ENV:Body>
    <adb:shingleQuery>
      <dbName>%s</dbName>
      <qVector>
        <dim>%s</dim>
        %s
        %s
      </qVector>
      <queryType>32</queryType>
      <pointNN>%s</pointNN>
      <trackNN>%s</trackNN>
      <sequenceLength>1</sequenceLength>
      <radius>%s</radius>
      <absolute-threshold>%s</absolute-threshold>
      <lsh-exact>%s</lsh-exact>
    </adb:shingleQuery>
  </SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""
mas01mc@349 114
mas01mc@349 115 ############### List Query - Show the files in the database ###########
mas01mc@349 116 # RunListQuery returns the raw XML response; pass it to ParseListXML to get a list of (key identifier, frame length) pairs.
def RunListQuery():
    """Send the list ("liszt") request for dbName and return the reply body.

    The return value is whatever SendXMLCommand hands back (the XML text of
    the response); it is not parsed here.
    """
    return SendXMLCommand(LIST_TEMPLATE % (dbName))
mas01mc@349 123
mas01mc@349 124 # Construct a list from the two DOM entries passed. Used with the map routine to
mas01mc@349 125 # assemble the output.
def ParseListConstruct(f,l):
    """Return [text of f, text of l] for two DOM elements.

    Each text is taken from the element's first child node and encoded as
    latin-1.  Meant to be applied pairwise to key and length elements.
    """
    return [node.firstChild.data.encode('latin-1') for node in (f, l)]
mas01mc@349 129
def ParseListXML(response):
    """Turn a list-query response into a list of [key, length] pairs.

    response: XML text containing matching <Rkey> and <Rlen> elements.

    Returns a real list (one ParseListConstruct result per entry), so the
    result can be indexed or iterated more than once.

    Raises ValueError when the numbers of <Rkey> and <Rlen> elements differ;
    pairing them up would otherwise fail deep inside the helper.
    """
    dom = parseString(response)
    try:
        fileElements = dom.getElementsByTagName('Rkey')
        lenElements = dom.getElementsByTagName('Rlen')
        if len(fileElements) != len(lenElements):
            raise ValueError("Malformed list response: %d Rkey but %d Rlen elements"
                             % (len(fileElements), len(lenElements)))
        return [ParseListConstruct(f, l)
                for f, l in zip(fileElements, lenElements)]
    finally:
        # Break the DOM's internal reference cycles once the text is copied out.
        dom.unlink()
mas01mc@349 137
mas01mc@349 138 ############### Status Query - Show the status of the database ###########
mas01mc@349 139 # Return a dictionary with the status fields
def GetDomElement(dom, field):
    """Return the text of the first <field> element in dom, latin-1 encoded.

    Returns "" when no such element exists, and also when the element is
    present but empty (it then has no child node to read text from).
    """
    els = dom.getElementsByTagName(field)
    if not els:
        return ""
    first = els[0].firstChild
    if first is None:
        return ""
    return first.data.encode('latin-1')
mas01mc@349 146
def RunStatusQuery():
    """Ask the server for the status of dbName and return it as a dict.

    The dict has the keys 'numFiles', 'dim', 'length', 'dudCount' and
    'nullCount'; each value is what GetDomElement extracts for the element
    of the same name in the response.
    """
    dom = parseString(SendXMLCommand(STATUS_TEMPLATE % (dbName)))
    fieldNames = ('numFiles', 'dim', 'length', 'dudCount', 'nullCount')
    return dict((name, GetDomElement(dom, name)) for name in fieldNames)
mas01mc@349 161
mas01mc@349 162
mas01mc@349 163
mas01mc@349 164 ############### Shingle Query - Show the data closest to one shingle ###########
def RunShingleQuery():
    """Send a fixed three-dimensional shingle query and print the matches.

    The query vector, power value and search parameters are hard-coded
    below.  The response is handed to ParseShingleXML, which prints the
    results; nothing is returned.

    The request is no longer dumped to stdout unconditionally: set the
    module-level debug flag to have SendXMLCommand echo it.
    """
    featureDim = '3'
    queryVector = '<v>1.0</v><v>0.5</v><v>0.25</v>'
    powerVector = '<p>-1.0</p>'
    pointNN = '10'
    trackNN = '5'
    queryRadius = '0.5'
    absoluteThreshold = '-4.5'
    lshExact = '0'
    message = SHINGLE_QUERY_TEMPLATE % (
        dbName, featureDim, queryVector, powerVector,
        pointNN, trackNN, queryRadius, absoluteThreshold, lshExact)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@349 181
mas01mc@349 182
mas01mc@349 183 ############### Sequence Query - Show the data closest to one query ###########
def RunSequenceQuery(argv):
    """Run a sequence query and print the matches.

    argv: the full command line.  With at least five entries, argv[2] is the
    feature-file key, argv[3] the query position and argv[4] the sequence
    length, and a search radius of 0.5 is used.  With no query arguments
    (two entries or fewer) a built-in example query is sent instead.

    Raises ValueError when only some of the three query arguments are given
    (previously this failed with an IndexError).

    The response is handed to ParseShingleXML, which prints the results;
    nothing is returned.
    """
    # Local import: only this function needs it.
    from xml.sax.saxutils import escape

    qType = '32'  # nSequence
    pointNN = '10'
    trackNN = '5'
    if len(argv) > 4:
        dbKey = argv[2]
        qPos = argv[3]
        seqLen = argv[4]
        queryRadius = '0.5'
    elif len(argv) > 2:
        raise ValueError("a sequence query needs three arguments: "
                         "feature_file pos len")
    else:
        dbKey = 'tmp/3.chr'
        qPos = '110'
        seqLen = '20'
        queryRadius = '0.4'

    # The values may come straight from the command line; escape XML
    # metacharacters so e.g. an '&' in a file name cannot break the request.
    message = SEQUENCE_TEMPLATE % (dbName, escape(dbKey), qType, escape(qPos),
                                   pointNN, trackNN, escape(seqLen), queryRadius)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@349 208
def ParseShingleXML(response):
    """Print a summary of the matches contained in a query response.

    response: XML text with parallel <Rlist> (result key), <Dist> (distance)
    and <Spos> (position) elements.

    One line "position distance result" is printed per match, with a blank
    line wherever the result key changes from the previous row.  If the
    three element lists differ in length, only the rows for which all three
    values exist are printed.  Empty elements are shown as empty strings.
    """
    # See http://diveintopython.org/xml_processing/parsing_xml.html
    def _CollectText(dom, tag):
        # Text of every <tag> element, latin-1 encoded ("" for empty ones).
        values = []
        for node in dom.getElementsByTagName(tag):
            child = node.firstChild
            if child is None:
                values.append("")
            else:
                values.append(child.data.encode('latin-1'))
        return values

    dom = parseString(response)
    try:
        resultList = _CollectText(dom, 'Rlist')
        distanceList = _CollectText(dom, 'Dist')
        positionList = _CollectText(dom, 'Spos')
    finally:
        # Release the DOM even if extraction fails.
        dom.unlink()

    # Print out a summary of the most similar results
    previous = None
    for position, distance, result in zip(positionList, distanceList, resultList):
        if previous is not None and result != previous:
            print("")
        print("%s %s %s" % (position, distance, result))
        previous = result
mas01mc@349 239
mas01mc@349 240 ############### XML and Network Utilities ###########
mas01mc@349 241 # Send one XML SOAP command to the server. Get back the response.
mas01mc@349 242
def SendXMLCommand(message):
    """Send one SOAP request to the audioDB server and return the reply body.

    message: the complete request text (one of the filled-in templates).

    A TCP connection is opened to (serverHost, serverPort), the message is
    sent, and everything the server writes is read until it closes the
    connection.  The reply is split at the first blank line ("\\r\\n\\r\\n")
    and the part after the HTTP header is returned; a reply without such a
    separator is returned unchanged.

    If the server sends nothing at all, a message is printed and the program
    exits with status 1 (it used to exit with 0, which signals success).
    When the module-level debug flag is set, request and reply are echoed.
    """
    if debug:
        print(message)
        print("")

    sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sSock.connect((serverHost, serverPort))
        # sendall keeps writing until the whole message has gone out;
        # a plain send() may transmit only part of it.
        sSock.sendall(message)
        # Now loop, while getting all the data we can get from the socket.
        chunks = []
        while True:
            c = sSock.recv(4096)
            if not c:
                break
            chunks.append(c)
    finally:
        # Close the socket on every path, including errors.
        sSock.close()

    data = "".join(chunks)
    if not data:
        print("No response from the audioDB server")
        sys.exit(1)

    # Split off the HTTP header and the data
    header, separator, response = data.partition("\r\n\r\n")
    if not separator:
        # No header/body separator: treat the whole reply as the body.
        response = data
    if debug:
        print('Client received:  %s' % response)
    return response
mas01mc@349 274
mas01mc@349 275
mas01mc@349 276 ############### Main Program - Figure out which query we want ###########
mas01mc@349 277
mas01mc@349 278 # Argument processing scheme described at: http://docs.python.org/lib/module-getopt.html
mas01mc@349 279
mas01mc@349 280 import sys
def main(argv):
    """Command-line entry point: run the query selected by argv[1].

    argv: the full command line (normally sys.argv).

      -s | status                      print the database status fields
      -q | query [feature_file pos len] run a sequence query
      -l | list                        list the database entries
      -v | vector                      run the built-in shingle query

    With no option, or an unknown one, the usage text is printed and the
    program exits with status 1.
    """
    if argv:
        cmdname = argv[0]
    else:
        cmdname = 'QueryADB.py'
    usage = ("Syntax: " + cmdname + " -q feature_file pos len\n"
             "        " + cmdname + " -s    (database status)\n"
             "        " + cmdname + " -l    (list database entries)\n"
             "        " + cmdname + " -v    (example vector query)")
    if len(argv) < 2:
        print(usage)
        sys.exit(1)

    queryType = argv[1]
    if queryType in ('-s', 'status'):
        for k, v in RunStatusQuery().items():
            print("%s %s" % (k, v))
    elif queryType in ('-q', 'query'):
        RunSequenceQuery(argv)
    elif queryType in ('-l', 'list'):
        for (f, l) in ParseListXML(RunListQuery()):
            print("%s\t%s" % (f, l))
    elif queryType in ('-v', 'vector'):
        RunShingleQuery()
    else:
        # An unrecognised option used to be ignored without any feedback.
        print(usage)
        sys.exit(1)


if __name__ == "__main__":
    main(sys.argv)
mas01mc@349 302
mas01mc@349 303