annotate QueryADB.py @ 354:4871a3ed9e36 pre_api

Added serialized feature query Web Service, server-side only, example Python script: QueryADB.py -v
author mas01mc
date Fri, 07 Nov 2008 12:48:26 +0000
parents
children f9d86b1db21c
rev   line source
mas01mc@354 1 #!/usr/bin/python
mas01mc@354 2
mas01mc@354 3 # Python code/library to query the audioDB via the SOAP web interface.
mas01mc@354 4 # by Malcolm Slaney, August/September 2008
mas01mc@354 5 # malcolm@ieee.org
mas01mc@354 6
mas01mc@354 7 import sys, socket
mas01mc@354 8 from xml.dom.minidom import parseString
mas01mc@354 9
# Module-wide settings.  These are ordinary module globals, so no ``global``
# statement is needed (or meaningful) at module level.
debug = False            # When True, SendXMLCommand echoes each request and response.
dbName = 'tutorial.adb'  # Database name inserted into every query template.

# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
#serverHost = 'research-hm3.corp.sk1.yahoo.com'
serverHost = 'localhost'
serverPort = 14475
mas01mc@354 19
mas01mc@354 20 # Start the server on serverHost with
mas01mc@354 21 # ./audioDB -s 14475
mas01mc@354 22
# Here are the templates used for the different kinds of queries.  We'll fill
# in the desired parameters as we go.
#
# Every request shares the same SOAP envelope; only the operation element
# placed inside <SOAP-ENV:Body> differs.  The envelope text is therefore
# written once and wrapped around each operation.  The resulting strings are
# %-format templates: each %s slot is filled, in order, by the Run*Query
# function that uses the template.
_SOAP_ENVELOPE_OPEN = """
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
"""

_SOAP_ENVELOPE_CLOSE = """</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""


def _SoapRequest(operation):
    """Return the operation element text wrapped in the shared SOAP envelope."""
    return _SOAP_ENVELOPE_OPEN + operation + _SOAP_ENVELOPE_CLOSE


# One slot: dbName.
LIST_TEMPLATE = _SoapRequest("""<adb:liszt>
<dbName>%s</dbName>
<lisztOffset>0</lisztOffset>
<lisztLength>100000</lisztLength>
</adb:liszt>
""")

# Eight slots: dbName, featureFileName, queryType, queryPoint, pointNN,
# trackNN, sequenceLength, radius.
SEQUENCE_TEMPLATE = _SoapRequest("""<adb:sequenceQueryByKey>
<dbName>%s</dbName>
<featureFileName>%s</featureFileName>
<queryType>%s</queryType>
<trackFileName></trackFileName>
<timesFileName></timesFileName>
<queryPoint>%s</queryPoint>
<pointNN>%s</pointNN> <!-- Number of nearest neighbors to retrieve -->
<trackNN>%s</trackNN> <!-- Number of tracks to retrieve -->
<sequenceLength>%s</sequenceLength> <!-- Number of frames in a shingle -->
<radius>%s</radius> <!-- Distance radius to search -->
<absolute-threshold>-4.0</absolute-threshold>
<usingQueryPoint>1</usingQueryPoint>
<lsh-exact>0</lsh-exact>
</adb:sequenceQueryByKey>
""")

# One slot: dbName.
STATUS_TEMPLATE = _SoapRequest("""<adb:status>
<dbName>%s</dbName>
</adb:status>
""")

# Twelve slots: dbName, dim, the <v> feature elements, the <p> power elements,
# queryPos, pointNN, trackNN, radius, absolute-threshold, relative-threshold,
# exhaustive, lsh-exact.
SHINGLE_QUERY_TEMPLATE = _SoapRequest("""<adb:shingleQuery>
<dbName>%s</dbName>
<qVector>
<dim>%s</dim>
%s
%s
</qVector>
<keyList></keyList>
<timesFileName></timesFileName>
<queryType>32</queryType>
<queryPos>%s</queryPos>
<pointNN>%s</pointNN>
<trackNN>%s</trackNN>
<sequenceLength>1</sequenceLength>
<radius>%s</radius>
<absolute-threshold>%s</absolute-threshold>
<relative-threshold>%s</relative-threshold>
<exhaustive>%s</exhaustive>
<lsh-exact>%s</lsh-exact>
</adb:shingleQuery>
""")
mas01mc@354 119
mas01mc@354 120 ############### List Query - Show the files in the database ###########
def RunListQuery():
    """Ask the server to list the contents of the database.

    Fills LIST_TEMPLATE with the module-level dbName, sends the request with
    SendXMLCommand and returns the raw XML response body unchanged.  Pass the
    result to ParseListXML to obtain the (key identifier, frame length) pairs.
    """
    # dbName is only read here, so no ``global`` declaration is required.
    return SendXMLCommand(LIST_TEMPLATE % (dbName,))
mas01mc@354 128
# Construct a list from the two DOM entries passed.  Used by ParseListXML to
# assemble its output.
def ParseListConstruct(f, l):
    """Return [text of f, text of l] for two DOM elements.

    f -- DOM element whose first child holds the key identifier text.
    l -- DOM element whose first child holds the frame length text.

    An element with no children (e.g. <Rkey></Rkey>) contributes an empty
    string instead of raising AttributeError.  Each text is returned as the
    interpreter's native ``str``: on Python 2 the unicode DOM data is encoded
    as Latin-1, as before; on Python 3 it is returned as is.
    """
    pair = []
    for node in (f, l):
        child = node.firstChild
        if child is None:
            text = ""
        else:
            text = child.data
        if not isinstance(text, str):
            # Python 2: DOM text is unicode; callers expect a byte string.
            text = text.encode('latin-1')
        pair.append(text)
    return pair
mas01mc@354 134
def ParseListXML(response):
    """Parse a list-query response into [key, length] pairs.

    response -- XML text of the response body, as returned by RunListQuery.

    Returns a real list (on Python 2 and 3 alike) with one entry per
    Rkey/Rlen element pair, each built by ParseListConstruct.  The elements
    are paired positionally; if the two tag lists differ in length the
    unpaired tail is ignored rather than being padded with None.
    """
    dom = parseString(response)
    try:
        fileElements = dom.getElementsByTagName('Rkey')
        lenElements = dom.getElementsByTagName('Rlen')
        return [ParseListConstruct(f, l)
                for f, l in zip(fileElements, lenElements)]
    finally:
        # Only plain strings leave this function, so the DOM can be released.
        dom.unlink()
mas01mc@354 142
mas01mc@354 143 ############### Status Query - Show the status of the database ###########
mas01mc@354 144 # Return a dictionary with the status fields
def GetDomElement(dom, field):
    """Return the text of the first element named *field*, or "".

    dom   -- a parsed DOM document (or any node offering getElementsByTagName).
    field -- tag name to look up.

    The empty string is returned both when no such element exists and when
    the first match has no children (e.g. <dim/>), instead of raising
    AttributeError in the latter case.  The text is the interpreter's native
    ``str``: Latin-1 encoded on Python 2 (as before), unchanged on Python 3.
    """
    els = dom.getElementsByTagName(field)
    if not els:
        return ""
    child = els[0].firstChild
    if child is None:
        return ""
    text = child.data
    if not isinstance(text, str):
        # Python 2: DOM text is unicode; callers expect a byte string.
        text = text.encode('latin-1')
    return text
mas01mc@354 151
def RunStatusQuery():
    """Query the database status and return it as a dictionary.

    Sends STATUS_TEMPLATE filled with the module-level dbName and returns a
    dict with the keys 'numFiles', 'dim', 'length', 'dudCount' and
    'nullCount'.  Each value is the text of the like-named element in the
    response as extracted by GetDomElement ("" when the element is absent).
    """
    response = SendXMLCommand(STATUS_TEMPLATE % (dbName,))
    dom = parseString(response)
    try:
        status = {}
        for field in ('numFiles', 'dim', 'length', 'dudCount', 'nullCount'):
            status[field] = GetDomElement(dom, field)
        return status
    finally:
        # Only plain strings are returned, so the DOM can be released.
        dom.unlink()
mas01mc@354 166
mas01mc@354 167
mas01mc@354 168
mas01mc@354 169 ############### Shingle/Matrix Query - Show the data closest to shingle range ###########
mas01mc@354 170 #
mas01mc@354 171 # Encode features as a matrix with dim columns, and (optionally) powers as a matrix with one column
mas01mc@354 172 # dim # number of columns
mas01mc@354 173 # f1,1 f1,2...f1,dim # first row
mas01mc@354 174 # f2,1 f2,2...f2,dim
mas01mc@354 175 # ...
mas01mc@354 176 # fN,1 fN,2...fN,dim # last row
mas01mc@354 177 # p1,1 # first row's power
mas01mc@354 178 # p2,1
mas01mc@354 179 # ...
mas01mc@354 180 # pN,1 # last row's power
mas01mc@354 181 #
def RunShingleQuery():
    """Send a fixed example shingle (feature-vector) query and print the hits.

    The query is a hard-coded example: two rows of three features with one
    power value per row.  The filled-in request is printed to stdout, sent
    with SendXMLCommand, and the response is summarised by ParseShingleXML.
    Returns None.
    """
    featureDim = '3'
    # two rows of features
    queryVector = '<v>1.0</v><v>0.5</v><v>0.25</v><v>0.5</v><v>1.5</v><v>1.0</v>'
    powerVector = '<p>-1.0</p><p>-1.0</p>'  # one power feature per row
    queryPos = '0'    # where in the feature sequence to start the shingle query
    pointNN = '10'    # how many near points to return per track
    trackNN = '10'    # how many near tracks to return
    radius = '1.0'    # search radius
    absoluteThreshold = '-4.5'  # absolute silence threshold in Bels (query and database shingles)
    relativeThreshold = '0'     # relative silence threshold in Bels between features, 0 = ignore
    exhaustive = '0'  # 1 = perform query using all subsequences of features of length sequenceLength
    lshExact = '0'    # if using an index then compute exact distances after LSH retrieval

    # The values must appear in the order of the %s slots in the template.
    message = SHINGLE_QUERY_TEMPLATE % (
        dbName, featureDim, queryVector, powerVector, queryPos, pointNN,
        trackNN, radius, absoluteThreshold, relativeThreshold, exhaustive,
        lshExact)
    # The request is always echoed, independently of the debug flag.
    print(message)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@354 201
mas01mc@354 202
mas01mc@354 203 ############### Sequence Query - Show the data closest to one query ###########
def RunSequenceQuery(argv):
    """Run a sequence query by key and print the closest matches.

    argv -- the full command line.  With operands, argv[2] is the feature
            file key, argv[3] the query position and argv[4] the sequence
            length.  With no operands (len(argv) <= 2) a built-in example
            query is sent instead.

    Exits through sys.exit with the usage text when some, but not all, of
    the three operands are given (this used to surface as an IndexError).
    The response is summarised on stdout by ParseShingleXML.  Returns None.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    qType = '32'  # nSequence
    pointNN = '10'
    trackNN = '5'
    if len(argv) > 2:
        if len(argv) < 5:
            sys.exit("Syntax: " + argv[0] + " -q feature_file pos len")
        # Command-line text goes into XML element content, so &, < and >
        # must be escaped to keep the request well formed.
        dbKey = escape(argv[2])
        qPos = escape(argv[3])
        seqLen = escape(argv[4])
        queryRadius = '0.5'
    else:
        # Built-in example query; note it uses a different radius.
        dbKey = 'tmp/3.chr'
        qPos = '110'
        seqLen = '20'
        queryRadius = '0.4'

    # The values must appear in the order of the %s slots in the template.
    message = SEQUENCE_TEMPLATE % (dbName, dbKey, qType, qPos, pointNN,
                                   trackNN, seqLen, queryRadius)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@354 228
def ParseShingleXML(response):
    """Print a summary of the matches contained in a query response.

    response -- XML text of the response body.

    Collects the text of every Rlist (result key), Dist (distance) and Spos
    (position) element and prints one "position distance key" line per
    match, with a blank line wherever the key differs from the previous
    line's key.  The three lists are paired positionally; if they differ in
    length, only the complete triples are printed instead of raising
    IndexError.  Empty elements contribute an empty string.  Returns None.
    """
    # See http://diveintopython.org/xml_processing/parsing_xml.html
    dom = parseString(response)
    try:
        def _Texts(tag):
            # Text of every element named *tag*, as native str.
            values = []
            for node in dom.getElementsByTagName(tag):
                child = node.firstChild
                if child is None:
                    text = ""
                else:
                    text = child.data
                if not isinstance(text, str):
                    # Python 2: DOM text is unicode; print byte strings.
                    text = text.encode('latin-1')
                values.append(text)
            return values

        resultList = _Texts('Rlist')
        distanceList = _Texts('Dist')
        positionList = _Texts('Spos')

        # Print out a summary of the most similar results
        previous = None
        for position, distance, result in zip(positionList, distanceList,
                                              resultList):
            if previous is not None and result != previous:
                print("")
            print("%s %s %s" % (position, distance, result))
            previous = result
    finally:
        # Release the DOM even if parsing the contents fails part-way.
        dom.unlink()
mas01mc@354 259
mas01mc@354 260 ############### XML and Network Utilities ###########
mas01mc@354 261 # Send one XML SOAP command to the server. Get back the response.
mas01mc@354 262
def SendXMLCommand(message):
    """Send one XML SOAP request to the server and return the response body.

    message -- the complete request text.

    Opens a TCP connection to (serverHost, serverPort), sends the request,
    reads until the server closes the connection and returns everything
    after the first blank line (the HTTP header separator).  If the reply
    contains no such separator the whole reply is returned.  The result is
    the interpreter's native ``str`` (decoded as UTF-8 on Python 3).

    When the module-level debug flag is set, the request and the response
    body are echoed to stdout.  If the server sends nothing at all, a
    message is printed and the process exits via sys.exit.
    """
    if debug:
        print(message)
        print("")

    # Sockets carry bytes; on Python 2 a plain str already is bytes.
    if not isinstance(message, bytes):
        message = message.encode('utf-8')

    sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sSock.connect((serverHost, serverPort))
        # sendall keeps writing until the whole request has gone out;
        # a bare send() may transmit only part of it.
        sSock.sendall(message)
        chunks = []
        while True:
            c = sSock.recv(4096)
            if not c:  # empty read: the server closed the connection
                break
            chunks.append(c)
    finally:
        # Close on every path, including errors and the early exit below.
        sSock.close()

    data = b"".join(chunks)
    if not data:
        print("No response from the audioDB server")
        # NOTE(review): status 0 is kept for backward compatibility, although
        # a non-zero status would be more conventional for this failure.
        sys.exit(0)

    # Split off the HTTP header and keep the data; without a separator the
    # single-element result makes the whole reply the body.
    response = data.split(b"\r\n\r\n", 1)[-1]
    if not isinstance(response, str):
        response = response.decode('utf-8', 'replace')
    if debug:
        print("%s %s" % ('Client received: ', response))
    return response
mas01mc@354 294
mas01mc@354 295
mas01mc@354 296 ############### Main Program - Figure out which query we want ###########
mas01mc@354 297
mas01mc@354 298 # Argument processing scheme described at: http://docs.python.org/lib/module-getopt.html
mas01mc@354 299
import sys


def main(argv):
    """Dispatch on the query type given on the command line.

    argv -- the full command line (normally sys.argv).  argv[1] selects the
            query: '-s'/'status', '-q'/'query', '-l'/'list' or '-v'/'vector'.

    Prints the usage text and exits with status 1 when no query type is
    given or when it is not recognised (an unknown option used to be
    ignored silently).
    """
    usage = ("Syntax: " + argv[0] + " -q feature_file pos len\n"
             "        " + argv[0] + " -s | -l | -v")
    if len(argv) == 1:
        print(usage)
        sys.exit(1)

    queryType = argv[1]
    if queryType in ('-s', 'status'):
        # One "field value" line per status entry.
        for k, v in RunStatusQuery().items():
            print("%s %s" % (k, v))
    elif queryType in ('-q', 'query'):
        RunSequenceQuery(argv)
    elif queryType in ('-l', 'list'):
        # One tab-separated "key<TAB>length" line per database entry.
        for (f, l) in ParseListXML(RunListQuery()):
            print("%s\t%s" % (f, l))
    elif queryType in ('-v', 'vector'):
        RunShingleQuery()
    else:
        print(usage)
        sys.exit(1)


if __name__ == "__main__":
    main(sys.argv)
mas01mc@354 322
mas01mc@354 323