# HG changeset patch
# User mas01mc
# Date 1225105774 0
# Node ID d1afecc7178186a7a8ad872a50c03875570b42cb
# Parent 75a59f58319e9e158bff66768bc0dfdcf156f894
Adding QueryADB.py python library for audioDB query over SOAP
diff -r 75a59f58319e -r d1afecc71781 QueryADB.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/QueryADB.py Mon Oct 27 11:09:34 2008 +0000
@@ -0,0 +1,299 @@
+#!/usr/bin/python
+
+# Python code/library to query the audioDB via the SOAP web interface.
+# by Malcolm Slaney, August/September 2008
+# malcolm@ieee.org
+
+import sys, socket
+from xml.dom.minidom import parseString
+
+# Set debug to True to echo every request and reply to stdout
+# (see SendXMLCommand).
+debug = False
+# Database name substituted into every query template.
+dbName = 'tutorial.adb'
+
+# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
+#serverHost = 'research-hm3.corp.sk1.yahoo.com'
+# Host and TCP port that SendXMLCommand connects to.
+serverHost = 'localhost'
+serverPort = 14475
+
+# Start the server on serverHost with
+# ./audioDB -s 14475
+
+# Here are the templates used for the different kinds of queries. We'll fill in the
+# desired parameters as we go.
+# Request body for the list query (see RunListQuery).  The single %s is
+# filled with dbName.
+# NOTE(review): only the placeholder and the literal values are present; the
+# surrounding XML markup appears to be missing from this copy -- confirm
+# against the upstream file.
+LIST_TEMPLATE = """
+
+
+
+
+ %s
+ 0
+ 100000
+
+
+
+"""
+
+# Request body for the sequence query (see RunSequenceQuery).  The eight %s
+# placeholders are filled, in order, with dbName, dbKey, qType, qPos,
+# pointNN, trackNN, seqLen and queryRadius.
+# NOTE(review): only the placeholders and the literal values are present; the
+# surrounding XML markup appears to be missing from this copy -- confirm
+# against the upstream file.
+SEQUENCE_TEMPLATE = """
+
+
+
+
+ %s
+ %s
+ %s
+
+
+ %s
+ %s
+ %s
+ %s
+ %s
+ -4.0
+ 1
+ 0
+
+
+
+"""
+
+# Request body for the status query (see RunStatusQuery).  The single %s is
+# filled with dbName.
+# NOTE(review): only the placeholder is present; the surrounding XML markup
+# appears to be missing from this copy -- confirm against the upstream file.
+STATUS_TEMPLATE = """
+
+
+
+
+ %s
+
+
+
+"""
+
+# Request body for the shingle (vector) query (see RunShingleQuery).  The
+# seven %s placeholders are filled, in order, with dbName, queryVector,
+# pointNN, trackNN, queryRadius, absoluteThreshold and lshExact.
+# NOTE(review): only the placeholders and the literal values are present; the
+# surrounding XML markup appears to be missing from this copy -- confirm
+# against the upstream file.
+SHINGLE_QUERY_TEMPLATE = """
+
+
+
+
+ %s
+
+ %s
+
+ 32
+ %s
+ %s
+ 1
+ %s
+ %s
+ %s
+
+
+
+"""
+
+############### List Query - Show the files in the database ###########
+# RunListQuery returns the raw XML response; ParseListXML converts it into a
+# list of [key identifier, frame length] pairs.
+def RunListQuery():
+    """Send the list query for dbName and return the raw reply.
+
+    The return value is whatever SendXMLCommand hands back (the response
+    body as text); pass it to ParseListXML to get the individual entries.
+    """
+    request = LIST_TEMPLATE % (dbName,)
+    return SendXMLCommand(request)
+
+# Construct a list from the two DOM entries passed. Used with the map routine to
+# assemble the output.
+def ParseListConstruct(f,l):
+    """Build a two-item list from a pair of DOM elements.
+
+    Each item is the text of the element's first child node, encoded as
+    latin-1: the text of f first, then the text of l.
+    """
+    return [node.firstChild.data.encode('latin-1') for node in (f, l)]
+
+def ParseListXML(response):
+    """Parse a list-query reply into a list of two-item entries.
+
+    response is the XML text of the reply.  The Rkey and Rlen elements are
+    paired up in document order and each pair is converted by
+    ParseListConstruct.
+    """
+    dom = parseString(response)
+    try:
+        fileElements = dom.getElementsByTagName('Rkey')
+        lenElements = dom.getElementsByTagName('Rlen')
+        # list() keeps the result a real list even where map() is lazy.
+        return list(map(ParseListConstruct, fileElements, lenElements))
+    finally:
+        # Release the DOM tree, as ParseShingleXML does.  The entries built
+        # above are plain encoded strings and do not reference it.
+        dom.unlink()
+
+############### Status Query - Show the status of the database ###########
+# RunStatusQuery returns a dictionary with the status fields; GetDomElement
+# extracts the text of a single field from the parsed reply.
+def GetDomElement(dom, field):
+    """Return the latin-1 encoded text of the first `field` element in dom.
+
+    Returns "" when the document contains no such element, or when the
+    first such element is empty and so has no child node to read.
+    """
+    els = dom.getElementsByTagName(field)
+    if not els:
+        return ""
+    firstChild = els[0].firstChild
+    if firstChild is None:
+        # An empty element such as <dim/> used to raise AttributeError here.
+        return ""
+    return firstChild.data.encode('latin-1')
+
+def RunStatusQuery():
+    """Send the status query for dbName and return the reply as a dict.
+
+    The dictionary has the keys 'numFiles', 'dim', 'length', 'dudCount' and
+    'nullCount'.  Each value is the text GetDomElement extracts for the
+    element of that name in the reply.
+    """
+    message = STATUS_TEMPLATE % (dbName,)
+    response = SendXMLCommand(message)
+    dom = parseString(response)
+    try:
+        status = {}
+        for field in ('numFiles', 'dim', 'length', 'dudCount', 'nullCount'):
+            status[field] = GetDomElement(dom, field)
+        return status
+    finally:
+        # Release the DOM tree, as ParseShingleXML does.
+        dom.unlink()
+
+
+
+############### Shingle Query - Show the data closest to one shingle ###########
+def RunShingleQuery():
+    """Send a fixed example shingle (vector) query and print the matches.
+
+    All query parameters are hard-coded below.  The reply is handed to
+    ParseShingleXML, which prints the results; nothing is returned.
+    """
+    # NOTE(review): this looks like several numbers run together (1.0, 0.5,
+    # 0.25?); any markup separating them appears to be missing from this
+    # copy -- confirm against the upstream file.
+    queryVector='1.00.50.25'
+    pointNN = '10'
+    trackNN = '5'
+    queryRadius = '0.5'
+    absoluteThreshold = '-4.5'
+    lshExact = '0'
+    message = SHINGLE_QUERY_TEMPLATE % (
+        dbName, queryVector, pointNN, trackNN,
+        queryRadius, absoluteThreshold, lshExact)
+    # The request is not printed here: SendXMLCommand already echoes it when
+    # debug is set, matching RunSequenceQuery.
+    response = SendXMLCommand(message)
+    ParseShingleXML(response)
+
+
+############### Sequence Query - Show the data closest to one query ###########
+def RunSequenceQuery(argv):
+    """Run a sequence query and print the closest matches.
+
+    argv is a command-line style list.  When it has more than two entries,
+    entries 2, 3 and 4 supply the query key (feature file), the query
+    position and the sequence length.  With two or fewer entries a built-in
+    example query is run instead.  The reply is printed by ParseShingleXML;
+    nothing is returned.
+
+    Raises IndexError if argv has more than two but fewer than five entries.
+    """
+    # Settings shared by both kinds of query.
+    qType = '32'  # nSequence
+    pointNN = '10'
+    trackNN = '5'
+
+    if len(argv) > 2:
+        if len(argv) < 5:
+            # Same exception type the bare index lookup used to raise, but
+            # with a message that says what is actually missing.
+            raise IndexError(
+                "sequence query needs feature_file, pos and len arguments")
+        dbKey, qPos, seqLen = argv[2], argv[3], argv[4]
+        queryRadius = '0.5'
+    else:
+        dbKey = 'tmp/3.chr'
+        qPos = '110'
+        seqLen = '20'
+        queryRadius = '0.4'
+
+    message = SEQUENCE_TEMPLATE % (
+        dbName, dbKey, qType, qPos, pointNN, trackNN, seqLen, queryRadius)
+    response = SendXMLCommand(message)
+    ParseShingleXML(response)
+
+def ParseShingleXML(response):
+    """Print a summary of the matches found in a query reply.
+
+    Collects the latin-1 encoded text of every Rlist, Dist and Spos element
+    (in document order) and prints one "position distance result" line per
+    Rlist entry, with a blank line wherever the result differs from the
+    one before it.  Returns nothing.
+    """
+    # See http://diveintopython.org/xml_processing/parsing_xml.html
+    dom = parseString(response)
+    results = [n.firstChild.data.encode('latin-1')
+               for n in dom.getElementsByTagName('Rlist')]
+    distances = [n.firstChild.data.encode('latin-1')
+                 for n in dom.getElementsByTagName('Dist')]
+    positions = [n.firstChild.data.encode('latin-1')
+                 for n in dom.getElementsByTagName('Spos')]
+
+    # Summarise the most similar results, separating groups of equal results.
+    for idx, name in enumerate(results):
+        if idx > 0 and name != results[idx - 1]:
+            print
+        print positions[idx], distances[idx], name
+
+    dom.unlink()
+
+############### XML and Network Utilities ###########
+# Send one XML SOAP command to the server. Get back the response.
+
+def SendXMLCommand(message):
+    """Send one request to the server and return the body of its reply.
+
+    Opens a TCP connection to (serverHost, serverPort), sends message, reads
+    until the server closes the connection, and returns everything after the
+    first "\\r\\n\\r\\n" (the HTTP header in front of it is discarded).  When
+    debug is set, the request and the reply body are echoed to stdout.
+
+    Exits the program with status 1 if the server sends nothing back.
+    Raises ValueError if the reply has no header/body separator.
+    """
+    if debug:
+        print message
+        print
+
+    sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    try:
+        sSock.connect((serverHost, serverPort))
+        # sendall() keeps writing until the whole request has gone out;
+        # send() may return after a partial write.
+        sSock.sendall(message)
+        # Collect the pieces and join once instead of growing a string.
+        chunks = []
+        while True:
+            c = sSock.recv(4096)
+            if not c:
+                break
+            chunks.append(c)
+    finally:
+        # Close the socket on every path, including errors and the early
+        # exit below.
+        sSock.close()
+
+    data = "".join(chunks)
+    if not data:
+        print "No response from the audioDB server"
+        # This is a failure, so report it with a non-zero exit status.
+        sys.exit(1)
+
+    # Split off the HTTP header and the data
+    parts = data.split("\r\n\r\n", 1)
+    if len(parts) != 2:
+        raise ValueError("audioDB reply has no HTTP header/body separator")
+    response = parts[1]
+    if debug:
+        print 'Client received: ',response
+
+    return response
+
+
+############### Main Program - Figure out which query we want ###########
+
+# Argument processing scheme described at: http://docs.python.org/lib/module-getopt.html
+
+import sys
+if __name__=="__main__":
+    # Command-line entry point: the first argument selects the query.
+    #   -s | status   print each database status field and its value
+    #   -q | query    run a sequence query (optionally: feature_file pos len)
+    #   -l | list     print the database entries, one "key<TAB>length" per line
+    #   -v | vector   run the built-in example shingle query
+    usage = "Syntax: " + sys.argv[0] + " -q feature_file pos len"
+    if len(sys.argv) == 1:
+        print usage
+        sys.exit(1)
+
+    queryType = sys.argv[1]
+    if queryType in ('-s', 'status'):
+        response = RunStatusQuery()
+        for k, v in response.iteritems():
+            print k, v
+    elif queryType in ('-q', 'query'):
+        RunSequenceQuery(sys.argv)
+    elif queryType in ('-l', 'list'):
+        results = ParseListXML(RunListQuery())
+        for (f,l) in results:
+            print "%s\t%s" % (f,l)
+    elif queryType in ('-v', 'vector'):
+        RunShingleQuery()
+    else:
+        # An unrecognised option used to do nothing and exit with success.
+        print "Unknown query type: " + queryType
+        print usage
+        sys.exit(1)
+
+