#!/usr/bin/env python3
# Recovered from an HG changeset patch:
#   User mas01mc
#   Date 1225105774 0
#   Node ID d1afecc7178186a7a8ad872a50c03875570b42cb
#   Parent 75a59f58319e9e158bff66768bc0dfdcf156f894
#   Adding QueryADB.py python library for audioDB query over SOAP
#   diff -r 75a59f58319e -r d1afecc71781 QueryADB.py
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/QueryADB.py Mon Oct 27 11:09:34 2008 +0000
#   @@ -0,0 +1,299 @@
"""Python code/library to query the audioDB via the SOAP web interface.

by Malcolm Slaney, August/September 2008
malcolm@ieee.org

Each ``Run*Query`` function fills in one of the request templates below,
sends it to the server at ``serverHost:serverPort`` over a plain TCP socket
and parses the XML reply with ``xml.dom.minidom``.

Command line usage (first argument selects the query):
    -s | status                    print the database status fields
    -l | list                      print the key / length of every entry
    -q | query [file pos len]      run a sequence query
    -v | vector                    run a query against one fixed shingle
"""

import socket
import sys
from xml.dom.minidom import parseString

# When True, SendXMLCommand echoes every request and response.
debug = False
# Database name substituted into every request template.
dbName = 'tutorial.adb'

# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
#serverHost = 'research-hm3.corp.sk1.yahoo.com'
serverHost = 'localhost'
serverPort = 14475

# Start the server on serverHost with
#    ./audioDB -s 14475

# Here are the templates used for the different kinds of queries.  We'll fill
# in the desired parameters as we go.
#
# NOTE(review): in this copy of the file the templates contain only the bare
# placeholders and constants; the surrounding SOAP/XML markup looks like it
# was stripped somewhere upstream.  They are reproduced as found -- restore
# the markup from the original changeset before relying on them.
LIST_TEMPLATE = """




 %s
 0
 100000



"""

SEQUENCE_TEMPLATE = """




 %s
 %s
 %s


 %s
 %s
 %s
 %s
 %s
 -4.0
 1
 0



"""

STATUS_TEMPLATE = """




 %s



"""

SHINGLE_QUERY_TEMPLATE = """




 %s

 %s

 32
 %s
 %s
 1
 %s
 %s
 %s



"""

# Fields copied out of a status reply by RunStatusQuery.
_STATUS_FIELDS = ('numFiles', 'dim', 'length', 'dudCount', 'nullCount')


def _node_text(node):
    """Return the text held by *node*'s first child, or "" if it has none.

    An empty element (``<dim/>``) has ``firstChild is None``; reading
    ``.data`` from it directly would raise ``AttributeError``.
    """
    child = node.firstChild
    if child is None:
        return ""
    return getattr(child, 'data', "")


def _usage(program):
    """Return the one-line syntax message shown for bad command lines."""
    return "Syntax: " + program + " -q feature_file pos len"


############### List Query - Show the files in the database ###########
def RunListQuery():
    """Send the list request for ``dbName`` and return the raw XML reply.

    Pass the result to ParseListXML to get (key, length) pairs.
    """
    return SendXMLCommand(LIST_TEMPLATE % (dbName,))


def ParseListConstruct(f, l):
    """Build a ``[key, length]`` pair from two DOM elements.

    f -- element whose text is the key identifier
    l -- element whose text is the frame length
    """
    return [_node_text(f), _node_text(l)]


def ParseListXML(response):
    """Parse a list reply into a list of ``[key, length]`` string pairs.

    The i-th ``Rkey`` element is paired with the i-th ``Rlen`` element;
    if the counts differ the surplus elements are ignored.
    """
    dom = parseString(response)
    try:
        keys = dom.getElementsByTagName('Rkey')
        lengths = dom.getElementsByTagName('Rlen')
        return [ParseListConstruct(k, n) for k, n in zip(keys, lengths)]
    finally:
        # minidom trees contain reference cycles; release them explicitly.
        dom.unlink()


############### Status Query - Show the status of the database ###########
def GetDomElement(dom, field):
    """Return the text of the first ``field`` element in *dom*.

    Returns "" when no such element exists or when it is empty.
    """
    elements = dom.getElementsByTagName(field)
    if not elements:
        return ""
    return _node_text(elements[0])


def RunStatusQuery():
    """Query the database status and return it as a dictionary.

    The keys are 'numFiles', 'dim', 'length', 'dudCount' and 'nullCount';
    each value is the reply's text for that field ("" if absent).
    """
    response = SendXMLCommand(STATUS_TEMPLATE % (dbName,))
    dom = parseString(response)
    try:
        return {name: GetDomElement(dom, name) for name in _STATUS_FIELDS}
    finally:
        dom.unlink()


############### Shingle Query - Show the data closest to one shingle ###########
def RunShingleQuery():
    """Run a query against one hard-coded shingle and print the matches.

    Returns the rows produced by ParseShingleXML.
    """
    # NOTE(review): this literal looks like three values whose separating
    # markup was lost (see the template note above) -- confirm upstream.
    queryVector = '1.00.50.25'
    pointNN = '10'
    trackNN = '5'
    queryRadius = '0.5'
    absoluteThreshold = '-4.5'
    lshExact = '0'
    message = SHINGLE_QUERY_TEMPLATE % (
        dbName, queryVector, pointNN, trackNN,
        queryRadius, absoluteThreshold, lshExact)
    # The request is echoed by SendXMLCommand when ``debug`` is set, so it is
    # no longer printed unconditionally here.
    response = SendXMLCommand(message)
    return ParseShingleXML(response)


############### Sequence Query - Show the data closest to one query ###########
def RunSequenceQuery(argv):
    """Run a sequence query and print the matches.

    argv -- a command-line style list.  With two or fewer entries the
            built-in example query is used.  Otherwise argv[2] is the
            feature file key, argv[3] the query position and argv[4] the
            sequence length.

    Returns the rows produced by ParseShingleXML.
    Raises ValueError when a feature file is given without both the
    position and the length.
    """
    qType = '32'  # nSequence
    pointNN = '10'
    trackNN = '5'
    if len(argv) > 2:
        if len(argv) < 5:
            # Previously this fell through to an IndexError on argv[3]/[4].
            raise ValueError(
                "sequence query needs feature_file, pos and len")
        dbKey = argv[2]
        qPos = argv[3]
        seqLen = argv[4]
        queryRadius = '0.5'
    else:
        dbKey = 'tmp/3.chr'
        qPos = '110'
        seqLen = '20'
        queryRadius = '0.4'

    message = SEQUENCE_TEMPLATE % (
        dbName, dbKey, qType, qPos, pointNN, trackNN, seqLen, queryRadius)
    response = SendXMLCommand(message)
    return ParseShingleXML(response)


def ParseShingleXML(response):
    """Print a summary of a query reply and return its rows.

    Collects the text of every ``Rlist`` (result key), ``Dist`` (distance)
    and ``Spos`` (position) element.  One line ``position distance key`` is
    printed per result, with a blank line whenever the key changes.

    Returns a list of ``(position, distance, key)`` string tuples, one per
    ``Rlist`` element; a missing distance or position is reported as "".
    See http://diveintopython.org/xml_processing/parsing_xml.html
    """
    dom = parseString(response)
    try:
        keys = [_node_text(n) for n in dom.getElementsByTagName('Rlist')]
        distances = [_node_text(n) for n in dom.getElementsByTagName('Dist')]
        positions = [_node_text(n) for n in dom.getElementsByTagName('Spos')]
    finally:
        dom.unlink()

    def at(values, index):
        # Tolerate replies where one list is shorter than the result list.
        return values[index] if index < len(values) else ""

    rows = [(at(positions, i), at(distances, i), key)
            for i, key in enumerate(keys)]

    # Print out a summary of the most similar results.
    previous_key = None
    for index, (position, distance, key) in enumerate(rows):
        if index > 0 and key != previous_key:
            print()
        print(position, distance, key)
        previous_key = key
    return rows


############### XML and Network Utilities ###########
def SendXMLCommand(message):
    """Send one XML SOAP command to the server and return the response body.

    message -- the request, as text (or already-encoded bytes).

    The reply is read until the server closes the connection, the HTTP
    header is split off at the first blank line, and the remainder is
    returned as text.  If no blank line is present the whole reply is
    returned.  Exits the process with status 1 if the server sends nothing.
    """
    if debug:
        print(message)
        print()

    # Sockets carry bytes; assumes the server accepts UTF-8 -- TODO confirm.
    payload = message.encode('utf-8') if isinstance(message, str) else message

    chunks = []
    # The context manager closes the socket on every path, including errors.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server:
        server.connect((serverHost, serverPort))
        # sendall() keeps writing until the whole request has gone out;
        # plain send() may transmit only part of it.
        server.sendall(payload)
        # Now loop, getting all the data we can get from the socket.
        while True:
            chunk = server.recv(4096)
            if not chunk:
                break
            chunks.append(chunk)

    # Assumes a UTF-8 reply -- TODO confirm against the server.
    data = b"".join(chunks).decode('utf-8', 'replace')
    if data == "":
        print("No response from the audioDB server", file=sys.stderr)
        sys.exit(1)

    # Split off the HTTP header and the data.
    header, separator, body = data.partition("\r\n\r\n")
    response = body if separator else data
    if debug:
        print('Client received: ', response)
    return response


############### Main Program - Figure out which query we want ###########

# Argument processing scheme described at:
# http://docs.python.org/lib/module-getopt.html
def _main(argv):
    """Dispatch on argv[1] and return the process exit status."""
    program = argv[0] if argv else 'QueryADB.py'
    if len(argv) < 2:
        print(_usage(program))
        return 1

    queryType = argv[1]
    if queryType in ('-s', 'status'):
        for key, value in RunStatusQuery().items():
            print(key, value)
    elif queryType in ('-q', 'query'):
        try:
            RunSequenceQuery(argv)
        except ValueError:
            print(_usage(program))
            return 1
    elif queryType in ('-l', 'list'):
        for (f, l) in ParseListXML(RunListQuery()):
            print("%s\t%s" % (f, l))
    elif queryType in ('-v', 'vector'):
        RunShingleQuery()
    else:
        # An unrecognised query type used to do nothing at all.
        print(_usage(program))
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))