mas01mc@354: #!/usr/bin/python
mas01mc@354:
mas01mc@354: # Python code/library to query the audioDB via the SOAP web interface.
mas01mc@354: # by Malcolm Slaney, August/September 2008
mas01mc@354: # malcolm@ieee.org
mas01mc@354:
mas01mc@354: import sys, socket
mas01mc@354: from xml.dom.minidom import parseString
mas01mc@354:
# Module-wide settings read by the query functions below.  (A ``global``
# statement is only meaningful inside a function, so plain assignments
# are all that is needed here.)
debug = False            # when True, SendXMLCommand echoes requests and replies
dbName = 'tutorial.adb'  # database name substituted into every request template

# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
#serverHost = 'research-hm3.corp.sk1.yahoo.com'
serverHost = 'localhost'
serverPort = 14475
mas01mc@354:
mas01mc@354: # Start the server on serverHost with
mas01mc@354: # ./audioDB -s 14475
mas01mc@354:
# Here are the templates used for the different kinds of queries. We'll fill in the
# desired parameters as we go.
# Request template for the list query (filled in by RunListQuery).
# The single %s placeholder receives the database name.
# NOTE(review): only the placeholder and the literal values 0 and 100000
# remain in this copy; the XML markup that should surround them appears to
# have been lost.  Restore it from a pristine copy of the file before
# sending this to a server -- TODO confirm.
LIST_TEMPLATE = """




%s
0
100000



"""
mas01mc@354:
# Request template for the sequence query (filled in by RunSequenceQuery).
# The eight %s placeholders receive, in order: dbName, dbKey, qType, qPos,
# pointNN, trackNN, seqLen, queryRadius.  The trailing -4.0, 1 and 0 are
# fixed values.
# NOTE(review): the XML markup that should surround these values appears to
# have been lost from this copy; restore it from a pristine copy of the
# file before use -- TODO confirm.
SEQUENCE_TEMPLATE = """




%s
%s
%s


%s
%s
%s
%s
%s
-4.0
1
0



"""
mas01mc@354:
# Request template for the status query (filled in by RunStatusQuery).
# The single %s placeholder receives the database name.
# NOTE(review): the XML markup that should surround the placeholder appears
# to have been lost from this copy; restore it from a pristine copy of the
# file before use -- TODO confirm.
STATUS_TEMPLATE = """




%s



"""
mas01mc@354:
# Request template for the shingle query (filled in by RunShingleQuery).
# The twelve %s placeholders receive, in order: dbName, featureDim,
# queryVector, powerVector, queryPos, pointNN, trackNN, radius,
# absoluteThreshold, relativeThreshold, exhaustive, lshExact.  The literal
# 32 and 1 are fixed values.
# NOTE(review): the XML markup that should surround these values appears to
# have been lost from this copy; restore it from a pristine copy of the
# file before use -- TODO confirm.
SHINGLE_QUERY_TEMPLATE = """




%s

%s
%s
%s



32
%s
%s
%s
1
%s
%s
%s
%s
%s



"""
mas01mc@354:
mas01mc@474:
mas01mc@474:
# Request template for the feature-file query (filled in by RunQuery).
# The sixteen %s placeholders receive, in order: dbName, featureFile, two
# empty strings, powerFile, qType, qPos, pointNN, trackNN, seqLen,
# queryRadius, and five fixed values ('0.0', '0.0', '0', '1', '0').
# NOTE(review): the XML markup that should name each field appears to have
# been lost from this copy; restore it from a pristine copy of the file
# before use -- TODO confirm.
FEATURE_QUERY_TEMPLATE = """




%s
%s
%s
%s
%s
%s
%s
%s
%s
%s
%s
%s
%s
%s
%s
%s



"""
mas01mc@474:
mas01mc@354: ############### List Query - Show the files in the database ###########
# RunListQuery returns the raw server reply; ParseListXML turns that reply
# into a list of [key identifier, frame length] pairs.
def RunListQuery():
    """Request the database listing and return the raw reply.

    Substitutes the module-level dbName into LIST_TEMPLATE, sends the
    request with SendXMLCommand and returns the response body unparsed.
    Use ParseListXML on the result to extract the entries.
    """
    request = LIST_TEMPLATE % (dbName,)
    return SendXMLCommand(request)
mas01mc@354:
mas01mc@354: # Construct a list from the two DOM entries passed. Used with the map routine to
mas01mc@354: # assemble the output.
def ParseListConstruct(f, l):
    """Build one [key, length] entry from a pair of DOM elements.

    f, l -- DOM elements (the 'Rkey' and 'Rlen' nodes when called from
            ParseListXML) whose first child holds the text.

    Returns a two-item list with each text latin-1 encoded.  An element
    with no children contributes an empty string instead of raising
    AttributeError on the missing first child.
    """
    def text_of(element):
        child = element.firstChild
        if child is None:
            # Empty element such as <Rkey/>: there is no text node to read.
            return ""
        return child.data.encode('latin-1')

    return [text_of(f), text_of(l)]
mas01mc@354:
def ParseListXML(response):
    """Extract the list-query entries from a server reply.

    response -- XML text as returned by RunListQuery.

    Collects every 'Rkey' and 'Rlen' element in document order and hands
    them pairwise (first with first, second with second, ...) to
    ParseListConstruct; the result of that mapping is returned.
    """
    document = parseString(response)
    keyNodes = document.getElementsByTagName('Rkey')
    lengthNodes = document.getElementsByTagName('Rlen')
    return map(ParseListConstruct, keyNodes, lengthNodes)
mas01mc@354:
mas01mc@354: ############### Status Query - Show the status of the database ###########
mas01mc@354: # Return a dictionary with the status fields
def GetDomElement(dom, field):
    """Return the text of the first element named *field*, or "".

    dom   -- a parsed minidom document (or element) to search.
    field -- tag name to look up.

    The text is latin-1 encoded.  An empty string is returned both when no
    such element exists and when the element is present but has no
    children (e.g. <dim/>), which previously raised AttributeError.
    """
    els = dom.getElementsByTagName(field)
    if not els:
        return ""
    child = els[0].firstChild
    if child is None:
        return ""
    return child.data.encode('latin-1')
mas01mc@354:
def RunStatusQuery():
    """Query the database status and return it as a dictionary.

    Sends STATUS_TEMPLATE (filled with the module-level dbName) through
    SendXMLCommand, parses the reply and looks up each status field with
    GetDomElement.  The returned dict has the keys 'numFiles', 'dim',
    'length', 'dudCount' and 'nullCount'.
    """
    reply = SendXMLCommand(STATUS_TEMPLATE % (dbName,))
    document = parseString(reply)
    status = {}
    for field in ('numFiles', 'dim', 'length', 'dudCount', 'nullCount'):
        status[field] = GetDomElement(document, field)
    return status
mas01mc@354:
mas01mc@354:
mas01mc@354:
mas01mc@354: ############### Shingle/Matrix Query - Show the data closest to shingle range ###########
mas01mc@354: #
mas01mc@354: # Encode features as a matrix with dim columns, and (optionally) powers as a matrix with one column
mas01mc@354: # dim # number of columns
mas01mc@354: # f1,1 f1,2...f1,dim # first row
mas01mc@354: # f2,1 f2,2...f2,dim
mas01mc@354: # ...
mas01mc@354: # fN,1 fN,2...fN,dim # last row
mas01mc@354: # p1,1 # first row's power
mas01mc@354: # p2,1
mas01mc@354: # ...
mas01mc@354: # pN,1 # last row's power
mas01mc@354: #
def RunShingleQuery():
    """Send a fixed example shingle query and print the matches.

    All parameters are hard-coded below.  The filled-in request is printed,
    sent with SendXMLCommand, and the reply is handed to ParseShingleXML,
    which prints the results.  Nothing is returned.
    """
    featureDim = '3'
    queryVector = '1.00.50.250.51.51.0'  # two rows of features
    # NOTE(review): this literal used to span several physical lines inside
    # single quotes, which is a syntax error; the line breaks are kept here
    # as \n escapes.  The markup that separates the individual values seems
    # to be missing from both vectors in this copy -- TODO restore from a
    # pristine copy of the file.
    powerVector = '\n-1.0\n-1.0\n'  # one power feature per row
    queryPos = '0'  # where in the feature sequence to start the shingle query
    pointNN = '10'  # how many near points to return per track
    trackNN = '10'  # how many near tracks to return
    radius = '1.0'  # search radius
    absoluteThreshold = '-4.5'  # absolute silence threshold in Bels (query and database shingles)
    relativeThreshold = '0'  # relative silence threshold in Bels between features, 0 = ignore
    exhaustive = '0'  # 1 = perform query using all subsequences of features of length sequenceLength
    lshExact = '0'  # if using an index then compute exact distances after LSH retrieval
    message = SHINGLE_QUERY_TEMPLATE % (
        dbName, featureDim, queryVector, powerVector, queryPos, pointNN,
        trackNN, radius, absoluteThreshold, relativeThreshold, exhaustive,
        lshExact)
    print(message)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@354:
mas01mc@354:
mas01mc@354: ############### Sequence Query - Show the data closest to one query ###########
def RunSequenceQuery(argv):
    """Run a sequence query by database key and print the matches.

    argv -- the command-line argument list.  With no query arguments
            (len(argv) <= 2) a built-in example query is used.  Otherwise
            argv[2] is the database key, argv[3] the query position and
            argv[4] the sequence length.

    Raises ValueError when some but not all of the three query arguments
    are supplied (previously this surfaced as a bare IndexError).

    The reply is passed to ParseShingleXML, which prints the results.
    Nothing is returned.
    """
    if len(argv) > 2:
        if len(argv) < 5:
            raise ValueError(
                "RunSequenceQuery needs key, position and length arguments "
                "(got %d of 3)" % (len(argv) - 2))
        dbKey = argv[2]
        qType = '32'  # nSequence
        qPos = argv[3]
        pointNN = '10'
        trackNN = '5'
        seqLen = argv[4]
        queryRadius = '2'
    else:
        dbKey = 'tmp/3.chr'
        qType = '32'  # nSequence
        qPos = '110'
        pointNN = '10'
        trackNN = '5'
        seqLen = '20'
        queryRadius = '0.4'

    message = SEQUENCE_TEMPLATE % (dbName, dbKey, qType, qPos, pointNN,
                                   trackNN, seqLen, queryRadius)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@354:
############### Feature Query - Show the data closest to a feature file ###########
def RunQuery(argv):
    """Run a query from a feature file and print the matches.

    argv -- the command-line argument list.  With no query arguments
            (len(argv) <= 2) a built-in example query is used.  Otherwise
            argv[2] is the feature file, argv[3] the power file, argv[4]
            the query position and argv[5] the sequence length.

    Raises ValueError when some but not all of the four query arguments
    are supplied (previously this surfaced as a bare IndexError).

    The filled-in request is printed, sent with SendXMLCommand, and the
    reply is passed to ParseShingleXML, which prints the results.  Nothing
    is returned.
    """
    if len(argv) > 2:
        if len(argv) < 6:
            raise ValueError(
                "RunQuery needs feature file, power file, position and "
                "length arguments (got %d of 4)" % (len(argv) - 2))
        featureFile = argv[2]
        powerFile = argv[3]
        qType = '32'  # nSequence
        qPos = argv[4]
        pointNN = '20'
        trackNN = '5'
        seqLen = argv[5]
        queryRadius = '0.1'
    else:
        featureFile = 'foo.chr12'
        powerFile = 'foo.power'
        qType = '32'  # nSequence
        qPos = '0'
        pointNN = '3'
        trackNN = '5'
        seqLen = '10'
        queryRadius = '0.1'

    # The two empty strings and the five trailing constants fill template
    # fields that this query leaves at fixed values.
    message = FEATURE_QUERY_TEMPLATE % (
        dbName, featureFile, "", "", powerFile, qType, qPos, pointNN,
        trackNN, seqLen, queryRadius, '0.0', '0.0', '0', '1', '0')

    print(message)
    response = SendXMLCommand(message)
    ParseShingleXML(response)
mas01mc@474:
def ParseShingleXML(response):
    """Print a summary of the matches contained in a query reply.

    response -- XML text returned by the server for a shingle, sequence or
                feature query.

    Collects the text of every 'Rlist', 'Dist' and 'Spos' element and
    prints one line per result as "<Spos> <Dist> <Rlist>", with a blank
    line whenever the Rlist value changes from the previous entry.  An
    element without children contributes an empty string instead of
    raising AttributeError.  The DOM is released even if printing fails.
    """
    # See http://diveintopython.org/xml_processing/parsing_xml.html
    dom = parseString(response)
    try:
        def texts(tag):
            values = []
            for node in dom.getElementsByTagName(tag):
                child = node.firstChild
                if child is None:
                    values.append("")
                else:
                    values.append(child.data.encode('latin-1'))
            return values

        resultList = texts('Rlist')
        distanceList = texts('Dist')
        positionList = texts('Spos')

        # The three lists are indexed in parallel; one entry of each is
        # expected per result.
        for i in range(len(resultList)):
            if i > 0 and resultList[i] != resultList[i - 1]:
                print("")
            print("%s %s %s" % (positionList[i], distanceList[i],
                                resultList[i]))
    finally:
        dom.unlink()
mas01mc@354:
mas01mc@354: ############### XML and Network Utilities ###########
mas01mc@354: # Send one XML SOAP command to the server. Get back the response.
mas01mc@354:
def SendXMLCommand(message):
    """Send one request to the audioDB server and return the reply body.

    message -- the complete request text to transmit.

    Opens a TCP connection to (serverHost, serverPort), sends the whole
    message, then reads until the server closes the connection.  The reply
    is split at the first blank line ("\\r\\n\\r\\n"); the part before it is
    the HTTP header and the part after it is returned.

    If the server sends nothing at all, a message is printed and the
    process exits via sys.exit(0).  A reply without the header/body
    separator raises ValueError from the unpacking below.  The socket is
    closed on every path, including errors.
    """
    if debug:
        print(message)
        print("")

    sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sSock.connect((serverHost, serverPort))

        # sendall keeps writing until the whole message is out; a plain
        # send() may transmit only part of it.
        sSock.sendall(message)

        # Read until the peer closes the connection (recv returns an empty
        # result), collecting chunks and joining once at the end.
        chunks = []
        while True:
            c = sSock.recv(4096)
            if not c:
                break
            chunks.append(c)
    finally:
        sSock.close()

    data = "".join(chunks)
    if not data:
        print("No response from the audioDB server")
        # NOTE(review): exit status 0 is kept for compatibility even though
        # this is a failure; consider a non-zero status.
        sys.exit(0)

    # Split off the HTTP header and the data
    header, response = data.split("\r\n\r\n", 1)
    if debug:
        print("%s %s" % ('Client received: ', response))

    return response
mas01mc@354:
mas01mc@354:
mas01mc@354: ############### Main Program - Figure out which query we want ###########
mas01mc@354:
mas01mc@354: # Argument processing scheme described at: http://docs.python.org/lib/module-getopt.html
mas01mc@354:
mas01mc@354: import sys
if __name__ == "__main__":
    # Command-line entry point: the first argument selects the query type;
    # any remaining arguments are interpreted by the selected Run* function.
    usage = "Syntax: " + sys.argv[0] + " -{s,q,f,l} feature_file [power_file] pos len"
    if len(sys.argv) == 1:
        print(usage)
        sys.exit(1)

    queryType = sys.argv[1]
    if queryType in ('-s', 'status'):
        for k, v in RunStatusQuery().items():
            print("%s %s" % (k, v))
    elif queryType in ('-q', 'query'):
        RunSequenceQuery(sys.argv)
    elif queryType in ('-f', 'feature'):
        RunQuery(sys.argv)
    elif queryType in ('-l', 'list'):
        results = ParseListXML(RunListQuery())
        for (f, l) in results:
            print("%s\t%s" % (f, l))
    elif queryType in ('-v', 'vector'):
        RunShingleQuery()
    else:
        # An unrecognised option used to fall through silently; report it
        # the same way as a missing option.
        print(usage)
        sys.exit(1)
mas01mc@354:
mas01mc@354: