#!/usr/bin/python

# Python code/library to query the audioDB via the SOAP web interface.
# by Malcolm Slaney, August/September 2008
# malcolm@ieee.org

import sys, socket
from xml.dom.minidom import parseString

# Module-level settings read by the query functions in this file.
debug = False              # when True, SendXMLCommand echoes requests and replies
dbName = 'tutorial.adb'    # database name substituted into every request template

# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
#serverHost = 'research-hm3.corp.sk1.yahoo.com'
serverHost = 'localhost'
serverPort = 14475

# Start the server on serverHost with
#     ./audioDB -s 14475

# Here are the templates used for the different kinds of queries. We'll fill in
# the desired parameters as we go.
#
# NOTE(review): in this copy each template consists only of blank lines,
# positional %s placeholders and a few literal constants; the XML element
# markup that a SOAP request needs is not present.  Presumably it was lost
# when the file was exported -- restore it from the upstream audioDB sources
# before relying on these templates.  The number and order of the %s slots
# matches the argument tuples passed by the Run*Query functions.

# One slot: database name.
LIST_TEMPLATE = """




%s
0
100000



"""

# Eight slots, in order: database name, database key, query type, query
# position, pointNN, trackNN, sequence length, query radius.
SEQUENCE_TEMPLATE = """




%s
%s
%s


%s
%s
%s
%s
%s
-4.0
1
0



"""

# One slot: database name.
STATUS_TEMPLATE = """




%s



"""

# Twelve slots, in order: database name, feature dimension, query vector,
# power vector, query position, pointNN, trackNN, radius, absolute threshold,
# relative threshold, exhaustive flag, lshExact flag.
SHINGLE_QUERY_TEMPLATE = """




%s

%s
%s
%s



32
%s
%s
%s
1
%s
%s
%s
%s
%s



"""


# Sixteen slots; see RunQuery for the values supplied, in order.
FEATURE_QUERY_TEMPLATE = """




%s
%s
%s
%s
%s
%s
%s
%s
%s
%s
%s
%s
%s
%s
%s
%s



"""

############### List Query - Show the files in the database ###########
# Returns the raw server reply; ParseListXML turns that reply into a list of
# (key identifier, frame length) pairs.
def RunListQuery():
    """Send the list request for ``dbName`` and return the server's reply.

    The reply is whatever SendXMLCommand returns (the response body with the
    HTTP header removed); it is not parsed here.
    """
    # Format with an explicit 1-tuple so the substitution stays correct
    # whatever type dbName happens to be.
    message = LIST_TEMPLATE % (dbName,)
    return SendXMLCommand(message)

# Construct a list from the two DOM entries passed. Used with the map routine to
# assemble the output.
def _ElementText(element):
    """Return the latin-1 encoded text of ``element``'s first child.

    An element with no children (for example ``<dim/>``) has
    ``firstChild is None``; return "" for it instead of failing.
    """
    child = element.firstChild
    if child is None:
        return ""
    return child.data.encode('latin-1')

def ParseListConstruct(f,l):
    """Return ``[text of f, text of l]`` for two DOM elements.

    ParseListXML calls this with one 'Rkey' element and one 'Rlen' element.
    Each text is latin-1 encoded; an element without content contributes "".
    """
    return [_ElementText(f), _ElementText(l)]

def ParseListXML(response):
    """Parse a list-query reply into a list of ``[key, length]`` pairs.

    ``response`` is the XML text of the reply.  The 'Rkey' and 'Rlen'
    elements are paired up in document order.
    """
    dom = parseString(response)
    try:
        fileElements = dom.getElementsByTagName('Rkey')
        lenElements = dom.getElementsByTagName('Rlen')
        # list() so that a real list is built before the DOM is torn down.
        return list(map(ParseListConstruct, fileElements, lenElements))
    finally:
        # Release the DOM tree, as ParseShingleXML does.
        dom.unlink()

############### Status Query - Show the status of the database ###########
# Return a dictionary with the status fields
def GetDomElement(dom, field):
    """Return the text of the first ``field`` element in ``dom``.

    The text is latin-1 encoded.  Returns "" when there is no such element,
    or when the element is empty.
    """
    els = dom.getElementsByTagName(field)
    if len(els) > 0:
        return _ElementText(els[0])
    return ""

def RunStatusQuery():
    """Send the status request for ``dbName`` and return the parsed fields.

    Returns a dictionary with the keys 'numFiles', 'dim', 'length',
    'dudCount' and 'nullCount'; each value is what GetDomElement finds for
    the element of the same name in the reply.
    """
    # Explicit 1-tuple: see the % formatting rules for non-tuple operands.
    message = STATUS_TEMPLATE % (dbName,)

    response = SendXMLCommand(message)
    dom = parseString(response)
    try:
        status = {}
        for field in ('numFiles', 'dim', 'length', 'dudCount', 'nullCount'):
            status[field] = GetDomElement(dom, field)
    finally:
        dom.unlink()
    return status



############### Shingle/Matrix Query - Show the data closest to shingle range ###########
#
# Encode features as a matrix with dim columns, and (optionally) powers as a matrix with one column
# dim # number of columns
# f1,1 f1,2...f1,dim # first row
# f2,1 f2,2...f2,dim
# ...
# fN,1 fN,2...fN,dim # last row
# p1,1 # first row's power
# p2,1
# ...
# pN,1 # last row's power
#
def RunShingleQuery():
    """Send a fixed, built-in shingle query and print the matches.

    All parameters are hard-coded below.  The filled-in request is printed,
    sent with SendXMLCommand, and the reply is summarised by ParseShingleXML.
    Returns None.
    """
    featureDim = '3'
    # NOTE(review): the six feature values appear run together here, and the
    # power vector holds only newline-separated numbers; presumably markup
    # or separators were lost along with the template markup -- confirm
    # against the upstream sources.
    queryVector = '1.00.50.250.51.51.0'     # two rows of features
    powerVector = '\n\n-1.0\n\n-1.0\n\n'    # one power feature per row
    queryPos = '0'              # where in the feature sequence to start the shingle query
    pointNN = '10'              # how many near points to return per track
    trackNN = '10'              # how many near tracks to return
    radius = '1.0'              # search radius
    absoluteThreshold = '-4.5'  # absolute silence threshold in Bels (query and database shingles)
    relativeThreshold = '0'     # relative silence threshold in Bels between features, 0 = ignore
    exhaustive = '0'            # 1 = perform query using all subsequences of features of length sequenceLength
    lshExact = '0'              # if using an index then compute exact distances after LSH retrieval
    message = SHINGLE_QUERY_TEMPLATE % (
        dbName, featureDim, queryVector, powerVector, queryPos, pointNN,
        trackNN, radius, absoluteThreshold, relativeThreshold, exhaustive,
        lshExact)
    print(message)
    response = SendXMLCommand(message)
    ParseShingleXML(response)


def _RequireArgs(argv, count, who, layout):
    """Raise IndexError with a readable message unless len(argv) >= count."""
    if len(argv) < count:
        raise IndexError("%s needs arguments: %s (got %d of %d)"
                         % (who, layout, len(argv), count))


############### Sequence Query - Show the data closest to one query ###########
def RunSequenceQuery(argv):
    """Run a sequence query keyed by an entry in the database; print matches.

    ``argv`` is the full command line.  With more than two entries it must
    supply ``argv[2]`` = database key, ``argv[3]`` = query position and
    ``argv[4]`` = sequence length; otherwise built-in defaults are used.

    Raises IndexError when some, but not all, of those arguments are given.
    """
    if len(argv) > 2:
        # The original test only checked for argv[2]; argv[3] and argv[4]
        # are read as well, so insist on all of them up front.
        _RequireArgs(argv, 5, 'RunSequenceQuery', 'key pos len')
        dbKey = argv[2]
        qType = '32'        # nSequence
        qPos = argv[3]
        pointNN = '10'
        trackNN = '5'
        seqLen = argv[4]
        queryRadius = '2'
    else:
        dbKey = 'tmp/3.chr'
        qType = '32'        # nSequence
        qPos = '110'
        pointNN = '10'
        trackNN = '5'
        seqLen = '20'
        queryRadius = '0.4'

    message = SEQUENCE_TEMPLATE % (dbName, dbKey, qType, qPos, pointNN,
                                   trackNN, seqLen, queryRadius)
    response = SendXMLCommand(message)
    ParseShingleXML(response)

############### Feature Query - Query by feature file (and power file) ###########
def RunQuery(argv):
    """Run a query described by a feature file and a power file; print matches.

    ``argv`` is the full command line.  With more than two entries it must
    supply ``argv[2]`` = feature file, ``argv[3]`` = power file, ``argv[4]`` =
    query position and ``argv[5]`` = sequence length; otherwise built-in
    defaults are used.  The filled-in request is printed before it is sent.

    Raises IndexError when some, but not all, of those arguments are given.
    """
    if len(argv) > 2:
        _RequireArgs(argv, 6, 'RunQuery', 'feature_file power_file pos len')
        featureFile = argv[2]
        powerFile = argv[3]
        qType = '32'        # nSequence
        qPos = argv[4]
        pointNN = '20'
        trackNN = '5'
        seqLen = argv[5]
        queryRadius = '0.1'
    else:
        featureFile = 'foo.chr12'
        powerFile = 'foo.power'
        qType = '32'        # nSequence
        qPos = '0'
        pointNN = '3'
        trackNN = '5'
        seqLen = '10'
        queryRadius = '0.1'

    # The template has sixteen slots; they are filled, in order, with:
    #   dbName, featureFile, "", "", powerFile, qType, qPos, pointNN, trackNN,
    #   seqLen, queryRadius, '0.0', '0.0', '0', '1', '0'
    message = FEATURE_QUERY_TEMPLATE % (
        dbName, featureFile, "", "", powerFile, qType, qPos, pointNN,
        trackNN, seqLen, queryRadius, '0.0', '0.0', '0', '1', '0')

    print(message)
    response = SendXMLCommand(message)
    ParseShingleXML(response)

def _NodeText(node):
    """Return the latin-1 encoded text of node's first child, or "" if none."""
    child = node.firstChild
    if child is None:
        return ""
    return child.data.encode('latin-1')

def _TagTexts(dom, tag):
    """Return the text of every ``tag`` element in ``dom``, in document order."""
    return [_NodeText(node) for node in dom.getElementsByTagName(tag)]

def ParseShingleXML(response):
    """Print a summary of the matches contained in a query reply.

    ``response`` is the XML text of the reply.  The 'Rlist', 'Dist' and
    'Spos' elements are read in document order and paired positionally.  One
    line "<Spos> <Dist> <Rlist>" is printed per match, with a blank line
    whenever the 'Rlist' value differs from the previous one.  If the three
    lists differ in length, the unmatched trailing entries are not printed.
    Returns None.
    """
    # Grab all the responses
    # See http://diveintopython.org/xml_processing/parsing_xml.html
    dom = parseString(response)
    try:
        resultList = _TagTexts(dom, 'Rlist')
        distanceList = _TagTexts(dom, 'Dist')
        positionList = _TagTexts(dom, 'Spos')
    finally:
        # The extracted strings no longer reference the tree.
        dom.unlink()

    # Print out a summary of the most similar results
    previous = None
    rows = zip(positionList, distanceList, resultList)
    for i, (position, distance, result) in enumerate(rows):
        if i > 0 and result != previous:
            print("")
        print("%s %s %s" % (position, distance, result))
        previous = result

############### XML and Network Utilities ###########
# Send one XML SOAP command to the server. Get back the response.

def SendXMLCommand(message):
    """Send ``message`` to the audioDB server and return the reply body.

    Opens a TCP connection to (serverHost, serverPort), sends the whole
    message, reads until the server closes the connection, and returns the
    part of the reply that follows the first blank line ("\\r\\n\\r\\n"),
    i.e. the reply with its HTTP header removed.  When ``debug`` is true the
    request and the reply body are printed.

    Raises SystemExit (status 1) after printing a notice if the server sends
    nothing, and ValueError if the reply has no header/body separator.
    The socket is closed on every path.

    NOTE(review): the string handling here assumes Python 2 byte strings.
    """
    if debug:
        print(message)
        print("")

    #Create a socket
    sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        #Connect to server
        sSock.connect((serverHost, serverPort))

        #Send messages -- sendall keeps writing until everything is sent,
        # whereas send may stop after a partial write.
        sSock.sendall(message)

        # Now loop, while getting all the data we can get from the socket.
        chunks = []
        while True:
            c = sSock.recv(4096)
            if not c:
                break
            chunks.append(c)
    finally:
        sSock.close()

    data = "".join(chunks)
    if data == "":
        print("No response from the audioDB server")
        # This is a failure, so report a non-zero status.
        sys.exit(1)

    # Split off the HTTP header and the data
    header, separator, response = data.partition("\r\n\r\n")
    if not separator:
        raise ValueError("audioDB reply has no header/body separator")
    if debug:
        print('Client received:  %s' % (response,))

    return response


############### Main Program - Figure out which query we want ###########

# Argument processing scheme described at: http://docs.python.org/lib/module-getopt.html

import sys
if __name__=="__main__":
    usage = "Syntax: " + sys.argv[0] + " -{s,q,f,l} feature_file [power_file] pos len"
    if len(sys.argv) == 1:
        print(usage)
        sys.exit(1)

    queryType = sys.argv[1]
    if queryType == '-s' or queryType == 'status':
        response = RunStatusQuery()
        for k, v in response.items():
            print("%s %s" % (k, v))
    elif queryType == '-q' or queryType == 'query':
        RunSequenceQuery(sys.argv)
    elif queryType == '-f' or queryType == 'feature':
        RunQuery(sys.argv)
    elif queryType == '-l' or queryType == 'list':
        response = RunListQuery()
        results = ParseListXML(response)
        for (f,l) in results:
            print("%s\t%s" % (f,l))
    elif queryType == '-v' or queryType == 'vector':
        RunShingleQuery()
    else:
        # An unrecognised option used to be ignored silently.
        print(usage)
        sys.exit(1)