# HG changeset patch # User mas01mc # Date 1226062106 0 # Node ID 4871a3ed9e3644bf97ed208796a5192f62f12125 # Parent 0d02bcd74a400093f839e7680c31f3f4c813b824 Added serialized feature query Web Service, server-side only, example Python script: QueryADB.py -v diff -r 0d02bcd74a40 -r 4871a3ed9e36 QueryADB.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/QueryADB.py Fri Nov 07 12:48:26 2008 +0000 @@ -0,0 +1,323 @@ +#!/usr/bin/python + +# Python code/library to query the audioDB via the SOAP web interface. +# by Malcolm Slaney, August/September 2008 +# malcolm@ieee.org + +import sys, socket +from xml.dom.minidom import parseString + +global debug +debug = False +global dbName +dbName = 'tutorial.adb' + +# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2 +#serverHost = 'research-hm3.corp.sk1.yahoo.com' +serverHost = 'localhost' +serverPort = 14475 + +# Start the server on serverHost with +# ./audioDB -s 14475 + +# Here are the templates used for the different kinds of queries. We'll fill in the +# desired parameters are we go. +LIST_TEMPLATE = """ + + + + + %s + 0 + 100000 + + + +""" + +SEQUENCE_TEMPLATE = """ + + + + + %s + %s + %s + + + %s + %s + %s + %s + %s + -4.0 + 1 + 0 + + + +""" + +STATUS_TEMPLATE = """ + + + + + %s + + + +""" + +SHINGLE_QUERY_TEMPLATE = """ + + + + + %s + + %s + %s + %s + + + + 32 + %s + %s + %s + 1 + %s + %s + %s + %s + %s + + + +""" + +############### List Query - Show the files in the database ########### +# Return a list of (key identifier, frame length) pairs. +def RunListQuery(): + global debug, dbName + message = LIST_TEMPLATE%(dbName) + + response = SendXMLCommand(message) + return response + +# Construct a list from the two DOM entries passed. Used with the map routine to +# assemble the output. +def ParseListConstruct(f,l): + el = [f.firstChild.data.encode('latin-1'),l.firstChild.data.encode('latin-1')] + return el + +def ParseListXML(response): + dom = parseString(response) + fileElements = dom.getElementsByTagName('Rkey') + # print fileElements + lenElements = dom.getElementsByTagName('Rlen') + # print lenElements + return map(ParseListConstruct, fileElements, lenElements) + +############### Status Query - Show the status of the database ########### +# Return a dictionary with the status fields +def GetDomElement(dom, field): + els = dom.getElementsByTagName(field) + if len(els) > 0: + return els[0].firstChild.data.encode('latin-1') + else: + return "" + +def RunStatusQuery(): + global debug, dbName + message = STATUS_TEMPLATE%(dbName) + + response = SendXMLCommand(message) + # print response + dom = parseString(response) + status = {} + status['numFiles'] = GetDomElement(dom, 'numFiles') + status['dim'] = GetDomElement(dom, 'dim') + status['length'] = GetDomElement(dom, 'length') + status['dudCount'] = GetDomElement(dom, 'dudCount') + status['nullCount'] = GetDomElement(dom, 'nullCount') + return status + + + +############### Shingle/Matrix Query - Show the data closest to shingle range ########### +# +# Encode features as a matrix with dim columns, and (optionally) powers as a matrix with one column +# dim # number of columns +# f1,1 f1,2...f1,dim # first row +# f2,1 f2,2...f2,dim +# ... +# fN,1 fN,2...fN,dim # last row +# p1,1 # first row's power +# p2,1 +# ... +# pN,1 # last row's power +# +def RunShingleQuery(): + global debug, dbName + featureDim = '3' + queryVector='1.00.50.250.51.51.0' # two rows of features + powerVector='

-1.0

-1.0

' # one power feature per row + queryPos = '0' # where in the feature sequence to start the shingle query + pointNN = '10' # how many near points to return per track + trackNN = '10' # how many near tracks to return + radius = '1.0' # search radius + absoluteThreshold = '-4.5' # absolute silence threshold in Bels (query and database shingles) + relativeThreshold = '0' # relative silence threshold in Bels between features, 0 = ignore + exhaustive = '0' # 1 = perform query using all subsequences of features of length sequenceLength + lshExact = '0' # if using an index then compute exact distances after LSH retrieval + message = SHINGLE_QUERY_TEMPLATE + message = SHINGLE_QUERY_TEMPLATE%(dbName, featureDim, queryVector, powerVector, queryPos, pointNN, trackNN, radius, absoluteThreshold, relativeThreshold, exhaustive, lshExact); + # print message + print message + response = SendXMLCommand(message) + ParseShingleXML(response) + + +############### Sequence Query - Show the data closest to one query ########### +def RunSequenceQuery(argv): + global debug, dbName + if len(argv) > 2: + dbKey = argv[2] + qType = '32' # nSequence + qPos = argv[3] + pointNN = '10' + trackNN = '5' + seqLen = argv[4] + queryRadius = '0.5' + else: + dbKey = 'tmp/3.chr' + qType = '32' # nSequence + qPos = '110' + pointNN = '10' + trackNN = '5' + seqLen = '20' + queryRadius = '0.4' + + message = SEQUENCE_TEMPLATE + message = SEQUENCE_TEMPLATE%(dbName, dbKey, qType, qPos, pointNN, trackNN, seqLen, queryRadius) + # print message + response = SendXMLCommand(message) + ParseShingleXML(response) + +def ParseShingleXML(response): + # Grab all the responses + # See http://diveintopython.org/xml_processing/parsing_xml.html + dom = parseString(response) + resultList = [] + for node in dom.getElementsByTagName('Rlist'): + # print node.toxml() + resultList.append(node.firstChild.data.encode('latin-1')) + + distanceList = [] + for node in dom.getElementsByTagName('Dist'): + # print node.toxml() + distanceList.append(node.firstChild.data.encode('latin-1')) + + positionList = [] + for node in dom.getElementsByTagName('Spos'): + # print node.toxml() + positionList.append(node.firstChild.data.encode('latin-1')) + + # print resultList + # print distanceList + # print positionList + + # Print out a summary of the most similar results + for i in range(0,len(resultList)): + if i > 0 and resultList[i] != resultList[i-1]: + print + print positionList[i], distanceList[i], resultList[i] + + dom.unlink() + +############### XML and Network Utilities ########### +# Send one XML SOAP command to the server. Get back the response. + +def SendXMLCommand(message): + global debug + if debug: + print message + print + + #Create a socket + sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + + #Connect to server + sSock.connect((serverHost, serverPort)) + + #Send messages + sSock.send(message) + data = "" + # Now loop, while getting all the data we can get from the socket. + while True: + c = sSock.recv(4096) + if c == "": + break + data += c + if data == "": + print "No response from the audioDB server" + sys.exit(0) + # Split off the HTTP header and the data + header,response = data.split("\r\n\r\n", 1) + if debug: + print 'Client received: ',response + + sSock.close() + return response + + +############### Main Program - Figure out which query we want ########### + +# Argument processing scheme described at: http://docs.python.org/lib/module-getopt.html + +import sys +if __name__=="__main__": + cmdname = sys.argv[0] + if len(sys.argv) == 1: + print "Syntax: " + sys.argv[0] + " -q feature_file pos len" + sys.exit(1) + + queryType = sys.argv[1] + if queryType == '-s' or queryType == 'status': + response = RunStatusQuery() + for k, v in response.iteritems(): + print k, v + elif queryType == '-q' or queryType == 'query': + RunSequenceQuery(sys.argv) + elif queryType == '-l' or queryType == 'list': + response = RunListQuery() + # print response + results = ParseListXML(response) + for (f,l) in results: + print "%s\t%s" % (f,l) + elif queryType == '-v' or queryType == 'vector': + response = RunShingleQuery() + + diff -r 0d02bcd74a40 -r 4871a3ed9e36 audioDBws.h --- a/audioDBws.h Wed Oct 08 17:23:15 2008 +0000 +++ b/audioDBws.h Fri Nov 07 12:48:26 2008 +0000 @@ -47,6 +47,14 @@ unsigned int *Rlen; }; +struct adb__queryVector { + int dim; // dimensionality of the feature (d) + int __sizev; // l x d : + double *v; // pointer to query data + int __sizep; + double *p; +}; + struct adb__lisztResponse { struct adb__lisztResult result; }; @@ -61,3 +69,7 @@ int adb__query(xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__string powerFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int segNN, xsd__int segLen, xsd__double radius, xsd__double absolute_threshold, xsd__double relative_threshold, xsd__int exhaustive, xsd__int lsh_exact, struct adb__queryResponse &adbQueryResponse); int adb__sequenceQueryByKey(xsd__string dbName,xsd__string trackKey, xsd__string featureFileName, xsd__int queryType,xsd__string trackFileName,xsd__string timesFileName,xsd__int queryPoint,xsd__int pointNN,xsd__int trackNN,xsd__int sequenceLength,xsd__double radius,xsd__double absolute_threshold,xsd__int usingQueryPoint,xsd__int lsh_exact,struct adb__queryResponse &adbQueryResponse); + +// Query an audioDB database by vector (serialized), queryKey/featureFileName is here replaced with qVector +int adb__shingleQuery(xsd__string dbName, struct adb__queryVector qVector, xsd__string keyList, xsd__string timesFileName, xsd__int queryType, xsd__int queryPos, xsd__int pointNN, xsd__int trackNN, xsd__int sequenceLength, xsd__double radius, xsd__double absolute_threshold, xsd__double relative_threshold, xsd__int exhaustive, xsd__int lsh_exact, struct adb__queryResponse &adbQueryResponse); + diff -r 0d02bcd74a40 -r 4871a3ed9e36 soap.cpp --- a/soap.cpp Wed Oct 08 17:23:15 2008 +0000 +++ b/soap.cpp Fri Nov 07 12:48:26 2008 +0000 @@ -282,7 +282,6 @@ } argv[argv_counter] = NULL; - try { audioDB(argc, (char* const*)argv, &adbQueryResponse); delete [] argv; @@ -359,7 +358,75 @@ return SOAP_FAULT; } } - + +// Query an audioDB database by vector (serialized) +int adb__shingleQuery(struct soap* soap, xsd__string dbName, struct adb__queryVector qVector, xsd__string keyList, xsd__string timesFileName, xsd__int queryType, xsd__int queryPos, xsd__int pointNN, xsd__int trackNN, xsd__int sequenceLength, xsd__double radius, xsd__double absolute_threshold, xsd__double relative_threshold, xsd__int exhaustive, xsd__int lsh_exact, struct adb__queryResponse &adbQueryResponse){ + + // open a tmp file on the server, write shingle, query as a file with query point 0 + // and shingle length l/dim + char tmpFileName[] = "/tmp/adb_XXXXXX"; + int tmpFid = mkstemp(tmpFileName); + if(tmpFid==-1){ + cerr << "Cannot make tmpfile <" << tmpFileName << "> on server" << endl; + return SOAP_FAULT; + } + + FILE* tmpFile = fdopen(tmpFid, "r+b"); + if(!tmpFile){ + cerr << "error opening <" << tmpFileName << "> for write" << endl; + return SOAP_FAULT; + } + + if(fwrite(&qVector.dim, sizeof(int), 1, tmpFile)!=1){ + cerr << "error writing tmp file dim <"<< tmpFileName << ">" << endl; + return SOAP_FAULT; + } + + if(fwrite(qVector.v, sizeof(double), qVector.__sizev, tmpFile)!=(size_t)qVector.__sizev){ + cerr << "error writing tmp file doubles <" << tmpFileName << ">" << endl; + return SOAP_FAULT; + } + + // Close the file so that a new FD can be opened + fclose(tmpFile); + + char tmpFileName2[] = "/tmp/adbP_XXXXXX"; + int tmpFid2 = 0; + FILE* tmpFile2 = NULL; + + // Check if powers have been passed and write accordingly + if(qVector.__sizep){ + tmpFid2 = mkstemp(tmpFileName2); + tmpFile2 = fdopen(tmpFid2, "r+b"); + if(!tmpFile2){ + cerr << "error opening power file <" << tmpFileName2 << "> for write" << endl; + return SOAP_FAULT; + } + int pSize=1; + if(fwrite(&pSize, sizeof(int), 1, tmpFile2)!=1){ + cerr << "error writing tmp power file dim <"<< tmpFileName2 << ">" << endl; + return SOAP_FAULT; + } + + if(fwrite(qVector.p, sizeof(double), qVector.__sizep, tmpFile2)!=(size_t)qVector.__sizep){ + cerr << "error writing tmp power file doubles <" << tmpFileName2 << ">" << endl; + return SOAP_FAULT; + } + fclose(tmpFile2); + } + + // fix up sequenceLength if it isn't provided, we know what the caller wants by the size of the shingle + // and the feature dimensionality + if(!sequenceLength) + sequenceLength = qVector.__sizev/qVector.dim; + + int retVal = adb__query(soap, dbName, tmpFileName, keyList, timesFileName, qVector.__sizep?tmpFileName2:0, + queryType, queryPos, pointNN, trackNN, sequenceLength, radius, + absolute_threshold, relative_threshold, exhaustive, lsh_exact, adbQueryResponse); + + return retVal; +} + /* Server loop */ void audioDB::startServer(){ struct soap soap;