Mercurial > hg > audiodb
changeset 354:4871a3ed9e36 pre_api
Added serialized feature query Web Service, server-side only, example Python script: QueryADB.py -v
author | mas01mc |
---|---|
date | Fri, 07 Nov 2008 12:48:26 +0000 |
parents | 0d02bcd74a40 |
children | 94c18f128ce8 |
files | QueryADB.py audioDBws.h soap.cpp |
diffstat | 3 files changed, 404 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/python

# Python code/library to query the audioDB via the SOAP web interface.
# by Malcolm Slaney, August/September 2008
# malcolm@ieee.org

import sys, socket
from xml.dom.minidom import parseString

# Module-wide settings.  These are ordinary module globals; a `global`
# statement is only meaningful inside a function, so none is needed here.
debug = False            # SendXMLCommand echoes requests/responses when true
dbName = 'tutorial.adb'  # database name substituted into every request

# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
#serverHost = 'research-hm3.corp.sk1.yahoo.com'
serverHost = 'localhost'
serverPort = 14475

# Start the server on serverHost with
#    ./audioDB -s 14475

# Here are the templates used for the different kinds of queries.  We'll fill
# in the desired parameters as we go.  Every %s is filled positionally with
# the % operator, so the order of the placeholders matters.

# adb:liszt request.  Placeholder: database name.  The offset (0) and the
# length (100000) are fixed in the template.
LIST_TEMPLATE = """
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
 xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
 xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 xmlns:xsd="http://www.w3.org/2001/XMLSchema"
 xmlns:adb="http://tempuri.org/adb.xsd">
 <SOAP-ENV:Body>
  <adb:liszt>
   <dbName>%s</dbName>
   <lisztOffset>0</lisztOffset>
   <lisztLength>100000</lisztLength>
  </adb:liszt>
 </SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""

# adb:sequenceQueryByKey request.  Placeholders, in order: database name,
# feature file name, query type, query point, pointNN, trackNN, sequence
# length, radius.
SEQUENCE_TEMPLATE = """
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:adb="http://tempuri.org/adb.xsd">
<SOAP-ENV:Body>
 <adb:sequenceQueryByKey>
  <dbName>%s</dbName>
  <featureFileName>%s</featureFileName>
  <queryType>%s</queryType>
  <trackFileName></trackFileName>
  <timesFileName></timesFileName>
  <queryPoint>%s</queryPoint>
  <pointNN>%s</pointNN> <!-- Number of nearest neighbors to retrieve -->
  <trackNN>%s</trackNN> <!-- Number of tracks to retrieve -->
  <sequenceLength>%s</sequenceLength> <!-- Number of frames in a shingle -->
  <radius>%s</radius> <!-- Distance radius to search -->
  <absolute-threshold>-4.0</absolute-threshold>
  <usingQueryPoint>1</usingQueryPoint>
  <lsh-exact>0</lsh-exact>
 </adb:sequenceQueryByKey>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""

# adb:status request.  Placeholder: database name.
STATUS_TEMPLATE = """
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
 xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
 xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 xmlns:xsd="http://www.w3.org/2001/XMLSchema"
 xmlns:adb="http://tempuri.org/adb.xsd">
 <SOAP-ENV:Body>
  <adb:status>
   <dbName>%s</dbName>
  </adb:status>
 </SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""

# adb:shingleQuery request.  Placeholders, in order: database name, feature
# dimension, the <v> elements, the <p> elements, query position, pointNN,
# trackNN, radius, absolute threshold, relative threshold, exhaustive flag,
# lsh-exact flag.  The query type (32) and sequence length (1) are fixed.
SHINGLE_QUERY_TEMPLATE = """
<?xml version="1.0" encoding="UTF-8"?>
<SOAP-ENV:Envelope
 xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
 xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 xmlns:xsd="http://www.w3.org/2001/XMLSchema"
 xmlns:adb="http://tempuri.org/adb.xsd">
 <SOAP-ENV:Body>
  <adb:shingleQuery>
   <dbName>%s</dbName>
   <qVector>
    <dim>%s</dim>
    %s
    %s
   </qVector>
   <keyList></keyList>
   <timesFileName></timesFileName>
   <queryType>32</queryType>
   <queryPos>%s</queryPos>
   <pointNN>%s</pointNN>
   <trackNN>%s</trackNN>
   <sequenceLength>1</sequenceLength>
   <radius>%s</radius>
   <absolute-threshold>%s</absolute-threshold>
   <relative-threshold>%s</relative-threshold>
   <exhaustive>%s</exhaustive>
   <lsh-exact>%s</lsh-exact>
  </adb:shingleQuery>
 </SOAP-ENV:Body>
</SOAP-ENV:Envelope>
"""

############### List Query - Show the files in the database ###########
# Return a list of (key identifier, frame length) pairs.
def RunListQuery():
    """Send the list (adb:liszt) request for dbName and return the reply.

    The return value is whatever SendXMLCommand returns for the request;
    pass it to ParseListXML to get the (key, length) pairs.
    """
    return SendXMLCommand(LIST_TEMPLATE % (dbName,))


def _DomText(node):
    """Return the text held by a DOM element's first child, or "" if empty.

    Under Python 2 the result is the latin-1 encoded byte string (what the
    callers have always received); under Python 3 it is the text itself, so
    callers get the native string type on either interpreter.  Text that
    cannot be encoded as latin-1 raises UnicodeEncodeError on both.
    """
    child = node.firstChild
    if child is None:
        # <Tag></Tag> has no text node at all.
        return ""
    encoded = child.data.encode('latin-1')
    if isinstance(encoded, str):
        return encoded
    return child.data


# Construct a list from the two DOM entries passed.  Used to assemble the
# output of ParseListXML.
def ParseListConstruct(f, l):
    """Return [text of f, text of l] for one Rkey/Rlen pair of DOM elements."""
    return [_DomText(f), _DomText(l)]


def ParseListXML(response):
    """Parse a list-query reply into a list of [key, length] string pairs.

    The i-th 'Rkey' element is paired with the i-th 'Rlen' element.  If the
    reply holds different numbers of each, the unpaired trailing elements
    are ignored.
    """
    dom = parseString(response)
    try:
        fileElements = dom.getElementsByTagName('Rkey')
        lenElements = dom.getElementsByTagName('Rlen')
        return [ParseListConstruct(f, l)
                for f, l in zip(fileElements, lenElements)]
    finally:
        # The strings have been extracted; release the DOM tree.
        dom.unlink()


############### Status Query - Show the status of the database ###########
# Return a dictionary with the status fields
def GetDomElement(dom, field):
    """Return the text of the first element named field, or "" if none."""
    els = dom.getElementsByTagName(field)
    if len(els) > 0:
        return _DomText(els[0])
    return ""


def RunStatusQuery():
    """Send the status request for dbName and return its fields as a dict.

    The keys are 'numFiles', 'dim', 'length', 'dudCount' and 'nullCount';
    each value is the element's text, or "" when the reply lacks it.
    """
    response = SendXMLCommand(STATUS_TEMPLATE % (dbName,))
    dom = parseString(response)
    try:
        status = {}
        for field in ('numFiles', 'dim', 'length', 'dudCount', 'nullCount'):
            status[field] = GetDomElement(dom, field)
        return status
    finally:
        dom.unlink()



############### Shingle/Matrix Query - Show the data closest to shingle range ###########
#
# Encode features as a matrix with dim columns, and (optionally) powers as a matrix with one column
#   dim                  # number of columns
#   f1,1 f1,2...f1,dim   # first row
#   f2,1 f2,2...f2,dim
#   ...
#   fN,1 fN,2...fN,dim   # last row
#   p1,1                 # first row's power
#   p2,1
#   ...
#   pN,1                 # last row's power
#
def RunShingleQuery():
    """Send a fixed example shingle (vector) query and print the matches.

    The query is hard-coded: two rows of 3-dimensional features with one
    power value per row.  The request is echoed to stdout before it is
    sent, and the reply is summarised by ParseShingleXML.  Returns None.
    """
    featureDim = '3'
    queryVector = '<v>1.0</v><v>0.5</v><v>0.25</v><v>0.5</v><v>1.5</v><v>1.0</v>'  # two rows of features
    powerVector = '<p>-1.0</p><p>-1.0</p>'  # one power feature per row
    queryPos = '0'             # where in the feature sequence to start the shingle query
    pointNN = '10'             # how many near points to return per track
    trackNN = '10'             # how many near tracks to return
    radius = '1.0'             # search radius
    absoluteThreshold = '-4.5' # absolute silence threshold in Bels (query and database shingles)
    relativeThreshold = '0'    # relative silence threshold in Bels between features, 0 = ignore
    exhaustive = '0'           # 1 = perform query using all subsequences of features of length sequenceLength
    lshExact = '0'             # if using an index then compute exact distances after LSH retrieval
    message = SHINGLE_QUERY_TEMPLATE % (
        dbName, featureDim, queryVector, powerVector, queryPos, pointNN,
        trackNN, radius, absoluteThreshold, relativeThreshold, exhaustive,
        lshExact)
    print(message)
    response = SendXMLCommand(message)
    ParseShingleXML(response)


############### Sequence Query - Show the data closest to one query ###########
def RunSequenceQuery(argv):
    """Send a sequence query and print the matches.

    argv is the full command line.  With more than two entries it must be
    [prog, option, feature_file, pos, len]; with two or fewer a built-in
    example query is used.  A partial command line (three or four entries)
    prints the syntax line and exits with status 1.  Returns None.
    """
    qType = '32'  # nSequence
    pointNN = '10'
    trackNN = '5'
    if len(argv) > 2:
        if len(argv) < 5:
            # feature_file, pos and len must all be given together.
            print("Syntax: " + argv[0] + " -q feature_file pos len")
            sys.exit(1)
        dbKey = argv[2]
        qPos = argv[3]
        seqLen = argv[4]
        queryRadius = '0.5'
    else:
        dbKey = 'tmp/3.chr'
        qPos = '110'
        seqLen = '20'
        queryRadius = '0.4'

    message = SEQUENCE_TEMPLATE % (dbName, dbKey, qType, qPos, pointNN,
                                   trackNN, seqLen, queryRadius)
    response = SendXMLCommand(message)
    ParseShingleXML(response)


def _TagTexts(dom, tag):
    """Return the text of every element named tag, "" for empty elements.

    Under Python 2 each text is the latin-1 encoded byte string; under
    Python 3 it is the text itself (native str on either interpreter).
    """
    texts = []
    for node in dom.getElementsByTagName(tag):
        child = node.firstChild
        if child is None:
            texts.append("")
            continue
        encoded = child.data.encode('latin-1')
        if isinstance(encoded, str):
            texts.append(encoded)
        else:
            texts.append(child.data)
    return texts


def ParseShingleXML(response):
    """Print a "position distance result" line for each match in response.

    The i-th 'Spos', 'Dist' and 'Rlist' elements form one line; a blank
    line is printed whenever the result differs from the previous one.  If
    the three element lists have different lengths, only complete triples
    are printed.  Returns None.
    """
    # Grab all the responses
    # See http://diveintopython.org/xml_processing/parsing_xml.html
    dom = parseString(response)
    try:
        resultList = _TagTexts(dom, 'Rlist')
        distanceList = _TagTexts(dom, 'Dist')
        positionList = _TagTexts(dom, 'Spos')
    finally:
        dom.unlink()

    # Print out a summary of the most similar results
    previous = None
    rows = zip(positionList, distanceList, resultList)
    for i, (position, distance, result) in enumerate(rows):
        if i > 0 and result != previous:
            print("")
        print("%s %s %s" % (position, distance, result))
        previous = result


############### XML and Network Utilities ###########
# Send one XML SOAP command to the server.  Get back the response.

def SendXMLCommand(message):
    """Send message to serverHost:serverPort and return the reply body.

    The whole message is sent, then the reply is read until the server
    closes the connection.  Everything up to the first blank line
    (CR LF CR LF) is treated as the HTTP header and dropped; a reply
    without that separator is returned whole.  The reply is returned as
    received from the socket (a byte string).  If nothing at all is
    received, a message is printed and the program exits.

    Uses bytes literals, so it needs Python 2.6 or later.
    """
    if debug:
        print(message)
        print("")

    # Sockets carry bytes; encode text that is not already a byte string.
    if not isinstance(message, bytes):
        message = message.encode('utf-8')

    #Create a socket
    sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        #Connect to server
        sSock.connect((serverHost, serverPort))

        #Send messages (sendall retries until every byte has been written)
        sSock.sendall(message)

        # Now loop, while getting all the data we can get from the socket.
        chunks = []
        while True:
            c = sSock.recv(4096)
            if not c:
                break
            chunks.append(c)
    finally:
        # Close the socket on every path, including errors.
        sSock.close()

    data = b"".join(chunks)
    if not data:
        print("No response from the audioDB server")
        sys.exit(0)

    # Split off the HTTP header and the data
    header, separator, response = data.partition(b"\r\n\r\n")
    if not separator:
        response = data
    if debug:
        if isinstance(response, str):
            shown = response
        else:
            shown = response.decode('utf-8', 'replace')
        print('Client received:  %s' % shown)

    return response


############### Main Program - Figure out which query we want ###########

# Argument processing scheme described at: http://docs.python.org/lib/module-getopt.html

import sys
if __name__=="__main__":
    if len(sys.argv) == 1:
        print("Syntax: " + sys.argv[0] + " -q feature_file pos len")
        sys.exit(1)

    queryType = sys.argv[1]
    if queryType == '-s' or queryType == 'status':
        response = RunStatusQuery()
        for k, v in response.items():
            print("%s %s" % (k, v))
    elif queryType == '-q' or queryType == 'query':
        RunSequenceQuery(sys.argv)
    elif queryType == '-l' or queryType == 'list':
        response = RunListQuery()
        results = ParseListXML(response)
        for (f, l) in results:
            print("%s\t%s" % (f, l))
    elif queryType == '-v' or queryType == 'vector':
        response = RunShingleQuery()
    else:
        # An unrecognised option used to be ignored without any output.
        print("Unknown query type: " + queryType)
        print("Syntax: " + sys.argv[0] + " -q feature_file pos len")
        sys.exit(1)
// --- a/audioDBws.h Wed Oct 08 17:23:15 2008 +0000
// +++ b/audioDBws.h Fri Nov 07 12:48:26 2008 +0000
// @@ -47,6 +47,14 @@
// (The next two lines close a struct whose beginning lies above this hunk.)
  unsigned int *Rlen;
};

// Serialized query vector passed to adb__shingleQuery in place of a feature
// file name.  In soap.cpp, adb__shingleQuery writes dim followed by the
// __sizev doubles at v to a temporary feature file and, when __sizep is
// non-zero, the __sizep doubles at p to a temporary power file.
struct adb__queryVector {
  int dim; // dimensionality of the feature (d)
  int __sizev; // l x d : number of doubles stored at v
  double *v; // pointer to query data
  int __sizep; // number of doubles stored at p (0 = no powers supplied)
  double *p; // pointer to the power values
};

// Response wrapper for the liszt (list) operation.
struct adb__lisztResponse {
  struct adb__lisztResult result;
};

// @@ -61,3 +69,7 @@
// (Declarations between the two hunks are not shown here.)
// Service operations.  In each prototype the trailing reference parameter
// receives the result.
int adb__query(xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__string powerFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int segNN, xsd__int segLen, xsd__double radius, xsd__double absolute_threshold, xsd__double relative_threshold, xsd__int exhaustive, xsd__int lsh_exact, struct adb__queryResponse &adbQueryResponse);

int adb__sequenceQueryByKey(xsd__string dbName,xsd__string trackKey, xsd__string featureFileName, xsd__int queryType,xsd__string trackFileName,xsd__string timesFileName,xsd__int queryPoint,xsd__int pointNN,xsd__int trackNN,xsd__int sequenceLength,xsd__double radius,xsd__double absolute_threshold,xsd__int usingQueryPoint,xsd__int lsh_exact,struct adb__queryResponse &adbQueryResponse);

// Query an audioDB database by vector (serialized), queryKey/featureFileName is here replaced with qVector
int adb__shingleQuery(xsd__string dbName, struct adb__queryVector qVector, xsd__string keyList, xsd__string timesFileName, xsd__int queryType, xsd__int queryPos, xsd__int pointNN, xsd__int trackNN, xsd__int sequenceLength, xsd__double radius, xsd__double absolute_threshold, xsd__double relative_threshold, xsd__int exhaustive, xsd__int lsh_exact, struct adb__queryResponse &adbQueryResponse);
--- a/soap.cpp Wed Oct 08 17:23:15 2008 +0000 +++ b/soap.cpp Fri Nov 07 12:48:26 2008 +0000 @@ -282,7 +282,6 @@ } argv[argv_counter] = NULL; - try { audioDB(argc, (char* const*)argv, &adbQueryResponse); delete [] argv; @@ -359,7 +358,75 @@ return SOAP_FAULT; } } - + +// Query an audioDB database by vector (serialized) +int adb__shingleQuery(struct soap* soap, xsd__string dbName, struct adb__queryVector qVector, xsd__string keyList, xsd__string timesFileName, xsd__int queryType, xsd__int queryPos, xsd__int pointNN, xsd__int trackNN, xsd__int sequenceLength, xsd__double radius, xsd__double absolute_threshold, xsd__double relative_threshold, xsd__int exhaustive, xsd__int lsh_exact, struct adb__queryResponse &adbQueryResponse){ + + // open a tmp file on the server, write shingle, query as a file with query point 0 + // and shingle length l/dim + char tmpFileName[] = "/tmp/adb_XXXXXX"; + int tmpFid = mkstemp(tmpFileName); + if(tmpFid==-1){ + cerr << "Cannot make tmpfile <" << tmpFileName << "> on server" << endl; + return SOAP_FAULT; + } + + FILE* tmpFile = fdopen(tmpFid, "r+b"); + if(!tmpFile){ + cerr << "error opening <" << tmpFileName << "> for write" << endl; + return SOAP_FAULT; + } + + if(fwrite(&qVector.dim, sizeof(int), 1, tmpFile)!=1){ + cerr << "error writing tmp file dim <"<< tmpFileName << ">" << endl; + return SOAP_FAULT; + } + + if(fwrite(qVector.v, sizeof(double), qVector.__sizev, tmpFile)!=(size_t)qVector.__sizev){ + cerr << "error writing tmp file doubles <" << tmpFileName << ">" << endl; + return SOAP_FAULT; + } + + // Close the file so that a new FD can be opened + fclose(tmpFile); + + char tmpFileName2[] = "/tmp/adbP_XXXXXX"; + int tmpFid2 = 0; + FILE* tmpFile2 = NULL; + + // Check if powers have been passed and write accordingly + if(qVector.__sizep){ + tmpFid2 = mkstemp(tmpFileName2); + tmpFile2 = fdopen(tmpFid2, "r+b"); + if(!tmpFile2){ + cerr << "error opening power file <" << tmpFileName2 << "> for write" << endl; + return SOAP_FAULT; + } + int 
pSize=1; + if(fwrite(&pSize, sizeof(int), 1, tmpFile2)!=1){ + cerr << "error writing tmp power file dim <"<< tmpFileName2 << ">" << endl; + return SOAP_FAULT; + } + + if(fwrite(qVector.p, sizeof(double), qVector.__sizep, tmpFile2)!=(size_t)qVector.__sizep){ + cerr << "error writing tmp power file doubles <" << tmpFileName2 << ">" << endl; + return SOAP_FAULT; + } + fclose(tmpFile2); + } + + // fix up sequenceLength if it isn't provided, we know what the caller wants by the size of the shingle + // and the feature dimensionality + if(!sequenceLength) + sequenceLength = qVector.__sizev/qVector.dim; + + int retVal = adb__query(soap, dbName, tmpFileName, keyList, timesFileName, qVector.__sizep?tmpFileName2:0, + queryType, queryPos, pointNN, trackNN, sequenceLength, radius, + absolute_threshold, relative_threshold, exhaustive, lsh_exact, adbQueryResponse); + + return retVal; +} + /* Server loop */ void audioDB::startServer(){ struct soap soap;