changeset 354:4871a3ed9e36 pre_api

Added serialized feature query Web Service, server-side only, example Python script: QueryADB.py -v
author mas01mc
date Fri, 07 Nov 2008 12:48:26 +0000
parents 0d02bcd74a40
children 94c18f128ce8
files QueryADB.py audioDBws.h soap.cpp
diffstat 3 files changed, 404 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/QueryADB.py	Fri Nov 07 12:48:26 2008 +0000
@@ -0,0 +1,323 @@
+#!/usr/bin/python
+
+# Python code/library to query the audioDB via the SOAP web interface.
+# by Malcolm Slaney, August/September 2008
+# malcolm@ieee.org
+
+import sys, socket
+from xml.dom.minidom import parseString
+
+# Module-wide settings read by the query functions in this file.
+debug = False            # when True, SendXMLCommand echoes each request and reply
+dbName = 'tutorial.adb'  # database name substituted into every request template
+
+# From: http://www.informit.com/articles/article.aspx?p=686162&seqNum=2
+#serverHost = 'research-hm3.corp.sk1.yahoo.com'
+serverHost = 'localhost'
+serverPort = 14475
+
+# Start the server on serverHost with
+#	./audioDB -s 14475
+
+# Here are the templates used for the different kinds of queries.  We'll fill in the
+# desired parameters as we go.
+# SOAP envelope for the adb:liszt (list database contents) request.
+# Placeholders, in order: database name.  The listing offset (0) and
+# length (100000) are fixed in the template.
+LIST_TEMPLATE = """
+<?xml version="1.0" encoding="UTF-8"?>
+<SOAP-ENV:Envelope
+ xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
+ xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+ xmlns:adb="http://tempuri.org/adb.xsd">
+ <SOAP-ENV:Body>
+  <adb:liszt>
+   <dbName>%s</dbName>
+   <lisztOffset>0</lisztOffset>
+   <lisztLength>100000</lisztLength>
+  </adb:liszt>
+ </SOAP-ENV:Body>
+</SOAP-ENV:Envelope>
+"""
+
+# SOAP envelope for the adb:sequenceQueryByKey request.
+# Placeholders, in order: database name, feature file name (the key to query
+# with), query type, query point, pointNN, trackNN, sequence length, radius.
+# The absolute threshold (-4.0), usingQueryPoint (1) and lsh-exact (0) values
+# are fixed in the template.
+SEQUENCE_TEMPLATE = """
+<?xml version="1.0" encoding="UTF-8"?>
+<SOAP-ENV:Envelope
+xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
+xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+xmlns:adb="http://tempuri.org/adb.xsd">
+<SOAP-ENV:Body>
+ <adb:sequenceQueryByKey>
+  <dbName>%s</dbName>
+  <featureFileName>%s</featureFileName>
+  <queryType>%s</queryType>
+  <trackFileName></trackFileName>
+  <timesFileName></timesFileName>
+  <queryPoint>%s</queryPoint>
+  <pointNN>%s</pointNN>			<!-- Number of nearest neighbors to retrieve -->
+  <trackNN>%s</trackNN>			<!-- Number of tracks to retrieve -->
+  <sequenceLength>%s</sequenceLength>	<!-- Number of frames in a shingle -->
+  <radius>%s</radius>			<!-- Distance radius to search -->
+  <absolute-threshold>-4.0</absolute-threshold>
+  <usingQueryPoint>1</usingQueryPoint>
+  <lsh-exact>0</lsh-exact>
+ </adb:sequenceQueryByKey>
+</SOAP-ENV:Body>
+</SOAP-ENV:Envelope>
+"""
+
+# SOAP envelope for the adb:status request.
+# Placeholders, in order: database name.
+STATUS_TEMPLATE = """
+<?xml version="1.0" encoding="UTF-8"?>
+<SOAP-ENV:Envelope
+ xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
+ xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+ xmlns:adb="http://tempuri.org/adb.xsd">
+ <SOAP-ENV:Body>
+  <adb:status>
+   <dbName>%s</dbName>
+  </adb:status>
+ </SOAP-ENV:Body>
+</SOAP-ENV:Envelope>
+"""
+
+# SOAP envelope for the adb:shingleQuery (query by serialized vector) request.
+# Placeholders, in order: database name, feature dimensionality, the <v>
+# feature elements, the <p> power elements, query position, pointNN, trackNN,
+# radius, absolute threshold, relative threshold, exhaustive flag, lsh-exact
+# flag.  queryType (32) and sequenceLength (1) are fixed in the template.
+SHINGLE_QUERY_TEMPLATE = """
+<?xml version="1.0" encoding="UTF-8"?>
+<SOAP-ENV:Envelope
+ xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
+ xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+ xmlns:adb="http://tempuri.org/adb.xsd">
+ <SOAP-ENV:Body>
+  <adb:shingleQuery>
+   <dbName>%s</dbName>
+   <qVector>
+   <dim>%s</dim>
+      %s
+      %s
+   </qVector>
+   <keyList></keyList>
+   <timesFileName></timesFileName>
+   <queryType>32</queryType>
+   <queryPos>%s</queryPos>
+   <pointNN>%s</pointNN>
+   <trackNN>%s</trackNN>
+   <sequenceLength>1</sequenceLength>
+   <radius>%s</radius>
+   <absolute-threshold>%s</absolute-threshold>
+   <relative-threshold>%s</relative-threshold>
+   <exhaustive>%s</exhaustive>
+   <lsh-exact>%s</lsh-exact>
+  </adb:shingleQuery>
+ </SOAP-ENV:Body>
+</SOAP-ENV:Envelope>
+"""
+
+###############  List Query - Show the files in the database ###########
+# RunListQuery returns the raw XML reply; ParseListXML turns that reply into a
+# list of [key identifier, frame length] pairs.
+def RunListQuery():
+	"""Send the list request for dbName and return the server's reply text.
+
+	The reply is returned exactly as SendXMLCommand hands it back (unparsed).
+	"""
+	return SendXMLCommand(LIST_TEMPLATE % dbName)
+
+# Construct a list from the two DOM entries passed.  Used with the map routine to 
+# assemble the output.
+def ParseListConstruct(f,l):
+	"""Return [text of f, text of l], each encoded as latin-1.
+
+	f and l are DOM elements whose first child is a text node.
+	"""
+	key = f.firstChild.data.encode('latin-1')
+	length = l.firstChild.data.encode('latin-1')
+	return [key, length]
+	
+def ParseListXML(response):
+	"""Parse a list-query reply into [key, length] pairs.
+
+	Every 'Rkey' element is paired, in document order, with the 'Rlen'
+	element at the same position; ParseListConstruct builds each pair.
+	"""
+	doc = parseString(response)
+	keys = doc.getElementsByTagName('Rkey')
+	lengths = doc.getElementsByTagName('Rlen')
+	return map(ParseListConstruct, keys, lengths)
+
+###############  Status Query - Show the status of the database ###########
+# Return a dictionary with the status fields
+def GetDomElement(dom, field):
+	"""Return the latin-1 encoded text of the first <field> element in dom.
+
+	Returns "" when the document has no such element, or when the first such
+	element is empty (it has no child node to read text from).
+	"""
+	els = dom.getElementsByTagName(field)
+	if not els:
+		return ""
+	child = els[0].firstChild
+	# An empty element such as <dim/> has firstChild None; treat it like a
+	# missing field instead of failing with AttributeError.
+	if child is None:
+		return ""
+	return child.data.encode('latin-1')
+
+def RunStatusQuery():
+	"""Send the status request for dbName and return the reply as a dictionary.
+
+	The keys are 'numFiles', 'dim', 'length', 'dudCount' and 'nullCount'; each
+	value is what GetDomElement extracts for the tag of the same name.
+	"""
+	dom = parseString(SendXMLCommand(STATUS_TEMPLATE % dbName))
+	fields = ('numFiles', 'dim', 'length', 'dudCount', 'nullCount')
+	return dict((name, GetDomElement(dom, name)) for name in fields)
+
+
+
+###############  Shingle/Matrix Query - Show the data closest to shingle range ###########
+#
+# Encode features as a matrix with dim columns, and (optionally) powers as a matrix with one column
+# dim   # number of columns
+# f1,1 f1,2...f1,dim # first row
+# f2,1 f2,2...f2,dim
+# ...
+# fN,1 fN,2...fN,dim # last row
+# p1,1         # first row's power
+# p2,1
+# ...
+# pN,1         # last row's power
+#
+def RunShingleQuery():
+	"""Send a fixed example shingle (vector) query and print the matches.
+
+	The request is built from SHINGLE_QUERY_TEMPLATE, echoed to stdout, sent
+	with SendXMLCommand, and the reply is printed by ParseShingleXML.
+	"""
+	# Values are listed in the order the template's placeholders expect them.
+	fields = (
+		dbName,
+		'3',     # feature dimensionality
+		'<v>1.0</v><v>0.5</v><v>0.25</v><v>0.5</v><v>1.5</v><v>1.0</v>',  # two rows of features
+		'<p>-1.0</p><p>-1.0</p>',   # one power feature per row
+		'0',     # where in the feature sequence to start the shingle query
+		'10',    # how many near points to return per track
+		'10',    # how many near tracks to return
+		'1.0',   # search radius
+		'-4.5',  # absolute silence threshold in Bels (query and database shingles)
+		'0',     # relative silence threshold in Bels between features, 0 = ignore
+		'0',     # 1 = perform query using all subsequences of features of length sequenceLength
+		'0',     # if using an index then compute exact distances after LSH retrieval
+	)
+	message = SHINGLE_QUERY_TEMPLATE % fields
+	print message
+	ParseShingleXML(SendXMLCommand(message))
+		
+
+###############  Sequence Query - Show the data closest to one query ###########
+def RunSequenceQuery(argv):
+	"""Run a sequence query by key and print the nearest matches.
+
+	argv is the full command line.  When it carries all three query values,
+	argv[2] is the feature-file key, argv[3] the query position and argv[4]
+	the sequence length.  When nothing follows the query flag, a built-in
+	example query is sent.  When only some of the three values are given,
+	the usage line is printed and the program exits with status 1.
+	"""
+	qType = '32'			# nSequence
+	pointNN = '10'
+	trackNN = '5'
+	if len(argv) > 4:
+		dbKey = argv[2]
+		qPos = argv[3]
+		seqLen = argv[4]
+		queryRadius = '0.5'
+	elif len(argv) > 2:
+		# Previously this case fell into the branch above and failed with an
+		# IndexError on argv[3] / argv[4].
+		print "Syntax: " + argv[0] + " -q feature_file pos len"
+		sys.exit(1)
+	else:
+		dbKey = 'tmp/3.chr'
+		qPos = '110'
+		seqLen = '20'
+		queryRadius = '0.4'
+
+	message = SEQUENCE_TEMPLATE%(dbName, dbKey, qType, qPos, pointNN, trackNN, seqLen, queryRadius)
+	response = SendXMLCommand(message)
+	ParseShingleXML(response)
+
+def ParseShingleXML(response):
+	"""Print one "position distance result" line per match in a query reply.
+
+	The texts of the 'Rlist', 'Dist' and 'Spos' elements are collected in
+	document order and matched up by index.  A blank line is printed whenever
+	the result name differs from the one on the previous line.
+	"""
+	#	See http://diveintopython.org/xml_processing/parsing_xml.html
+	dom = parseString(response)
+
+	def texts(tag):
+		# latin-1 encoded text of every <tag> element, in document order
+		return [node.firstChild.data.encode('latin-1') for node in dom.getElementsByTagName(tag)]
+
+	resultList = texts('Rlist')
+	distanceList = texts('Dist')
+	positionList = texts('Spos')
+
+	# Print out a summary of the most similar results
+	previous = None
+	for i, result in enumerate(resultList):
+		if i > 0 and result != previous:
+			print
+		print positionList[i], distanceList[i], result
+		previous = result
+
+	dom.unlink()
+
+###############  XML and Network Utilities ###########
+# Send one XML SOAP command to the server.  Get back the response.
+
+def SendXMLCommand(message):
+	"""Send one SOAP request to the audioDB server and return the reply body.
+
+	Connects to (serverHost, serverPort), sends message, then reads until the
+	server closes the connection.  The text after the first blank line
+	("\\r\\n\\r\\n") is returned; if the reply has no such separator the whole
+	reply is returned.  When debug is set, the request and the reply body
+	are printed.
+
+	Exits the program with status 1 if the server sends nothing back.
+	Socket errors propagate to the caller.
+	"""
+	if debug:
+		print message
+		print
+
+	#Create a socket
+	sSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+	try:
+		#Connect to server
+		sSock.connect((serverHost, serverPort))
+
+		# sendall keeps writing until the whole request has gone out; a plain
+		# send may stop after only part of it.
+		sSock.sendall(message)
+
+		# Now loop, while getting all the data we can get from the socket.
+		chunks = []
+		while True:
+			c = sSock.recv(4096)
+			if not c:
+				break
+			chunks.append(c)
+	finally:
+		# Release the socket on every path, including errors.
+		sSock.close()
+
+	data = "".join(chunks)
+	if data == "":
+		print "No response from the audioDB server"
+		# This is a failure, so report it with a non-zero exit status.
+		sys.exit(1)
+
+	# Split off the HTTP header and the data
+	if "\r\n\r\n" in data:
+		response = data.split("\r\n\r\n", 1)[1]
+	else:
+		# NOTE(review): no header separator found; assuming the reply is all
+		# body rather than failing on the unpack - confirm against the server.
+		response = data
+	if debug:
+		print 'Client received: ',response
+
+	return response
+
+	
+###############  Main Program - Figure out which query we want ###########
+
+# Argument processing scheme described at: http://docs.python.org/lib/module-getopt.html
+
+import sys
+# Command-line entry point: the first argument selects the query to run.
+#   -s | status   print the status fields of the database
+#   -q | query    run a sequence query (optionally: feature_file pos len)
+#   -l | list     print the key and length of every entry in the database
+#   -v | vector   run the example shingle (vector) query
+if __name__=="__main__":
+	if len(sys.argv) == 1:
+		print "Syntax: " + sys.argv[0] + " -q feature_file pos len"
+		sys.exit(1)
+
+	queryType = sys.argv[1]
+	if queryType in ('-s', 'status'):
+		for k, v in RunStatusQuery().iteritems():
+			print k, v
+	elif queryType in ('-q', 'query'):
+		RunSequenceQuery(sys.argv)
+	elif queryType in ('-l', 'list'):
+		for (f,l) in ParseListXML(RunListQuery()):
+			print "%s\t%s" % (f,l)
+	elif queryType in ('-v', 'vector'):
+		RunShingleQuery()
+	else:
+		# An unrecognised option used to fall through silently with status 0.
+		print "Unknown query type: " + queryType
+		print "Syntax: " + sys.argv[0] + " -q feature_file pos len"
+		sys.exit(1)
+		
+		
--- a/audioDBws.h	Wed Oct 08 17:23:15 2008 +0000
+++ b/audioDBws.h	Fri Nov 07 12:48:26 2008 +0000
@@ -47,6 +47,14 @@
   unsigned int *Rlen;
 };
 
+// Serialized query features for adb__shingleQuery: feature values in v and,
+// optionally, power values in p.
+// NOTE(review): the __size<name>/pointer pairs look like the gSOAP
+// dynamic-array convention - confirm against the gSOAP documentation.
+struct adb__queryVector {
+  int dim; // dimensionality of the feature (d)
+  int __sizev; // l x d : number of doubles stored in v
+  double *v;   // pointer to query data
+  int __sizep; // number of doubles stored in p; 0 means no powers were passed
+  double *p;   // pointer to power data
+};
+
 struct adb__lisztResponse {
   struct adb__lisztResult result;
 };
@@ -61,3 +69,7 @@
 int adb__query(xsd__string dbName, xsd__string qKey, xsd__string keyList, xsd__string timesFileName, xsd__string powerFileName, xsd__int qType, xsd__int qPos, xsd__int pointNN, xsd__int segNN, xsd__int segLen, xsd__double radius, xsd__double absolute_threshold, xsd__double relative_threshold, xsd__int exhaustive, xsd__int lsh_exact, struct adb__queryResponse &adbQueryResponse);
 
 int adb__sequenceQueryByKey(xsd__string dbName,xsd__string trackKey, xsd__string featureFileName, xsd__int queryType,xsd__string trackFileName,xsd__string timesFileName,xsd__int queryPoint,xsd__int pointNN,xsd__int trackNN,xsd__int sequenceLength,xsd__double radius,xsd__double absolute_threshold,xsd__int usingQueryPoint,xsd__int lsh_exact,struct adb__queryResponse &adbQueryResponse);
+
+// Query an audioDB database by vector (serialized), queryKey/featureFileName is here replaced with qVector
+// When sequenceLength is 0 the server derives it as qVector.__sizev / qVector.dim.
+int adb__shingleQuery(xsd__string dbName, struct adb__queryVector qVector, xsd__string keyList, xsd__string timesFileName, xsd__int queryType, xsd__int queryPos, xsd__int pointNN, xsd__int trackNN, xsd__int sequenceLength, xsd__double radius, xsd__double absolute_threshold, xsd__double relative_threshold, xsd__int exhaustive, xsd__int lsh_exact, struct adb__queryResponse &adbQueryResponse);
+
--- a/soap.cpp	Wed Oct 08 17:23:15 2008 +0000
+++ b/soap.cpp	Fri Nov 07 12:48:26 2008 +0000
@@ -282,7 +282,6 @@
   }
   argv[argv_counter] = NULL;
 
-
   try {
     audioDB(argc, (char* const*)argv, &adbQueryResponse);
     delete [] argv;
@@ -359,7 +358,75 @@
     return SOAP_FAULT;
   }
 }
-
+
+// Query an audioDB database by vector (serialized)
+//
+// The features in qVector (and the powers, when qVector.__sizep is non-zero)
+// are written to temporary files under /tmp, each as an int header followed
+// by raw doubles, and the query is then delegated to adb__query with those
+// file names.  A sequenceLength of 0 is replaced by __sizev / dim.
+//
+// Returns whatever adb__query returns, or SOAP_FAULT when the vector is
+// malformed or a temporary file cannot be created or written.  The temporary
+// files are removed before returning on every path.
+// NOTE(review): removal after the call assumes adb__query has finished
+// reading the files by the time it returns - confirm.
+int adb__shingleQuery(struct soap* soap, xsd__string dbName, struct adb__queryVector qVector, xsd__string keyList, xsd__string timesFileName, xsd__int queryType, xsd__int queryPos, xsd__int pointNN, xsd__int trackNN, xsd__int sequenceLength, xsd__double radius, xsd__double absolute_threshold, xsd__double relative_threshold, xsd__int exhaustive, xsd__int lsh_exact, struct adb__queryResponse &adbQueryResponse){
+
+  // The vector comes straight from the client.  Reject sizes that would turn
+  // into huge size_t counts in fwrite, missing data pointers, and a dim that
+  // would divide by zero when sequenceLength is derived below.
+  if(qVector.dim<=0 || qVector.__sizev<0 || qVector.__sizep<0 ||
+     (qVector.__sizev && !qVector.v) || (qVector.__sizep && !qVector.p)){
+    cerr << "invalid query vector: dim=" << qVector.dim << " sizev=" << qVector.__sizev
+	 << " sizep=" << qVector.__sizep << endl;
+    return SOAP_FAULT;
+  }
+
+  // open a tmp file on the server, write shingle, query as a file with query point 0
+  // and shingle length l/dim
+  char tmpFileName[] = "/tmp/adb_XXXXXX";
+  int tmpFid = mkstemp(tmpFileName);
+  if(tmpFid==-1){
+    cerr << "Cannot make tmpfile <" << tmpFileName << "> on server" << endl;
+    return SOAP_FAULT;
+  }
+
+  FILE* tmpFile = fdopen(tmpFid, "r+b");
+  if(!tmpFile){
+    cerr << "error opening <" << tmpFileName << "> for write" << endl;
+    // NOTE(review): tmpFid itself is still open here; closing it needs
+    // close() from <unistd.h>, which may not be included in this file.
+    remove(tmpFileName);
+    return SOAP_FAULT;
+  }
+
+  if(fwrite(&qVector.dim, sizeof(int), 1, tmpFile)!=1){
+    cerr << "error writing tmp file dim <"<< tmpFileName << ">" << endl;
+    fclose(tmpFile);
+    remove(tmpFileName);
+    return SOAP_FAULT;
+  }
+
+  if(fwrite(qVector.v, sizeof(double), qVector.__sizev, tmpFile)!=(size_t)qVector.__sizev){
+    cerr << "error writing tmp file doubles <" << tmpFileName << ">" << endl;
+    fclose(tmpFile);
+    remove(tmpFileName);
+    return SOAP_FAULT;
+  }
+
+  // Close the file so that a new FD can be opened.  Buffered data is flushed
+  // here, so a failed close means the file may be incomplete.
+  if(fclose(tmpFile)!=0){
+    cerr << "error closing tmp file <" << tmpFileName << ">" << endl;
+    remove(tmpFileName);
+    return SOAP_FAULT;
+  }
+
+  char tmpFileName2[] = "/tmp/adbP_XXXXXX";
+  const bool havePowers = qVector.__sizep > 0;
+
+  // Check if powers have been passed and write accordingly
+  if(havePowers){
+    int tmpFid2 = mkstemp(tmpFileName2);
+    if(tmpFid2==-1){
+      cerr << "Cannot make tmpfile <" << tmpFileName2 << "> on server" << endl;
+      remove(tmpFileName);
+      return SOAP_FAULT;
+    }
+    FILE* tmpFile2 = fdopen(tmpFid2, "r+b");
+    if(!tmpFile2){
+      cerr << "error opening power file <" << tmpFileName2 << "> for write" << endl;
+      remove(tmpFileName2);
+      remove(tmpFileName);
+      return SOAP_FAULT;
+    }
+    int pSize=1; // the power file is written with a header of 1 (one value per row)
+    if(fwrite(&pSize, sizeof(int), 1, tmpFile2)!=1){
+      cerr << "error writing tmp power file dim <"<< tmpFileName2 << ">" << endl;
+      fclose(tmpFile2);
+      remove(tmpFileName2);
+      remove(tmpFileName);
+      return SOAP_FAULT;
+    }
+
+    if(fwrite(qVector.p, sizeof(double), qVector.__sizep, tmpFile2)!=(size_t)qVector.__sizep){
+      cerr << "error writing tmp power file doubles <" << tmpFileName2 << ">" << endl;
+      fclose(tmpFile2);
+      remove(tmpFileName2);
+      remove(tmpFileName);
+      return SOAP_FAULT;
+    }
+    if(fclose(tmpFile2)!=0){
+      cerr << "error closing tmp power file <" << tmpFileName2 << ">" << endl;
+      remove(tmpFileName2);
+      remove(tmpFileName);
+      return SOAP_FAULT;
+    }
+  }
+
+  // fix up sequenceLength if it isn't provided, we know what the caller wants by the size of the shingle
+  // and the feature dimensionality
+  if(!sequenceLength)
+    sequenceLength = qVector.__sizev/qVector.dim;
+
+  int retVal = adb__query(soap, dbName, tmpFileName, keyList, timesFileName, havePowers?tmpFileName2:0,
+			  queryType, queryPos, pointNN, trackNN, sequenceLength, radius,
+			  absolute_threshold, relative_threshold, exhaustive, lsh_exact, adbQueryResponse);
+
+  // The query has run; do not leave the serialized vector behind in /tmp.
+  remove(tmpFileName);
+  if(havePowers)
+    remove(tmpFileName2);
+
+  return retVal;
+}
+
 /* Server loop */
 void audioDB::startServer(){
   struct soap soap;