changeset 756:9b75573be3b9 mkc_lsh_update

Copied trunk python bindings -r 1110 to this branch.
author mas01mc
date Fri, 26 Nov 2010 06:58:55 +0000
parents 9bd13c7819ae
children ee612b7bd922
files bindings/python/pyadb.py bindings/python/pyadbmodule.c
diffstat 2 files changed, 430 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/bindings/python/pyadb.py	Thu Nov 25 13:42:40 2010 +0000
+++ b/bindings/python/pyadb.py	Fri Nov 26 06:58:55 2010 +0000
@@ -72,11 +72,13 @@
 				args["power"]=powerFile
 			elif featData.any():
 				args["power"]=powerData
+		if timesData != None:
+			self.hasTimes=True
 		if self.hasTimes:
 			if featFile:
 				args["times"]=timesFile
 			elif timesData.any():
-				pass
+				args["times"]=timesData
 		if key:
 			args["key"]=str(key)
 		if featFile:
@@ -94,6 +96,11 @@
 				if (len(args["power"].shape) == 1) : 
 					args["power"] = args["power"].reshape((args["power"].shape[0],1))
 				args["power"] = args["power"].flatten()
+			if(self.hasTimes and timesData != None):
+				if (len(args["times"].shape) == 1) : 
+					args["times"] = args["times"].reshape((args["times"].shape[0],1))
+				args["times"] = args["times"].flatten()
+
 			print "args: " + str(args)
 			ok = _pyadb._pyadb_insertFromArray(**args)
 			if not (ok==0):
@@ -149,6 +156,26 @@
 		elif featData:
 			raise NotImplementedError("direct data query not yet implemented.  Sorry.")
 		return Pyadb.Result(result, self.configQuery)
+
+	def query_data(self, featData=None, powerData=None, timesData=None, strictConfig=True):
+		"""Query the database with numpy arrays: featData is required, powerData and timesData are optional.
+		Query parameters come from self.configQuery (see the configCheck doc string). Raises Usage if featData is None."""
+		if not self.configCheck():
+			if strictConfig:
+				raise ValueError("configQuery dict contains unsupported terms and strict configure mode is on.\n\
+Only keys found in Pyadb.validConfigTerms may be defined")
+			print "configQuery dict contains unsupported terms and strict configure mode is off.\n\
+Only keys found in Pyadb.validConfigTerms should be defined.  Removing invalid terms and proceeding..."
+			self.configCheck(scrub=True)
+		cq = self.configQuery.copy()
+		if featData is None:  # identity test: "== None" on a numpy array compares element by element
+			raise Usage("query requires featData to be defined.")
+		if powerData is not None:
+			cq['power']=powerData
+		if timesData is not None:
+			cq['times']=timesData
+		result = _pyadb._pyadb_queryFromData(self._db, featData, **cq)
+		return Pyadb.Result(result, self.configQuery)
 	
 	def status(self):
 		'''update attributes and return them as a dict'''
@@ -192,6 +219,26 @@
 		def __repr__(self):
 			return repr(self.rawData)
 
+	def liszt(self):
+		'''Return the database keys reported by _pyadb_liszt.
+		Prints an error and returns 0 when self._db is None.'''
+		if self._db is None:
+			print "Error in liszt(): ADB database not defined"
+			return 0
+		return _pyadb._pyadb_liszt(self._db)
+
+	def retrieve_datum(self, key, **args):
+		'''Retrieve the data stored under key via _pyadb_retrieveDatum; keyword arguments are passed through:
+		      features=True, to get features
+		      powers=True, to get Powers
+		      times=True, to get Times
+		Prints an error and returns 0 when self._db is None.
+		'''
+		if self._db is None:
+			print "Error in retrieve_datum(): ADB database not defined"
+			return 0
+		return _pyadb._pyadb_retrieveDatum(self._db, key=key, **args)
+		
 class untitledTests(unittest.TestCase):
 	def setUp(self):
 		pass
--- a/bindings/python/pyadbmodule.c	Thu Nov 25 13:42:40 2010 +0000
+++ b/bindings/python/pyadbmodule.c	Fri Nov 26 06:58:55 2010 +0000
@@ -208,8 +208,8 @@
 			return NULL;
 		}
 		// times = (PyArrayObject *)PyCObject_AsVoidPtr(incomingTime);
-		if (PyArray_NDIM(times) != 1 || PyArray_DIMS(times)[0] == (nVect*2)){
-			PyErr_SetString(PyExc_ValueError, "times, if given must be a 1d numpy array with shape =  (numVectors,)");
+		if (PyArray_NDIM(times) != 1 || PyArray_DIMS(times)[0] != (nVect*2)){
+			PyErr_SetString(PyExc_ValueError, "times, if given must be a 1d numpy array with shape =  (numVectors*2,)");
 			return NULL;
 		}
 	}
@@ -335,7 +335,7 @@
 	adb_t *current_db;
 	adb_query_spec_t *spec;
 	adb_query_results_t *result;
-	int ok, exhaustive=0, falsePositives=0;
+	int ok, exhaustive, falsePositives;
 	uint32_t i;
 	const char *key;
 	const char *accuMode = "db";
@@ -407,22 +407,22 @@
 	}
 	
 	//set up spec->params
-	if (strcmp(accuMode,"db")){
+	if (strcmp(accuMode,"db")==0){
 		spec->params.accumulation = ADB_ACCUMULATION_DB;
-	} else if (strcmp(accuMode,"track")){
+	} else if (strcmp(accuMode,"track")==0){
 		spec->params.accumulation = ADB_ACCUMULATION_PER_TRACK;
-	} else if (strcmp(accuMode,"one2one")){
+	} else if (strcmp(accuMode,"one2one")==0){
 		spec->params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE;
 	} else{
 		PyErr_SetString(PyExc_ValueError, 
 			"Poorly specified distance mode. distance must either be \'db\', \'track\' or  \'one2one\'.\n");
 		return NULL;
 	}
-	if (strcmp(distMode, "dot")){
+	if (strcmp(distMode, "dot")==0){
 		spec->params.distance = ADB_DISTANCE_DOT_PRODUCT;
-	}else if (strcmp(distMode, "eucNorm")){
+	}else if (strcmp(distMode, "eucNorm")==0){
 		spec->params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED;
-	}else if (strcmp(distMode, "euclidean")){
+	}else if (strcmp(distMode, "euclidean")==0){
 		spec->params.distance = ADB_DISTANCE_EUCLIDEAN;
 	}else{
 		PyErr_SetString(PyExc_ValueError, 
@@ -578,6 +578,354 @@
 	
 }
 
+/* Data query: searches the database with feature vectors passed as numpy arrays.
+ * resFmt="dict" returns a dict whose key = result key and whose
+ * value is a list of tuples one per result associated with that key, of the form:
+ *   (dist, qpos, ipos)
+ * resFmt="list" returns a list of (key, dist, qpos, ipos) tuples.  NULL is returned on error.
+ * api call: adb_query_results_t *audiodb_query_spec(adb_t *, const adb_query_spec_t *);
+ ***/
+PyObject * _pyadb_queryFromData(PyObject *self, PyObject *args, PyObject *keywds)
+{
+	adb_t *current_db = NULL;
+	adb_query_spec_t *spec = NULL;
+	adb_query_results_t *result = NULL;
+	/* optional arguments stay untouched when the caller omits them, so they need defaults here */
+	int ok, exhaustive = 0, falsePositives = 0;
+	uint32_t i;
+	const char *accuMode = "db";
+	const char *distMode = "dot";
+	const char *resFmt = "dict";
+	uint32_t hop = 0;
+	double radius = 0;
+	double absThres = 0; 
+	double relThres = 0; 
+	double durRatio = 0;
+	PyObject *includeKeys = NULL;
+	PyObject *excludeKeys = NULL;
+	PyObject *incoming = NULL;
+	PyObject *outgoing = NULL;	/* result container while it is being built */
+	PyObject *retval = NULL;	/* value handed back; stays NULL on every error path */
+	PyObject *thisKey = NULL;
+	PyObject *currentValue = NULL;
+	PyObject *newBits = NULL;
+	PyArrayObject *features = NULL;
+	PyArrayObject *power = NULL;
+	PyArrayObject *times = NULL;
+	PyArrayObject *featArr = NULL;	/* C-contiguous double arrays owned by this function */
+	PyArrayObject *powerArr = NULL;
+	PyArrayObject *timesArr = NULL;
+	adb_status_t *status = NULL;
+
+	static char *kwlist[]  = { "db", "features", 
+				   "seqLength", 
+				   "seqStart", 
+				   "exhaustive", 
+				   "falsePositives",
+				   "accumulation",
+				   "distance",
+				   "npoints",//nearest neighbor points per track
+				   "ntracks",
+				   "includeKeys",
+				   "excludeKeys",
+				   "radius",
+				   "absThres",
+				   "relThres",
+				   "durRatio",
+				   "hopSize",
+				   "resFmt",
+				   "power",
+				   "times",
+				   NULL
+	};
+
+	/* calloc: every pointer inside spec starts out NULL, so cleanup can free them unconditionally */
+	spec = (adb_query_spec_t *)calloc(1, sizeof(adb_query_spec_t));
+	if (spec) {spec->qid.datum = (adb_datum_t *)calloc(1, sizeof(adb_datum_t));}
+	if (!spec || !spec->qid.datum){
+		PyErr_NoMemory();
+		goto cleanup;
+	}
+	spec->qid.sequence_length = 16;
+	spec->qid.sequence_start = 0;
+	spec->qid.flags = 0;
+	spec->params.npoints = 1;
+	spec->params.ntracks = 100;//number of results returned in db mode
+	spec->refine.flags = 0;
+
+	ok =  PyArg_ParseTupleAndKeywords(args, keywds, "OO!|iiiissIIOOddddIsO!O!", kwlist, 
+					  &incoming, &PyArray_Type, &features, 
+					  &spec->qid.sequence_length, 
+					  &spec->qid.sequence_start, 
+					  &exhaustive, &falsePositives,
+					  &accuMode,&distMode,
+					  &spec->params.npoints,
+					  &spec->params.ntracks,
+					  &includeKeys, &excludeKeys,
+					  &radius, &absThres, &relThres, &durRatio, &hop,
+					  &resFmt, 
+					  &PyArray_Type, &power, &PyArray_Type, &times
+												);
+	if (!ok) {goto cleanup;}
+	current_db = (adb_t *)PyCObject_AsVoidPtr(incoming);
+	if (!current_db){
+	  if (!PyErr_Occurred()) {PyErr_SetString(PyExc_TypeError, "queryFromData: db argument does not wrap an ADB database.\n");}
+	  goto cleanup;
+	}
+
+	if (!features){ /* Sanity Check */
+	  PyErr_SetString(PyExc_ValueError, 
+			  "queryFromData: function requires feature data as numpy ndarray. PythonC required keyword check failed.\n");
+	  goto cleanup;
+	}
+
+	/* Check the dimensionality of passed data agrees with the passed database */
+	if(PyArray_NDIM(features)!=2){
+	  PyErr_SetString(PyExc_ValueError, 
+			  "queryFromData: passed features have incorrect shape, should be (nVecs, nDims).\n");
+	  goto cleanup;
+	}
+
+	if(power && PyArray_NDIM(power)!=1){
+	  PyErr_SetString(PyExc_ValueError, 
+			  "queryFromData: passed power have incorrect shape, should be (nVecs,).\n");
+	  goto cleanup;
+	}
+	if(times && PyArray_NDIM(times)!=1){
+	  PyErr_SetString(PyExc_ValueError, 
+			  "queryFromData: passed times have incorrect shape, should be (nVecs,).\n");
+	  goto cleanup;
+	}
+
+	status = (adb_status_t*) malloc(sizeof(adb_status_t));
+	if (!status) {PyErr_NoMemory(); goto cleanup;}
+	if(audiodb_status(current_db, status)){
+	  PyErr_SetString(PyExc_TypeError, "queryFromData failed: could not get status of passed ADB database");
+	  goto cleanup;
+	}
+	/* must be "!=": writing "!a==b" negates a before comparing, which hid mismatches */
+	if(PyArray_DIMS(features)[1]!=status->dim){
+	  PyErr_SetString(PyExc_ValueError, 
+			  "queryFromData: passed features have incorrect dimensionality.\n");
+	  goto cleanup;
+	}
+
+	if(power && PyArray_DIMS(power)[0] != PyArray_DIMS(features)[0]){
+	  PyErr_SetString(PyExc_ValueError, 
+			  "queryFromData: passed power and features have incompatible nVecs dimension.\n");
+	  goto cleanup;
+	}
+	if(times && PyArray_DIMS(times)[0] != PyArray_DIMS(features)[0]){
+	  PyErr_SetString(PyExc_ValueError, 
+			  "queryFromData: passed times and features have incompatible nVecs dimension.\n");
+	  goto cleanup;
+	}
+
+	if (exhaustive){
+		spec->qid.flags = spec->qid.flags | ADB_QID_FLAG_EXHAUSTIVE;
+	}
+	if (falsePositives){
+		spec->qid.flags = spec->qid.flags | ADB_QID_FLAG_ALLOW_FALSE_POSITIVES;
+	}
+	//set up spec->params
+	if (strcmp(accuMode,"db")==0){
+		spec->params.accumulation = ADB_ACCUMULATION_DB;
+	} else if (strcmp(accuMode,"track")==0){
+		spec->params.accumulation = ADB_ACCUMULATION_PER_TRACK;
+	} else if (strcmp(accuMode,"one2one")==0){
+		spec->params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE;
+	} else{
+		PyErr_SetString(PyExc_ValueError, 
+			"Poorly specified accumulation mode. accumulation must either be \'db\', \'track\' or  \'one2one\'.\n");
+		goto cleanup;
+	}
+	if (strcmp(distMode, "dot")==0){
+		spec->params.distance = ADB_DISTANCE_DOT_PRODUCT;
+	}else if (strcmp(distMode, "eucNorm")==0){
+		spec->params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED;
+	}else if (strcmp(distMode, "euclidean")==0){
+		spec->params.distance = ADB_DISTANCE_EUCLIDEAN;
+	}else{
+		PyErr_SetString(PyExc_ValueError, 
+			"Poorly specified distance mode. distance must either be \'dot\', \'eucNorm\' or  \'euclidean\'.\n");
+		goto cleanup;
+	}
+	//set up spec->refine
+	//include/exclude keys
+	if (includeKeys){
+		if (!PyList_Check(includeKeys)){
+			PyErr_SetString(PyExc_TypeError, "Include keys must be specified as a list of strings.\n");
+			goto cleanup;
+		}
+		spec->refine.flags = spec->refine.flags | ADB_REFINE_INCLUDE_KEYLIST;
+		spec->refine.include.nkeys = (uint32_t)PyList_Size(includeKeys);
+		spec->refine.include.keys = (const char **)calloc(sizeof(const char *), spec->refine.include.nkeys);
+		for (i=0;i<spec->refine.include.nkeys;i++){
+			 if (PyString_Check(PyList_GetItem(includeKeys, (Py_ssize_t)i))){
+				spec->refine.include.keys[i] = PyString_AsString(PyList_GetItem(includeKeys, (Py_ssize_t)i));
+			}else{
+				PyErr_SetString(PyExc_TypeError, "Include keys must each be specified as a string.\nFound one that was not.\n");
+				goto cleanup;
+			}
+		}
+	}
+	if (excludeKeys){
+		if (!PyList_Check(excludeKeys)){
+			PyErr_SetString(PyExc_TypeError, "Exclude keys must be specified as a list of strings.\n");
+			goto cleanup;
+		}
+		spec->refine.flags = spec->refine.flags | ADB_REFINE_EXCLUDE_KEYLIST;
+		spec->refine.exclude.nkeys = (uint32_t)PyList_Size(excludeKeys);
+		spec->refine.exclude.keys = (const char **)calloc(sizeof(const char *), spec->refine.exclude.nkeys);
+		for (i=0;i<spec->refine.exclude.nkeys;i++){
+			 if (PyString_Check(PyList_GetItem(excludeKeys, (Py_ssize_t)i))){
+				spec->refine.exclude.keys[i] = PyString_AsString(PyList_GetItem(excludeKeys, (Py_ssize_t)i));
+			}else{
+				PyErr_SetString(PyExc_TypeError, "Exclude keys must each be specified as a string.\nFound one that was not.\n");
+				goto cleanup;
+			}
+		}
+	}
+	//the rest of spec->refine 
+	if (radius){
+		spec->refine.flags = spec->refine.flags | ADB_REFINE_RADIUS;
+		spec->refine.radius = radius;
+	}
+	if (absThres){
+		spec->refine.flags = spec->refine.flags | ADB_REFINE_ABSOLUTE_THRESHOLD;
+		spec->refine.absolute_threshold = absThres;
+	}
+	if (relThres){
+		spec->refine.flags = spec->refine.flags | ADB_REFINE_RELATIVE_THRESHOLD;
+		spec->refine.relative_threshold = relThres;
+	}
+	if (durRatio){
+		spec->refine.flags = spec->refine.flags | ADB_REFINE_DURATION_RATIO;
+		spec->refine.duration_ratio = durRatio;
+	}
+	if (hop){
+		spec->refine.flags = spec->refine.flags | ADB_REFINE_HOP_SIZE;
+                /* not ideal but a temporary bandage fix */
+		spec->refine.qhopsize = hop;
+		spec->refine.ihopsize = hop;
+	}
+
+	/* PyArray_AsCArray yields a table of row pointers for 2-d input and steals a
+	 * descr reference per call, so take C-contiguous double arrays instead and point
+	 * the datum at their storage; they stay referenced until the query is done. */
+	featArr = (PyArrayObject *)PyArray_ContiguousFromAny((PyObject *)features, NPY_DOUBLE, 2, 2);
+	if (!featArr){
+	  PyErr_SetString(PyExc_RuntimeError, "Trouble expressing the feature np array as a C array.");
+	  goto cleanup;
+	}
+	spec->qid.datum->data = (double *)PyArray_DATA(featArr);
+	if (power){
+	  powerArr = (PyArrayObject *)PyArray_ContiguousFromAny((PyObject *)power, NPY_DOUBLE, 1, 1);
+	  if (!powerArr){
+	    PyErr_SetString(PyExc_RuntimeError, "Trouble expressing the power np array as a C array.");
+	    goto cleanup;
+	  }
+	  spec->qid.datum->power = (double *)PyArray_DATA(powerArr);
+	}
+	if (times){
+	  timesArr = (PyArrayObject *)PyArray_ContiguousFromAny((PyObject *)times, NPY_DOUBLE, 1, 1);
+	  if (!timesArr){
+	    PyErr_SetString(PyExc_RuntimeError, "Trouble expressing the times np array as a C array.");
+	    goto cleanup;
+	  }
+	  spec->qid.datum->times = (double *)PyArray_DATA(timesArr);
+	}
+	spec->qid.datum->nvectors = (uint32_t)PyArray_DIMS(featArr)[0];
+	spec->qid.datum->dim = (uint32_t)PyArray_DIMS(featArr)[1];
+	result = audiodb_query_spec(current_db, spec);
+	if (result == NULL){
+		PyErr_SetString(PyExc_RuntimeError, "Encountered an error while running the actual query, or there was nothing returned.\n");
+		goto cleanup;
+	}
+	if(strcmp(resFmt, "dict")==0){
+		outgoing  = PyDict_New();
+		if (!outgoing) {goto cleanup;}
+		for (i=0;i<result->nresults;i++){
+			thisKey = PyString_FromString(result->results[i].ikey);
+			if (!thisKey) {goto cleanup;}
+			currentValue = PyDict_GetItem(outgoing, thisKey); /* borrowed reference, NULL when absent */
+			if (!currentValue){
+				//first result for this key: start its list
+				newBits =  Py_BuildValue("[(dII)]",
+											result->results[i].dist, 
+											result->results[i].qpos, 
+											result->results[i].ipos);
+				if (!newBits) {goto cleanup;}
+				if (PyDict_SetItem(outgoing, thisKey,newBits)){
+					printf("key : %s\ndist : %f\nqpos : %i\nipos : %i\n", result->results[i].ikey, result->results[i].dist, result->results[i].qpos, result->results[i].ipos);
+					PyErr_SetString(PyExc_AttributeError, "Error adding a tuple to the result dict\n");
+					goto cleanup;
+				}
+			}else {
+				//the key already has a value, so confirm it's a list and append another tuple to it.
+				if (!PyList_Check(currentValue)){
+					PyErr_SetString(PyExc_TypeError, "The result dictionary appears to be malformed.\n");
+					goto cleanup;
+				}
+				newBits = Py_BuildValue("dII",result->results[i].dist, 
+											result->results[i].qpos, 
+											result->results[i].ipos);
+				//appending updates the list the dict already holds; re-setting the key would replace it
+				if (!newBits || PyList_Append(currentValue,  newBits)){
+					goto cleanup;
+				}
+			}
+			Py_CLEAR(newBits);
+			Py_CLEAR(thisKey);
+		}
+	}else if(strcmp(resFmt, "list")==0){
+		outgoing  = PyList_New((Py_ssize_t)0);
+		if (!outgoing) {goto cleanup;}
+		for (i=0;i<result->nresults;i++){
+			newBits = Py_BuildValue("sdII",result->results[i].ikey,
+										result->results[i].dist, 
+										result->results[i].qpos, 
+										result->results[i].ipos);
+			if (!newBits || PyList_Append(outgoing,  newBits)){
+				goto cleanup;
+			}
+			Py_CLEAR(newBits);
+		}
+		if(PyList_Reverse(outgoing)){//need to do this as things come off the accumulator backward.
+			PyErr_SetString(PyExc_RuntimeError,
+			"the reverse failed, hopefully a sensable error will follow.\nIf not, fix it.\n");
+			goto cleanup;
+		}
+	}else{
+		PyErr_SetString(PyExc_ValueError, 
+			"Poorly specified result mode. Result must be either \'dict\' or \'list\'.\n");
+		goto cleanup;
+	}
+	retval = outgoing;
+	outgoing = NULL;
+
+cleanup:
+	Py_XDECREF(outgoing);
+	Py_XDECREF(thisKey);
+	Py_XDECREF(newBits);
+	if (result){
+		/* NOTE(review): spec is not freed on this path; confirm whether the call below releases it */
+		if (audiodb_query_free_results(current_db, spec, result)){
+			printf("bit of trouble freeing the result and spec...\ncheck for leaks.");
+		}
+	}else if (spec){
+		free(spec->refine.include.keys);
+		free(spec->refine.exclude.keys);
+		free(spec->qid.datum);
+		free(spec);
+	}
+	Py_XDECREF(featArr);
+	Py_XDECREF(powerArr);
+	Py_XDECREF(timesArr);
+	free(status);
+	return retval;
+}
+
 
 /* retrieval of inserted data 
 * returned numpy array has ndarray.shape = (numVectors, numDims)
@@ -664,7 +1012,7 @@
 	if(features){
 	  if(ins->dim>1){
 	    dims=2;
-	    shape[1]= ins->dim;	    
+	    shape[1]= ins->dim;
 	  }
 	  else{
 	    dims=1;
@@ -683,17 +1031,29 @@
 	  data = ins->times;
 	}
 
-	outgoing = PyArray_SimpleNewFromData(dims, shape, NPY_DOUBLE, data);
-	free(status);
+	outgoing = PyArray_SimpleNew(dims, shape, NPY_DOUBLE);
+	if (!outgoing){
+	  free(status);
+	  free(ins); // free the malloced adb_datum_t structure though
+	  Py_XDECREF(outgoing);
+	  PyErr_SetString(PyExc_TypeError, "Failed to convert retrieved datum to C-Array");
+	  return NULL;
+	}	
+
+	/* Copy the data, this allows us to free the allocated memory and let
+	 * python do the subsequent garbage collection itself.
+	 */
+	int num_items = ins->nvectors;
+	if(dims>1){
+	  num_items *= shape[1];
+	}		    
+	double* p = (double*) PyArray_DATA(outgoing);
+	double* d = data;
+	while(num_items--)
+	  *p++ = *d++;	
+	audiodb_free_datum(current_db, ins); // free the source audiodb_datum
+	free(status); // free the malloced status object
 	free(ins); // free the malloced adb_datum_t structure though
-
-	if (!outgoing){
-	  PyErr_SetString(PyExc_TypeError, "Failed to convert retrieved datum to PyArray");
-	  return NULL;
-	}
-	// Apparently Python automatically INCREFs the data pointer, so we don't have to call
-	// audiodb_free_datum(current_db, ins);
-
 	return outgoing; 
 }
 
@@ -763,6 +1123,8 @@
 					durRatio      = double time expansion/compresion ratio,\n\
 					hopSize       = int hopsize (1 by default)])->resultDict\n\
 					resFmt        = [\"list\"|\"dict\"](\"dict\" by default)"},
+	{"_pyadb_queryFromData", (PyCFunction)_pyadb_queryFromData, METH_VARARGS | METH_KEYWORDS,
+	 "data query. Required features=F (numpy ndarray). Optional: power=P (numpy 1d array), times=T (numpy 1d array)"},
 	{NULL,NULL, 0, NULL}
 };