Mercurial > hg > audiodb
changeset 756:9b75573be3b9 mkc_lsh_update
Copied trunk python bindings -r 1110 to this branch.
author | mas01mc |
---|---|
date | Fri, 26 Nov 2010 06:58:55 +0000 |
parents | 9bd13c7819ae |
children | ee612b7bd922 |
files | bindings/python/pyadb.py bindings/python/pyadbmodule.c |
diffstat | 2 files changed, 430 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
--- a/bindings/python/pyadb.py Thu Nov 25 13:42:40 2010 +0000 +++ b/bindings/python/pyadb.py Fri Nov 26 06:58:55 2010 +0000 @@ -72,11 +72,13 @@ args["power"]=powerFile elif featData.any(): args["power"]=powerData + if timesData != None: + self.hasTimes=True if self.hasTimes: if featFile: args["times"]=timesFile elif timesData.any(): - pass + args["times"]=timesData if key: args["key"]=str(key) if featFile: @@ -94,6 +96,11 @@ if (len(args["power"].shape) == 1) : args["power"] = args["power"].reshape((args["power"].shape[0],1)) args["power"] = args["power"].flatten() + if(self.hasTimes and timesData != None): + if (len(args["times"].shape) == 1) : + args["times"] = args["times"].reshape((args["times"].shape[0],1)) + args["times"] = args["times"].flatten() + print "args: " + str(args) ok = _pyadb._pyadb_insertFromArray(**args) if not (ok==0): @@ -149,6 +156,26 @@ elif featData: raise NotImplementedError("direct data query not yet implemented. Sorry.") return Pyadb.Result(result, self.configQuery) + + def query_data(self, featData=None, powerData=None, timesData=None, strictConfig=True): + """query the database using numpy arrays. required data: featData, optional data: [powerData, timesData]Query parameters as defined in self.configQuery. For details on this consult the doc string in the configCheck method.""" + if not self.configCheck(): + if strictConfig: + raise ValueError("configQuery dict contains unsupported terms and strict configure mode is on.\n\ +Only keys found in Pyadb.validConfigTerms may be defined") + else: + print "configQuery dict contains unsupported terms and strict configure mode is off.\n\ +Only keys found in Pyadb.validConfigTerms should be defined. Removing invalid terms and proceeding..." 
+ self.configCheck(scrub=True) + cq = self.configQuery.copy() + if (featData==None): + raise Usage("query requires featData to be defined.") + if(powerData!=None): + cq['power']=powerData + if(timesData!=None): + cq['times']=timesData + result = _pyadb._pyadb_queryFromData(self._db, featData, **cq) + return Pyadb.Result(result, self.configQuery) def status(self): '''update attributes and return them as a dict''' @@ -192,6 +219,26 @@ def __repr__(self): return repr(self.rawData) + def liszt(self): + '''run _pyadb_liszt to get a list of database keys''' + if self._db != None: + return _pyadb._pyadb_liszt(self._db) + else: + print "Error in liszt(): ADB database not defined" + return 0 + + def retrieve_datum(self, key, **args): + '''run _pyadb_retrieveDatum to retrieve data by key: + features=True, to get features + powers=True, to get Powers + times=True, to get Times + ''' + if self._db != None: + return _pyadb._pyadb_retrieveDatum(self._db, key=key, **args) + else: + print "Error in liszt(): ADB database not defined" + return 0 + class untitledTests(unittest.TestCase): def setUp(self): pass
--- a/bindings/python/pyadbmodule.c Thu Nov 25 13:42:40 2010 +0000 +++ b/bindings/python/pyadbmodule.c Fri Nov 26 06:58:55 2010 +0000 @@ -208,8 +208,8 @@ return NULL; } // times = (PyArrayObject *)PyCObject_AsVoidPtr(incomingTime); - if (PyArray_NDIM(times) != 1 || PyArray_DIMS(times)[0] == (nVect*2)){ - PyErr_SetString(PyExc_ValueError, "times, if given must be a 1d numpy array with shape = (numVectors,)"); + if (PyArray_NDIM(times) != 1 || PyArray_DIMS(times)[0] != (nVect*2)){ + PyErr_SetString(PyExc_ValueError, "times, if given must be a 1d numpy array with shape = (numVectors*2,)"); return NULL; } } @@ -335,7 +335,7 @@ adb_t *current_db; adb_query_spec_t *spec; adb_query_results_t *result; - int ok, exhaustive=0, falsePositives=0; + int ok, exhaustive, falsePositives; uint32_t i; const char *key; const char *accuMode = "db"; @@ -407,22 +407,22 @@ } //set up spec->params - if (strcmp(accuMode,"db")){ + if (strcmp(accuMode,"db")==0){ spec->params.accumulation = ADB_ACCUMULATION_DB; - } else if (strcmp(accuMode,"track")){ + } else if (strcmp(accuMode,"track")==0){ spec->params.accumulation = ADB_ACCUMULATION_PER_TRACK; - } else if (strcmp(accuMode,"one2one")){ + } else if (strcmp(accuMode,"one2one")==0){ spec->params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE; } else{ PyErr_SetString(PyExc_ValueError, "Poorly specified distance mode. distance must either be \'db\', \'track\' or \'one2one\'.\n"); return NULL; } - if (strcmp(distMode, "dot")){ + if (strcmp(distMode, "dot")==0){ spec->params.distance = ADB_DISTANCE_DOT_PRODUCT; - }else if (strcmp(distMode, "eucNorm")){ + }else if (strcmp(distMode, "eucNorm")==0){ spec->params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED; - }else if (strcmp(distMode, "euclidean")){ + }else if (strcmp(distMode, "euclidean")==0){ spec->params.distance = ADB_DISTANCE_EUCLIDEAN; }else{ PyErr_SetString(PyExc_ValueError, @@ -578,6 +578,354 @@ } +/* Data query. 
+ * Returns a dict that is result ordered and key = result key + * value is a list of tuples one per result associated with that key, of the form: + * (dist, qpos, ipos) + * api call: + * adb_query_results_t *audiodb_query_spec(adb_t *, const adb_query_spec_t *); + ***/ +PyObject * _pyadb_queryFromData(PyObject *self, PyObject *args, PyObject *keywds) +{ + adb_t *current_db; + adb_query_spec_t *spec; + adb_query_results_t *result; + int ok, exhaustive, falsePositives; + uint32_t i; + const char *accuMode = "db"; + const char *distMode = "dot"; + const char *resFmt = "dict"; + uint32_t hop = 0; + double radius = 0; + double absThres = 0; + double relThres = 0; + double durRatio = 0; + PyObject *includeKeys = NULL; + PyObject *excludeKeys = NULL; + PyObject *incoming = NULL; + PyObject *outgoing = NULL; + PyObject *thisKey = NULL; + PyObject *currentValue = NULL; + PyObject *newBits = NULL; + npy_intp dims[2]; + unsigned int nDims = 0; + unsigned int nVect = 0; + PyArrayObject *features = NULL; + PyArrayObject *power = NULL; + PyArrayObject *times = NULL; + PyArray_Descr *descr; + adb_status_t *status; + + static char *kwlist[] = { "db", "features", + "seqLength", + "seqStart", + "exhaustive", + "falsePositives", + "accumulation", + "distance", + "npoints",//nearest neighbor points per track + "ntracks", + "includeKeys", + "excludeKeys", + "radius", + "absThres", + "relThres", + "durRatio", + "hopSize", + "resFmt", + "power", + "times", + NULL + }; + + spec = (adb_query_spec_t *)malloc(sizeof(adb_query_spec_t)); + spec->qid.datum = (adb_datum_t *)malloc(sizeof(adb_datum_t)); + result = (adb_query_results_t *)malloc(sizeof(adb_query_results_t)); + + spec->qid.sequence_length = 16; + spec->qid.sequence_start = 0; + spec->qid.flags = 0; + spec->params.npoints = 1; + spec->params.ntracks = 100;//number of results returned in db mode + spec->refine.flags = 0; + + ok = PyArg_ParseTupleAndKeywords(args, keywds, "OO!|iiiissIIOOddddIsO!O!", kwlist, + &incoming, &PyArray_Type, 
&features, + &spec->qid.sequence_length, + &spec->qid.sequence_start, + &exhaustive, &falsePositives, + &accuMode,&distMode, + &spec->params.npoints, + &spec->params.ntracks, + &includeKeys, &excludeKeys, + &radius, &absThres, &relThres, &durRatio, &hop, + &resFmt, + &PyArray_Type, &power, &PyArray_Type, &times + ); + + if (!ok) {return NULL;} + current_db = (adb_t *)PyCObject_AsVoidPtr(incoming); + + if (!features){ /* Sanity Check */ + PyErr_SetString(PyExc_ValueError, + "queryFromData: function requires feature data as numpy ndarray. PythonC required keyword check failed.\n"); + return NULL; + } + + /* Check the dimensionality of passed data agrees with the passed database */ + if(PyArray_NDIM(features)!=2){ + PyErr_SetString(PyExc_ValueError, + "queryFromData: passed features have incorrect shape, should be (nVecs, nDims).\n"); + return NULL; + } + + + if(power && PyArray_NDIM(power)!=1){ + PyErr_SetString(PyExc_ValueError, + "queryFromData: passed power have incorrect shape, should be (nVecs,).\n"); + return NULL; + } + + if(times && PyArray_NDIM(times)!=1){ + PyErr_SetString(PyExc_ValueError, + "queryFromData: passed times have incorrect shape, should be (nVecs,).\n"); + return NULL; + } + + status = (adb_status_t*) malloc(sizeof(adb_status_t)); + int errtest = audiodb_status(current_db, status); + if(errtest){ + PyErr_SetString(PyExc_TypeError, "queryFromData failed: could not get status of passed ADB database"); + free(status); + return NULL; + } + + if(!PyArray_DIMS(features)[1]==status->dim){ + PyErr_SetString(PyExc_ValueError, + "queryFromData: passed features have incorrect dimensionality.\n"); + free(status); + return NULL; + } + + if(power && PyArray_DIMS(power)[0] != PyArray_DIMS(features)[0]){ + PyErr_SetString(PyExc_ValueError, + "queryFromData: passed power and features have incompatible nVecs dimension.\n"); + free(status); + return NULL; + } + + if(times && PyArray_DIMS(times)[0] != PyArray_DIMS(features)[0]){ + PyErr_SetString(PyExc_ValueError, + 
"queryFromData: passed times and features have incompatible nVecs dimension.\n"); + free(status); + return NULL; + } + + free(status); + + + if (exhaustive){ + spec->qid.flags = spec->qid.flags | ADB_QID_FLAG_EXHAUSTIVE; + } + if (falsePositives){ + spec->qid.flags = spec->qid.flags | ADB_QID_FLAG_ALLOW_FALSE_POSITIVES; + } + + //set up spec->params + if (strcmp(accuMode,"db")==0){ + spec->params.accumulation = ADB_ACCUMULATION_DB; + } else if (strcmp(accuMode,"track")==0){ + spec->params.accumulation = ADB_ACCUMULATION_PER_TRACK; + } else if (strcmp(accuMode,"one2one")==0){ + spec->params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE; + } else{ + PyErr_SetString(PyExc_ValueError, + "Poorly specified distance mode. distance must either be \'db\', \'track\' or \'one2one\'.\n"); + return NULL; + } + if (strcmp(distMode, "dot")==0){ + spec->params.distance = ADB_DISTANCE_DOT_PRODUCT; + }else if (strcmp(distMode, "eucNorm")==0){ + spec->params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED; + }else if (strcmp(distMode, "euclidean")==0){ + spec->params.distance = ADB_DISTANCE_EUCLIDEAN; + }else{ + PyErr_SetString(PyExc_ValueError, + "Poorly specified distance mode. 
distance must either be \'dot\', \'eucNorm\' or \'euclidean\'.\n"); + return NULL; + } + + //set up spec->refine + //include/exclude keys + if (includeKeys){ + if (!PyList_Check(includeKeys)){ + PyErr_SetString(PyExc_TypeError, "Include keys must be specified as a list of strings.\n"); + return NULL; + } + spec->refine.flags = spec->refine.flags | ADB_REFINE_INCLUDE_KEYLIST; + spec->refine.include.nkeys = (uint32_t)PyList_Size(includeKeys); + spec->refine.include.keys = (const char **)calloc(sizeof(const char *), spec->refine.include.nkeys); + for (i=0;i<spec->refine.include.nkeys;i++){ + if (PyString_Check(PyList_GetItem(includeKeys, (Py_ssize_t)i))){ + spec->refine.include.keys[i] = PyString_AsString(PyList_GetItem(includeKeys, (Py_ssize_t)i)); + }else{ + PyErr_SetString(PyExc_TypeError, "Include keys must each be specified as a string.\nFound one that was not.\n"); + return NULL; + } + } + } + if (excludeKeys){ + if (!PyList_Check(excludeKeys)){ + PyErr_SetString(PyExc_TypeError, "Exclude keys must be specified as a list of strings.\n"); + return NULL; + } + spec->refine.flags = spec->refine.flags | ADB_REFINE_EXCLUDE_KEYLIST; + spec->refine.exclude.nkeys = (uint32_t)PyList_Size(excludeKeys); + spec->refine.exclude.keys = (const char **)calloc(sizeof(const char *), spec->refine.exclude.nkeys); + for (i=0;i<spec->refine.exclude.nkeys;i++){ + if (PyString_Check(PyList_GetItem(excludeKeys, (Py_ssize_t)i))){ + spec->refine.exclude.keys[i] = PyString_AsString(PyList_GetItem(excludeKeys, (Py_ssize_t)i)); + }else{ + PyErr_SetString(PyExc_TypeError, "Exclude keys must each be specified as a string.\nFound one that was not.\n"); + return NULL; + } + } + } + //the rest of spec->refine + if (radius){ + spec->refine.flags = spec->refine.flags | ADB_REFINE_RADIUS; + spec->refine.radius = radius; + } + if (absThres){ + spec->refine.flags = spec->refine.flags | ADB_REFINE_ABSOLUTE_THRESHOLD; + spec->refine.absolute_threshold = absThres; + } + if (relThres){ + 
spec->refine.flags = spec->refine.flags | ADB_REFINE_RELATIVE_THRESHOLD; + spec->refine.relative_threshold = relThres; + } + if (durRatio){ + spec->refine.flags = spec->refine.flags | ADB_REFINE_DURATION_RATIO; + spec->refine.duration_ratio = durRatio; + } + if (hop){ + spec->refine.flags = spec->refine.flags | ADB_REFINE_HOP_SIZE; + /* not ideal but a temporary bandage fix */ + spec->refine.qhopsize = hop; + spec->refine.ihopsize = hop; + } + + descr = PyArray_DescrFromType(NPY_DOUBLE); + + if (PyArray_AsCArray(&features, &(spec->qid.datum->data), dims, PyArray_NDIM(features), descr)){ + PyErr_SetString(PyExc_RuntimeError, "Trouble expressing the feature np array as a C array."); + return NULL; + } + + if (power){ + if (PyArray_AsCArray(&power, &(spec->qid.datum->power), dims, 1, descr)){ + PyErr_SetString(PyExc_RuntimeError, "Trouble expressing the power np array as a C array."); + return NULL; + } + }else{ + spec->qid.datum->power=NULL; + } + + if (times){ + if (PyArray_AsCArray(&times, &(spec->qid.datum->times), dims, 1, descr)){ + PyErr_SetString(PyExc_RuntimeError, "Trouble expressing the times np array as a C array."); + return NULL; + } + }else{ + spec->qid.datum->times=NULL; + } + + nVect = PyArray_DIMS(features)[0]; + nDims = PyArray_DIMS(features)[1]; + spec->qid.datum->nvectors = (uint32_t)nVect; + spec->qid.datum->dim = (uint32_t)nDims; + + result = audiodb_query_spec(current_db, spec); + + + if (result == NULL){ + PyErr_SetString(PyExc_RuntimeError, "Encountered an error while running the actual query, or there was nothing returned.\n"); + return NULL; + } + if(strcmp(resFmt, "dict")==0){ + outgoing = PyDict_New(); + for (i=0;i<result->nresults;i++){ + thisKey = PyString_FromString(result->results[i].ikey); + if (!PyDict_Contains(outgoing, thisKey)){ + newBits = Py_BuildValue("[(dII)]", + result->results[i].dist, + result->results[i].qpos, + result->results[i].ipos); + if (PyDict_SetItem(outgoing, thisKey,newBits)){ + printf("key : %s\ndist : %f\nqpos : 
%i\nipos : %i\n", result->results[i].ikey, result->results[i].dist, result->results[i].qpos, result->results[i].ipos); + PyErr_SetString(PyExc_AttributeError, "Error adding a tuple to the result dict\n"); + Py_XDECREF(newBits); + return NULL; + } + Py_DECREF(newBits); + }else { + //the key already has a value, so we need to fetch the value, confirm it's a list and append another tuple to it. + currentValue = PyDict_GetItem(outgoing, thisKey); + if (!PyList_Check(currentValue)){ + PyErr_SetString(PyExc_TypeError, "The result dictionary appears to be malformed.\n"); + return NULL; + } + newBits = Py_BuildValue("dII",result->results[i].dist, + result->results[i].qpos, + result->results[i].ipos); + if (PyList_Append(currentValue, newBits)){ + //error msg here + Py_XDECREF(newBits); + return NULL; + } + if (PyDict_SetItem(outgoing, thisKey, newBits)){ + PyErr_SetString(PyExc_AttributeError, "Error adding a tuple to the result dict\n"); + Py_XDECREF(newBits); + return NULL; + } + Py_DECREF(newBits); + + } + } + }else if(strcmp(resFmt, "list")==0){ + outgoing = PyList_New((Py_ssize_t)0); + for (i=0;i<result->nresults;i++){ + newBits = Py_BuildValue("sdII",result->results[i].ikey, + result->results[i].dist, + result->results[i].qpos, + result->results[i].ipos); + if (PyList_Append(outgoing, newBits)){ + //error msg here + Py_XDECREF(newBits); + return NULL; + } + Py_DECREF(newBits); + } + if(PyList_Reverse(outgoing)){//need to do this as things come off the accumulator backward. + PyErr_SetString(PyExc_RuntimeError, + "the reverse failed, hopefully a sensable error will follow.\nIf not, fix it.\n"); + return NULL; + } + }else{ + PyErr_SetString(PyExc_ValueError, + "Poorly specified result mode. 
Result must be either \'dist\' or \'list\'.\n"); + return NULL; + } + if (audiodb_query_free_results(current_db, spec, result)){ + printf("bit of trouble freeing the result and spec...\ncheck for leaks."); + } + + return outgoing; + + + +} + /* retrieval of inserted data * returned numpy array has ndarray.shape = (numVectors, numDims) @@ -664,7 +1012,7 @@ if(features){ if(ins->dim>1){ dims=2; - shape[1]= ins->dim; + shape[1]= ins->dim; } else{ dims=1; @@ -683,17 +1031,29 @@ data = ins->times; } - outgoing = PyArray_SimpleNewFromData(dims, shape, NPY_DOUBLE, data); - free(status); + outgoing = PyArray_SimpleNew(dims, shape, NPY_DOUBLE); + if (!outgoing){ + free(status); + free(ins); // free the malloced adb_datum_t structure though + Py_XDECREF(outgoing); + PyErr_SetString(PyExc_TypeError, "Failed to convert retrieved datum to C-Array"); + return NULL; + } + + /* Copy the data, this allows us to free the allocated memory and let + * python do the subsequent garbage collection itself. + */ + int num_items = ins->nvectors; + if(dims>1){ + num_items *= shape[1]; + } + double* p = (double*) PyArray_DATA(outgoing); + double* d = data; + while(num_items--) + *p++ = *d++; + audiodb_free_datum(current_db, ins); // free the source audiodb_datum + free(status); // free the malloced status object free(ins); // free the malloced adb_datum_t structure though - - if (!outgoing){ - PyErr_SetString(PyExc_TypeError, "Failed to convert retrieved datum to PyArray"); - return NULL; - } - // Apparently Python automatically INCREFs the data pointer, so we don't have to call - // audiodb_free_datum(current_db, ins); - return outgoing; } @@ -763,6 +1123,8 @@ durRatio = double time expansion/compresion ratio,\n\ hopSize = int hopsize (1 by default)])->resultDict\n\ resFmt = [\"list\"|\"dict\"](\"dict\" by default)"}, + {"_pyadb_queryFromData", (PyCFunction)_pyadb_queryFromData, METH_VARARGS | METH_KEYWORDS, + "data query. Required features=F (numpy ndarray). 
Optional: power=P (numpy 1d array), times=T (numpy 1d array)"}, {NULL,NULL, 0, NULL} };