# HG changeset patch # User mas01mc # Date 1290626992 0 # Node ID d93292ae7c1ba86a0771cb6ed456b7aaedabac0f # Parent dd4b9fec8d856767c7253e88d5c84e6bc8170ebc Fixed accumulation and distance options (strcmp==0) and added times. Added queryFromData diff -r dd4b9fec8d85 -r d93292ae7c1b bindings/python/pyadb.py --- a/bindings/python/pyadb.py Wed Nov 24 13:50:05 2010 +0000 +++ b/bindings/python/pyadb.py Wed Nov 24 19:29:52 2010 +0000 @@ -156,6 +156,26 @@ elif featData: raise NotImplementedError("direct data query not yet implemented. Sorry.") return Pyadb.Result(result, self.configQuery) + + def query_data(self, featData=None, powerData=None, timesData=None, strictConfig=True): + """query the database using numpy arrays. required data: featData, optional data: [powerData, timesData]Query parameters as defined in self.configQuery. For details on this consult the doc string in the configCheck method.""" + if not self.configCheck(): + if strictConfig: + raise ValueError("configQuery dict contains unsupported terms and strict configure mode is on.\n\ +Only keys found in Pyadb.validConfigTerms may be defined") + else: + print "configQuery dict contains unsupported terms and strict configure mode is off.\n\ +Only keys found in Pyadb.validConfigTerms should be defined. Removing invalid terms and proceeding..." 
+ self.configCheck(scrub=True) + cq = self.configQuery.copy() + if (featData==None): + raise Usage("query requires featData to be defined.") + if(powerData!=None): + cq['power']=powerData + if(timesData!=None): + cq['times']=timesData + result = _pyadb._pyadb_queryFromData(self._db, featData, **cq) + return Pyadb.Result(result, self.configQuery) def status(self): '''update attributes and return them as a dict''' diff -r dd4b9fec8d85 -r d93292ae7c1b bindings/python/pyadbmodule.c --- a/bindings/python/pyadbmodule.c Wed Nov 24 13:50:05 2010 +0000 +++ b/bindings/python/pyadbmodule.c Wed Nov 24 19:29:52 2010 +0000 @@ -407,22 +407,22 @@ } //set up spec->params - if (strcmp(accuMode,"db")){ + if (strcmp(accuMode,"db")==0){ spec->params.accumulation = ADB_ACCUMULATION_DB; - } else if (strcmp(accuMode,"track")){ + } else if (strcmp(accuMode,"track")==0){ spec->params.accumulation = ADB_ACCUMULATION_PER_TRACK; - } else if (strcmp(accuMode,"one2one")){ + } else if (strcmp(accuMode,"one2one")==0){ spec->params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE; } else{ PyErr_SetString(PyExc_ValueError, "Poorly specified distance mode. distance must either be \'db\', \'track\' or \'one2one\'.\n"); return NULL; } - if (strcmp(distMode, "dot")){ + if (strcmp(distMode, "dot")==0){ spec->params.distance = ADB_DISTANCE_DOT_PRODUCT; - }else if (strcmp(distMode, "eucNorm")){ + }else if (strcmp(distMode, "eucNorm")==0){ spec->params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED; - }else if (strcmp(distMode, "euclidean")){ + }else if (strcmp(distMode, "euclidean")==0){ spec->params.distance = ADB_DISTANCE_EUCLIDEAN; }else{ PyErr_SetString(PyExc_ValueError, @@ -578,6 +578,353 @@ } +/* Data query. 
+ * Returns a dict that is result ordered and key = result key + * value is a list of tuples one per result associated with that key, of the form: + * (dist, qpos, ipos) + * api call: + * adb_query_results_t *audiodb_query_spec(adb_t *, const adb_query_spec_t *); + ***/ +PyObject * _pyadb_queryFromData(PyObject *self, PyObject *args, PyObject *keywds) +{ + adb_t *current_db; + adb_query_spec_t *spec; + adb_query_results_t *result; + int ok, exhaustive, falsePositives; + uint32_t i; + const char *accuMode = "db"; + const char *distMode = "dot"; + const char *resFmt = "dict"; + uint32_t hop = 0; + double radius = 0; + double absThres = 0; + double relThres = 0; + double durRatio = 0; + PyObject *includeKeys = NULL; + PyObject *excludeKeys = NULL; + PyObject *incoming = NULL; + PyObject *outgoing = NULL; + PyObject *thisKey = NULL; + PyObject *currentValue = NULL; + PyObject *newBits = NULL; + npy_intp dims[2]; + unsigned int nDims = 0; + unsigned int nVect = 0; + PyArrayObject *features = NULL; + PyArrayObject *power = NULL; + PyArrayObject *times = NULL; + PyArray_Descr *descr; + adb_status_t *status; + + static char *kwlist[] = { "db", "features", + "seqLength", + "seqStart", + "exhaustive", + "falsePositives", + "accumulation", + "distance", + "npoints",//nearest neighbor points per track + "ntracks", + "includeKeys", + "excludeKeys", + "radius", + "absThres", + "relThres", + "durRatio", + "hopSize", + "resFmt", + "power", + "times", + NULL + }; + + spec = (adb_query_spec_t *)malloc(sizeof(adb_query_spec_t)); + spec->qid.datum = (adb_datum_t *)malloc(sizeof(adb_datum_t)); + result = (adb_query_results_t *)malloc(sizeof(adb_query_results_t)); + + spec->qid.sequence_length = 16; + spec->qid.sequence_start = 0; + spec->qid.flags = 0; + spec->params.npoints = 1; + spec->params.ntracks = 100;//number of results returned in db mode + spec->refine.flags = 0; + + ok = PyArg_ParseTupleAndKeywords(args, keywds, "OO!|iiiissIIOOddddIsO!O!", kwlist, + &incoming, &PyArray_Type, 
&features, + &spec->qid.sequence_length, + &spec->qid.sequence_start, + &exhaustive, &falsePositives, + &accuMode,&distMode, + &spec->params.npoints, + &spec->params.ntracks, + &includeKeys, &excludeKeys, + &radius, &absThres, &relThres, &durRatio, &hop, + &resFmt, + &PyArray_Type, &power, &PyArray_Type, &times + ); + + if (!ok) {return NULL;} + current_db = (adb_t *)PyCObject_AsVoidPtr(incoming); + + if (!features){ /* Sanity Check */ + PyErr_SetString(PyExc_ValueError, + "queryFromData: function requires feature data as numpy ndarray. PythonC required keyword check failed.\n"); + return NULL; + } + + /* Check the dimensionality of passed data agrees with the passed database */ + if(PyArray_NDIM(features)!=2){ + PyErr_SetString(PyExc_ValueError, + "queryFromData: passed features have incorrect shape, should be (nVecs, nDims).\n"); + return NULL; + } + + + if(power && PyArray_NDIM(power)!=1){ + PyErr_SetString(PyExc_ValueError, + "queryFromData: passed power have incorrect shape, should be (nVecs,).\n"); + return NULL; + } + + if(times && PyArray_NDIM(times)!=1){ + PyErr_SetString(PyExc_ValueError, + "queryFromData: passed times have incorrect shape, should be (nVecs,).\n"); + return NULL; + } + + status = (adb_status_t*) malloc(sizeof(adb_status_t)); + int errtest = audiodb_status(current_db, status); + if(errtest){ + PyErr_SetString(PyExc_TypeError, "queryFromData failed: could not get status of passed ADB database"); + free(status); + return NULL; + } + + if(!PyArray_DIMS(features)[1]==status->dim){ + PyErr_SetString(PyExc_ValueError, + "queryFromData: passed features have incorrect dimensionality.\n"); + free(status); + return NULL; + } + + if(power && PyArray_DIMS(power)[0] != PyArray_DIMS(features)[0]){ + PyErr_SetString(PyExc_ValueError, + "queryFromData: passed power and features have incompatible nVecs dimension.\n"); + free(status); + return NULL; + } + + if(times && PyArray_DIMS(times)[0] != PyArray_DIMS(features)[0]){ + PyErr_SetString(PyExc_ValueError, + 
"queryFromData: passed times and features have incompatible nVecs dimension.\n"); + free(status); + return NULL; + } + + free(status); + + + if (exhaustive){ + spec->qid.flags = spec->qid.flags | ADB_QID_FLAG_EXHAUSTIVE; + } + if (falsePositives){ + spec->qid.flags = spec->qid.flags | ADB_QID_FLAG_ALLOW_FALSE_POSITIVES; + } + + //set up spec->params + if (strcmp(accuMode,"db")==0){ + spec->params.accumulation = ADB_ACCUMULATION_DB; + } else if (strcmp(accuMode,"track")==0){ + spec->params.accumulation = ADB_ACCUMULATION_PER_TRACK; + } else if (strcmp(accuMode,"one2one")==0){ + spec->params.accumulation = ADB_ACCUMULATION_ONE_TO_ONE; + } else{ + PyErr_SetString(PyExc_ValueError, + "Poorly specified distance mode. distance must either be \'db\', \'track\' or \'one2one\'.\n"); + return NULL; + } + if (strcmp(distMode, "dot")==0){ + spec->params.distance = ADB_DISTANCE_DOT_PRODUCT; + }else if (strcmp(distMode, "eucNorm")==0){ + spec->params.distance = ADB_DISTANCE_EUCLIDEAN_NORMED; + }else if (strcmp(distMode, "euclidean")==0){ + spec->params.distance = ADB_DISTANCE_EUCLIDEAN; + }else{ + PyErr_SetString(PyExc_ValueError, + "Poorly specified distance mode. 
distance must either be \'dot\', \'eucNorm\' or \'euclidean\'.\n"); + return NULL; + } + + //set up spec->refine + //include/exclude keys + if (includeKeys){ + if (!PyList_Check(includeKeys)){ + PyErr_SetString(PyExc_TypeError, "Include keys must be specified as a list of strings.\n"); + return NULL; + } + spec->refine.flags = spec->refine.flags | ADB_REFINE_INCLUDE_KEYLIST; + spec->refine.include.nkeys = (uint32_t)PyList_Size(includeKeys); + spec->refine.include.keys = (const char **)calloc(sizeof(const char *), spec->refine.include.nkeys); + for (i=0;i<spec->refine.include.nkeys;i++){ + if (PyString_Check(PyList_GetItem(includeKeys, (Py_ssize_t)i))){ + spec->refine.include.keys[i] = PyString_AsString(PyList_GetItem(includeKeys, (Py_ssize_t)i)); + }else{ + PyErr_SetString(PyExc_TypeError, "Include keys must each be specified as a string.\nFound one that was not.\n"); + return NULL; + } + } + } + if (excludeKeys){ + if (!PyList_Check(excludeKeys)){ + PyErr_SetString(PyExc_TypeError, "Exclude keys must be specified as a list of strings.\n"); + return NULL; + } + spec->refine.flags = spec->refine.flags | ADB_REFINE_EXCLUDE_KEYLIST; + spec->refine.exclude.nkeys = (uint32_t)PyList_Size(excludeKeys); + spec->refine.exclude.keys = (const char **)calloc(sizeof(const char *), spec->refine.exclude.nkeys); + for (i=0;i<spec->refine.exclude.nkeys;i++){ + if (PyString_Check(PyList_GetItem(excludeKeys, (Py_ssize_t)i))){ + spec->refine.exclude.keys[i] = PyString_AsString(PyList_GetItem(excludeKeys, (Py_ssize_t)i)); + }else{ + PyErr_SetString(PyExc_TypeError, "Exclude keys must each be specified as a string.\nFound one that was not.\n"); + return NULL; + } + } + } + //the rest of spec->refine + if (radius){ + spec->refine.flags = spec->refine.flags | ADB_REFINE_RADIUS; + spec->refine.radius = radius; + } + if (absThres){ + spec->refine.flags = spec->refine.flags | ADB_REFINE_ABSOLUTE_THRESHOLD; + spec->refine.absolute_threshold = absThres; + } + if (relThres){ + spec->refine.flags = 
spec->refine.flags | ADB_REFINE_RELATIVE_THRESHOLD; + spec->refine.relative_threshold = relThres; + } + if (durRatio){ + spec->refine.flags = spec->refine.flags | ADB_REFINE_DURATION_RATIO; + spec->refine.duration_ratio = durRatio; + } + if (hop){ + spec->refine.flags = spec->refine.flags | ADB_REFINE_HOP_SIZE; + /* not ideal but a temporary bandage fix */ + spec->refine.qhopsize = hop; + spec->refine.ihopsize = hop; + } + + descr = PyArray_DescrFromType(NPY_DOUBLE); + + if (PyArray_AsCArray(&features, &(spec->qid.datum->data), dims, 2, descr)){ + PyErr_SetString(PyExc_RuntimeError, "Trouble expressing the feature np array as a C array."); + return NULL; + } + + if (power){ + if (PyArray_AsCArray(&power, &(spec->qid.datum->power), dims, 1, descr)){ + PyErr_SetString(PyExc_RuntimeError, "Trouble expressing the power np array as a C array."); + return NULL; + } + }else{ + spec->qid.datum->power=NULL; + } + + if (times){ + if (PyArray_AsCArray(&times, &(spec->qid.datum->times), dims, 1, descr)){ + PyErr_SetString(PyExc_RuntimeError, "Trouble expressing the times np array as a C array."); + return NULL; + } + }else{ + spec->qid.datum->times=NULL; + } + + nVect = PyArray_DIMS(features)[0]; + nDims = PyArray_DIMS(features)[1]; + spec->qid.datum->nvectors = (uint32_t)nVect; + spec->qid.datum->dim = (uint32_t)nDims; + + result = audiodb_query_spec(current_db, spec); + + if (result == NULL){ + PyErr_SetString(PyExc_RuntimeError, "Encountered an error while running the actual query, or there was nothing returned.\n"); + return NULL; + } + if(strcmp(resFmt, "dict")==0){ + outgoing = PyDict_New(); + for (i=0;i<result->nresults;i++){ + thisKey = PyString_FromString(result->results[i].ikey); + if (!PyDict_Contains(outgoing, thisKey)){ + newBits = Py_BuildValue("[(dII)]", + result->results[i].dist, + result->results[i].qpos, + result->results[i].ipos); + if (PyDict_SetItem(outgoing, thisKey,newBits)){ + printf("key : %s\ndist : %f\nqpos : %i\nipos : %i\n", result->results[i].ikey, 
result->results[i].dist, result->results[i].qpos, result->results[i].ipos); + PyErr_SetString(PyExc_AttributeError, "Error adding a tuple to the result dict\n"); + Py_XDECREF(newBits); + return NULL; + } + Py_DECREF(newBits); + }else { + //the key already has a value, so we need to fetch the value, confirm it's a list and append another tuple to it. + currentValue = PyDict_GetItem(outgoing, thisKey); + if (!PyList_Check(currentValue)){ + PyErr_SetString(PyExc_TypeError, "The result dictionary appears to be malformed.\n"); + return NULL; + } + newBits = Py_BuildValue("dII",result->results[i].dist, + result->results[i].qpos, + result->results[i].ipos); + if (PyList_Append(currentValue, newBits)){ + //error msg here + Py_XDECREF(newBits); + return NULL; + } + if (PyDict_SetItem(outgoing, thisKey, newBits)){ + PyErr_SetString(PyExc_AttributeError, "Error adding a tuple to the result dict\n"); + Py_XDECREF(newBits); + return NULL; + } + Py_DECREF(newBits); + + } + } + }else if(strcmp(resFmt, "list")==0){ + outgoing = PyList_New((Py_ssize_t)0); + for (i=0;i<result->nresults;i++){ + newBits = Py_BuildValue("sdII",result->results[i].ikey, + result->results[i].dist, + result->results[i].qpos, + result->results[i].ipos); + if (PyList_Append(outgoing, newBits)){ + //error msg here + Py_XDECREF(newBits); + return NULL; + } + Py_DECREF(newBits); + } + if(PyList_Reverse(outgoing)){//need to do this as things come off the accumulator backward. + PyErr_SetString(PyExc_RuntimeError, + "the reverse failed, hopefully a sensable error will follow.\nIf not, fix it.\n"); + return NULL; + } + }else{ + PyErr_SetString(PyExc_ValueError, + "Poorly specified result mode. 
Result must be either \'dist\' or \'list\'.\n"); + return NULL; + } + if (audiodb_query_free_results(current_db, spec, result)){ + printf("bit of trouble freeing the result and spec...\ncheck for leaks."); + } + + return outgoing; + + + +} + /* retrieval of inserted data * returned numpy array has ndarray.shape = (numVectors, numDims) @@ -772,6 +1119,8 @@ durRatio = double time expansion/compresion ratio,\n\ hopSize = int hopsize (1 by default)])->resultDict\n\ resFmt = [\"list\"|\"dict\"](\"dict\" by default)"}, + {"_pyadb_queryFromData", (PyCFunction)_pyadb_queryFromData, METH_VARARGS | METH_KEYWORDS, + "data query. Required features=F (numpy ndarray). Optional: power=P (numpy 1d array), times=T (numpy 1d array)"}, {NULL,NULL, 0, NULL} };