annotate audioDB-internals.h @ 457:823bca1e10f5 api-inversion

Sketch out a "query state" structure. As yet it's completely unused, but the intention is that accumulated state will be collected into one of these structures for each query, and then passed around, to help reduce the need for silly arglists. It's possible that this structure will also grow a pointer to the adb itself, and be the thing passed to the LSH callback; we'll see how that develops.
author mas01cr
date Wed, 24 Dec 2008 10:57:30 +0000
parents 16a903968d18
children 913a95f06998
rev   line source
mas01cr@457 1 #include "accumulator.h"
mas01cr@457 2
/* Internal-only "pun" structure that lets the insert code be written
 * once for two kinds of input: a "datum", which carries the actual
 * numerical data, and a "reference", which carries strings naming the
 * files that contain the numerical data.  Most insert operations are
 * identical for both, so the main body of the insert code operates on
 * this common shape.
 */
typedef struct adb_datum_internal {
  /* presumably the number of vectors described -- confirm with callers */
  uint32_t nvectors;
  /* presumably the dimensionality of each vector -- confirm with callers */
  uint32_t dim;
  /* key string for the entry; never modified through this pointer */
  const char *key;
  /* The three payload slots are untyped so that they can hold either
   * form described above (numerical data, or a pathname string). */
  void *data;
  void *times;
  void *power;
} adb_datum_internal_t;
mas01cr@408 18
mas01cr@457 19 /* this struct is for maintaining per-query state. We don't want to
mas01cr@457 20 * store this stuff in the adb struct itself, because (a) it doesn't
mas01cr@457 21 * belong there and (b) in principle people might do two queries in
mas01cr@457 22 * parallel using the same adb handle. (b) is in practice a little
mas01cr@457 23 * bit academic because at the moment we're seeking all over the disk
mas01cr@457 24 * using adb->fd, but changing to use pread() might win us
mas01cr@457 25 * threadsafety eventually.
mas01cr@457 26 */
mas01cr@457 27 typedef struct adb_qstate_internal {
mas01cr@457 28 Accumulator *accumulator;
mas01cr@457 29 adb_qpointers_internal_t *qpointers;
mas01cr@457 30 adb_qpointers_internal_t *dbpointers;
mas01cr@457 31 std::set<std::string> *allowed_keys;
mas01cr@457 32 } adb_qstate_internal_t;
mas01cr@457 33
mas01cr@402 34 struct adb {
mas01cr@402 35 char *path;
mas01cr@402 36 int fd;
mas01cr@402 37 int flags;
mas01cr@402 38 adb_header_t *header;
mas01cr@453 39 std::vector<std::string> *keys;
mas01cr@453 40 std::map<std::string,uint32_t> *keymap;
mas01cr@432 41 std::vector<uint32_t> *track_lengths;
mas01cr@442 42 std::vector<off_t> *track_offsets;
mas01cr@402 43 };
mas01cr@402 44
mas01cr@416 45 typedef struct {
mas01cr@416 46 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@416 47 return strcmp(r1.key, r2.key) < 0;
mas01cr@416 48 }
mas01cr@416 49 } adb_result_key_lt;
mas01cr@416 50
mas01cr@416 51 typedef struct {
mas01cr@416 52 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@416 53 return r1.qpos < r2.qpos;
mas01cr@416 54 }
mas01cr@416 55 } adb_result_qpos_lt;
mas01cr@416 56
mas01cr@416 57 typedef struct {
mas01cr@416 58 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@416 59 return r1.dist < r2.dist;
mas01cr@416 60 }
mas01cr@416 61 } adb_result_dist_lt;
mas01cr@416 62
mas01cr@416 63 typedef struct {
mas01cr@416 64 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@416 65 return r1.dist > r2.dist;
mas01cr@416 66 }
mas01cr@416 67 } adb_result_dist_gt;
mas01cr@416 68
mas01cr@416 69 typedef struct {
mas01cr@416 70 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@416 71 return ((r1.ipos < r2.ipos) ||
mas01cr@416 72 ((r1.ipos == r2.ipos) &&
mas01cr@416 73 ((r1.qpos < r2.qpos) ||
mas01cr@416 74 ((r1.qpos == r2.qpos) && (strcmp(r1.key, r2.key) < 0)))));
mas01cr@416 75 }
mas01cr@416 76 } adb_result_triple_lt;
mas01cr@416 77
/* Map `length` bytes of the database file, starting at file offset
 * `start`, read-only and shared, and store the mapping in `var` after
 * casting it to `type`.  On failure, control jumps to the `error`
 * label, which must exist in the calling function; a variable named
 * `adb` must also be in scope, since the mapping is made on adb->fd.
 *
 * We could go gcc-specific here and use typeof() instead of passing
 * in an explicit type.  Answers on a postcard as to whether that's a
 * good plan or not.
 *
 * Every value argument is parenthesized, failure is detected with
 * MAP_FAILED, and the temporary has a name unlikely to collide with
 * identifiers appearing in the arguments. */
#define mmap_or_goto_error(type, var, start, length) \
  { void *adb_mmap_tmp_ = mmap(0, (length), PROT_READ, MAP_SHARED, adb->fd, (start)); \
    if(adb_mmap_tmp_ == MAP_FAILED) { \
      goto error; \
    } \
    (var) = (type) adb_mmap_tmp_; \
  }
mas01cr@401 88
/* Unmap `length` bytes at `table`, unless `table` is null, in which
 * case nothing happens.  The result of munmap() is not checked.
 * `table` is evaluated exactly once, so arguments with side effects
 * are safe. */
#define maybe_munmap(table, length) \
  { void *adb_munmap_tmp_ = (void *) (table); \
    if(adb_munmap_tmp_) { \
      munmap(adb_munmap_tmp_, (length)); \
    } \
  }
mas01cr@401 94
/* Write exactly `size` bytes from `buffer` to `fd`.  Anything other
 * than a complete write (an error or a short write) jumps to the
 * `error` label, which must exist in the calling function.
 *
 * `size` is evaluated exactly once, and the temporary has a name
 * unlikely to collide with identifiers appearing in the arguments. */
#define write_or_goto_error(fd, buffer, size) \
  { size_t adb_write_len_ = (size); \
    if(write((fd), (buffer), adb_write_len_) != (ssize_t) adb_write_len_) { \
      goto error; \
    } \
  }
mas01cr@410 101
/* Read exactly `size` bytes from `fd` into `buffer`.  Anything other
 * than a complete read (an error, end of file, or a short read) jumps
 * to the `error` label, which must exist in the calling function.
 *
 * `size` is evaluated exactly once, and the temporary has a name
 * unlikely to collide with identifiers appearing in the arguments. */
#define read_or_goto_error(fd, buffer, size) \
  { size_t adb_read_len_ = (size); \
    if(read((fd), (buffer), adb_read_len_) != (ssize_t) adb_read_len_) { \
      goto error; \
    } \
  }
mas01cr@410 108
mas01cr@401 109 static inline int audiodb_sync_header(adb_t *adb) {
mas01cr@401 110 off_t pos;
mas01cr@401 111 pos = lseek(adb->fd, (off_t) 0, SEEK_CUR);
mas01cr@401 112 if(pos == (off_t) -1) {
mas01cr@401 113 goto error;
mas01cr@401 114 }
mas01cr@401 115 if(lseek(adb->fd, (off_t) 0, SEEK_SET) == (off_t) -1) {
mas01cr@401 116 goto error;
mas01cr@401 117 }
mas01cr@401 118 if(write(adb->fd, adb->header, O2_HEADERSIZE) != O2_HEADERSIZE) {
mas01cr@401 119 goto error;
mas01cr@401 120 }
mas01cr@401 121
mas01cr@401 122 /* can be fsync() if fdatasync() is racily exciting and new */
mas01cr@401 123 fdatasync(adb->fd);
mas01cr@401 124 if(lseek(adb->fd, pos, SEEK_SET) == (off_t) -1) {
mas01cr@401 125 goto error;
mas01cr@401 126 }
mas01cr@401 127 return 0;
mas01cr@401 128
mas01cr@401 129 error:
mas01cr@401 130 return 1;
mas01cr@401 131 }
mas01cr@425 132
/* Return the dot product of the first `count` elements of `p` and `q`,
 * accumulated in index order starting from zero.  Returns 0 when
 * `count` is 0.  Neither input is modified, so both are taken as
 * pointers to const; callers holding non-const buffers are unaffected.
 */
static inline double audiodb_dot_product(const double *p, const double *q, size_t count) {
  double sum = 0;
  size_t i;

  for(i = 0; i < count; i++) {
    sum += p[i] * q[i];
  }
  return sum;
}
mas01cr@426 140
mas01cr@426 141 static inline void audiodb_l2norm_buffer(double *d, size_t dim, size_t nvectors, double *l) {
mas01cr@426 142 while(nvectors--) {
mas01cr@426 143 double *d1 = d;
mas01cr@426 144 double *d2 = d;
mas01cr@426 145 *l++ = audiodb_dot_product(d1, d2, dim);
mas01cr@426 146 d += dim;
mas01cr@426 147 }
mas01cr@426 148 }
mas01cr@427 149
// This is a common pattern in sequence queries: we slide a window of
// seqlen elements over a buffer of `length` elements and store the
// sum of each window in the window's first element.  The first
// (length - seqlen + 1) elements are thus replaced by windowed sums;
// the last (seqlen - 1) elements are left untouched.
//
// The first window is summed directly.  Each later window is derived
// from the previous sum by subtracting the element that left the
// window and adding the one that entered.  When the departing element
// is not finite, that subtraction would yield NaN, so the window is
// re-summed from scratch instead.
//
// If there is no complete window (seqlen < 1 or seqlen > length) the
// buffer is left unmodified.
static inline void audiodb_sequence_sum(double *buffer, int length, int seqlen) {
  double leaving, current;
  int pos, k;

  // Without this guard the loops below would run off the buffer.
  if(seqlen < 1 || length < seqlen) {
    return;
  }

  // First window, summed directly.  Keep the original first element,
  // because it is what leaves the window on the next step.
  leaving = buffer[0];
  for(k = 1; k < seqlen; k++) {
    buffer[0] += buffer[k];
  }

  for(pos = 1; pos <= length - seqlen; pos++) {
    // Save the original value before it is overwritten by its sum.
    current = buffer[pos];
    if(isfinite(leaving)) {
      buffer[pos] = buffer[pos - 1] - leaving + buffer[pos + seqlen - 1];
    } else {
      for(k = 1; k < seqlen; k++) {
        buffer[pos] += buffer[pos + k];
      }
    }
    leaving = current;
  }
}
mas01cr@427 180
// In contrast to audiodb_sequence_sum() above,
// audiodb_sequence_sqrt() and audiodb_sequence_average() below are
// simple mappers across the sequence.
//
// audiodb_sequence_sqrt() replaces each of the first
// (length - seqlen + 1) elements of the buffer with its square root;
// the remaining elements are left untouched.  If there is no complete
// window (seqlen < 1 or seqlen > length) nothing is modified.
static inline void audiodb_sequence_sqrt(double *buffer, int length, int seqlen) {
  int nwindows, i;

  // seqlen < 1 would otherwise make the loop cover more than `length`
  // elements.
  if(seqlen < 1) {
    return;
  }
  nwindows = length - seqlen + 1;
  // A counted loop simply does nothing when nwindows is not positive.
  for(i = 0; i < nwindows; i++) {
    buffer[i] = sqrt(buffer[i]);
  }
}
mas01cr@427 191
// Divide each of the first (length - seqlen + 1) elements of the
// buffer by seqlen; the remaining elements are left untouched.  When
// the buffer holds windowed sums over seqlen elements, this turns
// each sum into an average.  If there is no complete window
// (seqlen < 1 or seqlen > length) nothing is modified.
static inline void audiodb_sequence_average(double *buffer, int length, int seqlen) {
  int nwindows, i;

  // seqlen < 1 would otherwise divide by zero (or a negative count)
  // and make the loop cover more than `length` elements.
  if(seqlen < 1) {
    return;
  }
  nwindows = length - seqlen + 1;
  // A counted loop simply does nothing when nwindows is not positive.
  for(i = 0; i < nwindows; i++) {
    buffer[i] /= seqlen;
  }
}
mas01cr@430 199
mas01cr@430 200 static inline uint32_t audiodb_key_index(adb_t *adb, const char *key) {
mas01cr@430 201 std::map<std::string,uint32_t>::iterator it;
mas01cr@453 202 it = adb->keymap->find(key);
mas01cr@453 203 if(it == adb->keymap->end()) {
mas01cr@430 204 return (uint32_t) -1;
mas01cr@430 205 } else {
mas01cr@430 206 return (*it).second;
mas01cr@430 207 }
mas01cr@430 208 }
mas01cr@433 209
mas01cr@433 210 int audiodb_read_data(adb_t *, int, int, double **, size_t *);
mas01cr@443 211 int audiodb_insert_create_datum(adb_insert_t *, adb_datum_t *);
mas01cr@443 212 int audiodb_free_datum(adb_datum_t *);
mas01cr@444 213 int audiodb_query_spec_qpointers(adb_t *, adb_query_spec_t *, double **, double **, adb_qpointers_internal_t *);