annotate audioDB-internals.h @ 503:3d17fdac096a

Makefile improvements from library/application separation. Library object files don't need soap includes, and depend on a different set of header files from the application object files. Run the library tests with "make test"
author mas01cr
date Tue, 13 Jan 2009 21:26:21 +0000
parents 342822c2d49a
children cc2b97d020b1
rev   line source
mas01cr@498 1 #include "accumulator.h"
mas01cr@498 2
/* Internal-only record that lets the insert code be written once.
 * An insert is handed either a "datum" (carrying the numerical data
 * itself) or a "reference" (carrying strings that name the files
 * holding that data). Most of the work is the same in both cases,
 * so the payload fields are untyped (void *) and are interpreted by
 * whichever caller filled them in.
 */
struct adb_datum_internal {
  uint32_t nvectors;
  uint32_t dim;
  const char *key;
  void *data;   /* numerical data, or a pathname string naming it */
  void *times;  /* as for data */
  void *power;  /* as for data */
};
typedef struct adb_datum_internal adb_datum_internal_t;
mas01cr@498 18
/* Bundle of per-vector side information for a query (equally usable
 * for a single database entry, or for a whole database).
 *
 * Pointers come in pairs. The "_data" member is the immutable base
 * address (FIXME: should these be constified in some way?) and is
 * what must eventually be handed to free(). The member without the
 * suffix is a mutable cursor that tracks the "current" position.
 *
 * mean_duration points to an array (possibly of one element) holding
 * the mean duration of each track.
 */
struct adb_qpointers_internal {
  uint32_t nvectors;
  double *l2norm_data;    /* base pointer, kept for free() */
  double *l2norm;         /* moving cursor */
  double *power_data;     /* base pointer, kept for free() */
  double *power;          /* moving cursor */
  double *mean_duration;
};
typedef struct adb_qpointers_internal adb_qpointers_internal_t;
mas01cr@498 35
mas01cr@498 36 /* this struct is for maintaining per-query state. We don't want to
mas01cr@498 37 * store this stuff in the adb struct itself, because (a) it doesn't
mas01cr@498 38 * belong there and (b) in principle people might do two queries in
mas01cr@498 39 * parallel using the same adb handle. (b) is in practice a little
mas01cr@498 40 * bit academic because at the moment we're seeking all over the disk
mas01cr@498 41 * using adb->fd, but changing to use pread() might win us
mas01cr@498 42 * threadsafety eventually.
mas01cr@498 43 */
mas01cr@498 44 typedef struct adb_qstate_internal {
mas01cr@498 45 Accumulator *accumulator;
mas01cr@498 46 std::set<std::string> *allowed_keys;
mas01cr@498 47 std::priority_queue<PointPair> *exact_evaluation_queue;
mas01cr@498 48 LSH *lsh;
mas01cr@498 49 } adb_qstate_internal_t;
mas01cr@498 50
mas01cr@498 51 /* the transparent version of the opaque (forward-declared) adb_t. */
mas01cr@498 52 struct adb {
mas01cr@498 53 char *path;
mas01cr@498 54 int fd;
mas01cr@498 55 int flags;
mas01cr@498 56 adb_header_t *header;
mas01cr@498 57 std::vector<std::string> *keys;
mas01cr@498 58 std::map<std::string,uint32_t> *keymap;
mas01cr@498 59 std::vector<uint32_t> *track_lengths;
mas01cr@498 60 std::vector<off_t> *track_offsets;
mas01cr@498 61 LSH *cached_lsh;
mas01cr@498 62 };
mas01cr@498 63
mas01cr@498 64 typedef struct {
mas01cr@498 65 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@498 66 return strcmp(r1.key, r2.key) < 0;
mas01cr@498 67 }
mas01cr@498 68 } adb_result_key_lt;
mas01cr@498 69
mas01cr@498 70 typedef struct {
mas01cr@498 71 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@498 72 return r1.qpos < r2.qpos;
mas01cr@498 73 }
mas01cr@498 74 } adb_result_qpos_lt;
mas01cr@498 75
mas01cr@498 76 typedef struct {
mas01cr@498 77 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@498 78 return r1.dist < r2.dist;
mas01cr@498 79 }
mas01cr@498 80 } adb_result_dist_lt;
mas01cr@498 81
mas01cr@498 82 typedef struct {
mas01cr@498 83 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@498 84 return r1.dist > r2.dist;
mas01cr@498 85 }
mas01cr@498 86 } adb_result_dist_gt;
mas01cr@498 87
mas01cr@498 88 typedef struct {
mas01cr@498 89 bool operator() (const adb_result_t &r1, const adb_result_t &r2) {
mas01cr@498 90 return ((r1.ipos < r2.ipos) ||
mas01cr@498 91 ((r1.ipos == r2.ipos) &&
mas01cr@498 92 ((r1.qpos < r2.qpos) ||
mas01cr@498 93 ((r1.qpos == r2.qpos) && (strcmp(r1.key, r2.key) < 0)))));
mas01cr@498 94 }
mas01cr@498 95 } adb_result_triple_lt;
mas01cr@498 96
/* Map `length` bytes of adb->fd, read-only and shared, starting at
 * file offset `start`, and assign the mapping (cast to `type`) to
 * `var`. On failure control jumps to the label `error`, leaving
 * `var` unmodified.
 *
 * The expansion relies on two names from the calling scope: a
 * variable `adb` and a label `error`.
 *
 * The type is passed explicitly; we could go gcc-specific and use
 * typeof() instead, but that would tie the header to one compiler.
 *
 * Wrapped in do { } while(0) so that the macro behaves as a single
 * statement (safe in an unbraced if/else); invoke it with a trailing
 * semicolon. The temporary has a deliberately unusual name so that
 * it cannot capture an argument expression that mentions `tmp`.
 */
#define mmap_or_goto_error(type, var, start, length) \
  do { \
    void *adb_mmap_result_ = mmap(0, (length), PROT_READ, MAP_SHARED, adb->fd, (start)); \
    if(adb_mmap_result_ == MAP_FAILED) { \
      goto error; \
    } \
    (var) = (type) adb_mmap_result_; \
  } while(0)
mas01cr@498 107
/* Unmap `length` bytes at `table`, but only if `table` is non-null.
 * The result of munmap() is not examined.
 *
 * `table` is evaluated exactly once. Wrapped in do { } while(0) so
 * that the macro behaves as a single statement (safe in an unbraced
 * if/else); invoke it with a trailing semicolon.
 */
#define maybe_munmap(table, length) \
  do { \
    void *adb_munmap_addr_ = (void *) (table); \
    if(adb_munmap_addr_) { \
      munmap(adb_munmap_addr_, (length)); \
    } \
  } while(0)
mas01cr@498 113
/* Write exactly `size` bytes from `buffer` to `fd`. If write()
 * reports an error or transfers fewer bytes than requested, control
 * jumps to the label `error`, which must exist in the calling scope.
 *
 * `size` is evaluated exactly once. Wrapped in do { } while(0) so
 * that the macro behaves as a single statement (safe in an unbraced
 * if/else); invoke it with a trailing semicolon. The temporary has a
 * deliberately unusual name so that it cannot capture an argument
 * expression that mentions `tmp`.
 */
#define write_or_goto_error(fd, buffer, size) \
  do { \
    const ssize_t adb_write_want_ = (ssize_t) (size); \
    if(write((fd), (buffer), (size_t) adb_write_want_) != adb_write_want_) { \
      goto error; \
    } \
  } while(0)
mas01cr@498 120
/* Read exactly `size` bytes from `fd` into `buffer`. If read()
 * reports an error or delivers fewer bytes than requested, control
 * jumps to the label `error`, which must exist in the calling scope.
 *
 * `size` is evaluated exactly once. Wrapped in do { } while(0) so
 * that the macro behaves as a single statement (safe in an unbraced
 * if/else); invoke it with a trailing semicolon. The temporary has a
 * deliberately unusual name so that it cannot capture an argument
 * expression that mentions `tmp`.
 */
#define read_or_goto_error(fd, buffer, size) \
  do { \
    const ssize_t adb_read_want_ = (ssize_t) (size); \
    if(read((fd), (buffer), (size_t) adb_read_want_) != adb_read_want_) { \
      goto error; \
    } \
  } while(0)
mas01cr@498 127
mas01cr@498 128 static inline int audiodb_sync_header(adb_t *adb) {
mas01cr@498 129 off_t pos;
mas01cr@498 130 pos = lseek(adb->fd, (off_t) 0, SEEK_CUR);
mas01cr@498 131 if(pos == (off_t) -1) {
mas01cr@498 132 goto error;
mas01cr@498 133 }
mas01cr@498 134 if(lseek(adb->fd, (off_t) 0, SEEK_SET) == (off_t) -1) {
mas01cr@498 135 goto error;
mas01cr@498 136 }
mas01cr@498 137 if(write(adb->fd, adb->header, O2_HEADERSIZE) != O2_HEADERSIZE) {
mas01cr@498 138 goto error;
mas01cr@498 139 }
mas01cr@498 140
mas01cr@498 141 /* can be fsync() if fdatasync() is racily exciting and new */
mas01cr@498 142 fdatasync(adb->fd);
mas01cr@498 143 if(lseek(adb->fd, pos, SEEK_SET) == (off_t) -1) {
mas01cr@498 144 goto error;
mas01cr@498 145 }
mas01cr@498 146 return 0;
mas01cr@498 147
mas01cr@498 148 error:
mas01cr@498 149 return 1;
mas01cr@498 150 }
mas01cr@498 151
/* Return the sum over i in [0, count) of p[i] * q[i], accumulated
 * left to right in a double. Yields 0 when count is 0. Neither
 * array is modified.
 */
static inline double audiodb_dot_product(double *p, double *q, size_t count) {
  double sum = 0;
  for(size_t i = 0; i < count; i++) {
    sum += p[i] * q[i];
  }
  return sum;
}
mas01cr@498 159
mas01cr@498 160 static inline void audiodb_l2norm_buffer(double *d, size_t dim, size_t nvectors, double *l) {
mas01cr@498 161 while(nvectors--) {
mas01cr@498 162 double *d1 = d;
mas01cr@498 163 double *d2 = d;
mas01cr@498 164 *l++ = audiodb_dot_product(d1, d2, dim);
mas01cr@498 165 d += dim;
mas01cr@498 166 }
mas01cr@498 167 }
mas01cr@498 168
// This is a common pattern in sequence queries: slide a window of
// seqlen elements over a buffer of length elements, and store the
// sum of each window in the window's first element. On return the
// first (length - seqlen + 1) elements hold windowed sums; the last
// (seqlen - 1) elements are left as they were.
//
// The sums are normally updated incrementally (previous sum, minus
// the element that left the window, plus the element that entered).
// When the element that left was not finite, that subtraction
// cannot recover a usable sum, so the window is re-summed directly.
//
// If buffer is null, seqlen < 1 or seqlen > length, there is no
// complete window and the buffer is left untouched.
static inline void audiodb_sequence_sum(double *buffer, int length, int seqlen) {
  if(!buffer || seqlen < 1 || seqlen > length) {
    return;
  }

  /* original value of the element that leaves the window next */
  double leaving = buffer[0];

  /* first window: plain sum, stored in buffer[0] */
  for(int j = 1; j < seqlen; j++) {
    buffer[0] += buffer[j];
  }

  const int last_start = length - seqlen;
  for(int pos = 1; pos <= last_start; pos++) {
    /* save before overwriting: it is what leaves on the next step */
    const double original = buffer[pos];
    if(isfinite(leaving)) {
      buffer[pos] = buffer[pos - 1] - leaving + buffer[pos + seqlen - 1];
    } else {
      for(int i = 1; i < seqlen; i++) {
        buffer[pos] += buffer[pos + i];
      }
    }
    leaving = original;
  }
}
mas01cr@498 199
// In contrast to audiodb_sequence_sum(), audiodb_sequence_sqrt() and
// audiodb_sequence_average() are simple mappers across the sequence.
//
// audiodb_sequence_sqrt() replaces each of the first
// (length - seqlen + 1) elements of buffer with its square root; the
// remaining elements are left alone. If seqlen < 1 or
// seqlen > length, there is no complete window and the buffer is
// left untouched.
static inline void audiodb_sequence_sqrt(double *buffer, int length, int seqlen) {
  if(seqlen < 1 || seqlen > length) {
    return;
  }
  const int nwindows = length - seqlen + 1;
  for(int i = 0; i < nwindows; i++) {
    buffer[i] = sqrt(buffer[i]);
  }
}
mas01cr@498 210
// Divide each of the first (length - seqlen + 1) elements of buffer
// by seqlen; the remaining elements are left alone. If seqlen < 1
// or seqlen > length, there is no complete window and the buffer is
// left untouched.
static inline void audiodb_sequence_average(double *buffer, int length, int seqlen) {
  if(seqlen < 1 || seqlen > length) {
    return;
  }
  const int nwindows = length - seqlen + 1;
  for(int i = 0; i < nwindows; i++) {
    buffer[i] /= seqlen;
  }
}
mas01cr@498 218
mas01cr@498 219 static inline uint32_t audiodb_key_index(adb_t *adb, const char *key) {
mas01cr@498 220 std::map<std::string,uint32_t>::iterator it;
mas01cr@498 221 it = adb->keymap->find(key);
mas01cr@498 222 if(it == adb->keymap->end()) {
mas01cr@498 223 return (uint32_t) -1;
mas01cr@498 224 } else {
mas01cr@498 225 return (*it).second;
mas01cr@498 226 }
mas01cr@498 227 }
mas01cr@498 228
mas01cr@498 229 static inline const char *audiodb_index_key(adb_t *adb, uint32_t index) {
mas01cr@498 230 return (*adb->keys)[index].c_str();
mas01cr@498 231 }
mas01cr@498 232
/* Extract the track id from a packed id: the bits of lshid above
 * the low n_point_bits bits.
 */
static inline uint32_t audiodb_index_to_track_id(uint32_t lshid, uint32_t n_point_bits) {
  const uint32_t track_id = lshid >> n_point_bits;
  return track_id;
}
mas01cr@498 236
/* Extract the position within a track from a packed id: the low
 * n_point_bits bits of lshid.
 *
 * The mask is built in unsigned 32-bit arithmetic, so that
 * n_point_bits == 31 does not overflow a signed int. A width of 32
 * or more selects every bit of lshid rather than performing an
 * out-of-range shift.
 */
static inline uint32_t audiodb_index_to_track_pos(uint32_t lshid, uint32_t n_point_bits) {
  if(n_point_bits >= 32) {
    return lshid;
  }
  const uint32_t mask = ((uint32_t) 1 << n_point_bits) - 1;
  return lshid & mask;
}
mas01cr@498 240
/* Pack a track id and a position into one 32-bit id: track_id is
 * shifted left by n_point_bits and OR-ed with track_pos. track_pos
 * is not masked, so the caller must keep it within n_point_bits.
 */
static inline uint32_t audiodb_index_from_trackinfo(uint32_t track_id, uint32_t track_pos, uint32_t n_point_bits) {
  const uint32_t high = track_id << n_point_bits;
  return high | track_pos;
}
mas01cr@498 244
mas01cr@498 245 static inline uint32_t audiodb_lsh_n_point_bits(adb_t *adb) {
mas01cr@498 246 uint32_t nbits = adb->header->flags >> 28;
mas01cr@498 247 return (nbits ? nbits : O2_DEFAULT_LSH_N_POINT_BITS);
mas01cr@498 248 }
mas01cr@498 249
mas01cr@498 250 int audiodb_read_data(adb_t *, int, int, double **, size_t *);
mas01cr@498 251 int audiodb_insert_create_datum(adb_insert_t *, adb_datum_t *);
mas01cr@498 252 int audiodb_track_id_datum(adb_t *, uint32_t, adb_datum_t *);
mas01cr@498 253 int audiodb_free_datum(adb_datum_t *);
mas01cr@498 254 int audiodb_datum_qpointers(adb_datum_t *, uint32_t, double **, double **, adb_qpointers_internal_t *);
mas01cr@498 255 int audiodb_query_spec_qpointers(adb_t *, const adb_query_spec_t *, double **, double **, adb_qpointers_internal_t *);
mas01cr@498 256 int audiodb_query_queue_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *, double *, adb_qpointers_internal_t *);
mas01cr@498 257 int audiodb_query_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *);
mas01cr@498 258 char *audiodb_index_get_name(const char *, double, uint32_t);
mas01cr@498 259 bool audiodb_index_exists(const char *, double, uint32_t);
mas01cr@498 260 int audiodb_index_query_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *);