mas01cr@457: #include "accumulator.h" mas01cr@457: mas01cr@457: /* this struct is for writing polymorphic routines as puns. When mas01cr@457: * inserting, we might have a "datum" (with actual numerical data) or mas01cr@457: * a "reference" (with strings denoting pathnames containing numerical mas01cr@457: * data), but most of the operations are the same. This struct, used mas01cr@457: * only internally, allows us to write the main body of the insert mas01cr@457: * code only once. mas01cr@457: */ mas01cr@408: typedef struct adb_datum_internal { mas01cr@408: uint32_t nvectors; mas01cr@408: uint32_t dim; mas01cr@408: const char *key; mas01cr@408: void *data; mas01cr@408: void *times; mas01cr@408: void *power; mas01cr@408: } adb_datum_internal_t; mas01cr@408: mas01cr@463: /* this struct is to collect together a bunch of information about a mas01cr@463: * query (or, in fact, a single database entry, or even a whole mas01cr@463: * database). The _data pointers are immutable (hey, FIXME: should mas01cr@463: * they be constified in some way?) so that free() can work on them mas01cr@463: * later, while the ones without the suffix are mutable to maintain mas01cr@463: * the "current" position in some way. mean_duration points to a mas01cr@463: * (possibly single-element) array of mean durations for each track. mas01cr@463: */ mas01cr@463: typedef struct adb_qpointers_internal { mas01cr@463: uint32_t nvectors; mas01cr@463: double *l2norm_data; mas01cr@463: double *l2norm; mas01cr@463: double *power_data; mas01cr@463: double *power; mas01cr@463: double *mean_duration; mas01cr@463: } adb_qpointers_internal_t; mas01cr@463: mas01cr@457: /* this struct is for maintaining per-query state. We don't want to mas01cr@457: * store this stuff in the adb struct itself, because (a) it doesn't mas01cr@457: * belong there and (b) in principle people might do two queries in mas01cr@457: * parallel using the same adb handle. (b) is in practice a little mas01cr@457: * bit academic because at the moment we're seeking all over the disk mas01cr@457: * using adb->fd, but changing to use pread() might win us mas01cr@457: * threadsafety eventually. mas01cr@457: */ mas01cr@468: typedef struct adb_qstate_internal { mas01cr@468: Accumulator *accumulator; mas01cr@468: std::set *allowed_keys; mas01cr@468: std::priority_queue *exact_evaluation_queue; mas01cr@468: LSH *lsh; mas01cr@468: } adb_qstate_internal_t; mas01cr@457: mas01cr@468: /* the transparent version of the opaque (forward-declared) adb_t. */ mas01cr@402: struct adb { mas01cr@402: char *path; mas01cr@402: int fd; mas01cr@402: int flags; mas01cr@402: adb_header_t *header; mas01cr@453: std::vector *keys; mas01cr@453: std::map *keymap; mas01cr@432: std::vector *track_lengths; mas01cr@442: std::vector *track_offsets; mas01cr@465: LSH *cached_lsh; mas01cr@402: }; mas01cr@402: mas01cr@416: typedef struct { mas01cr@416: bool operator() (const adb_result_t &r1, const adb_result_t &r2) { mas01cr@416: return strcmp(r1.key, r2.key) < 0; mas01cr@416: } mas01cr@416: } adb_result_key_lt; mas01cr@416: mas01cr@416: typedef struct { mas01cr@416: bool operator() (const adb_result_t &r1, const adb_result_t &r2) { mas01cr@416: return r1.qpos < r2.qpos; mas01cr@416: } mas01cr@416: } adb_result_qpos_lt; mas01cr@416: mas01cr@416: typedef struct { mas01cr@416: bool operator() (const adb_result_t &r1, const adb_result_t &r2) { mas01cr@416: return r1.dist < r2.dist; mas01cr@416: } mas01cr@416: } adb_result_dist_lt; mas01cr@416: mas01cr@416: typedef struct { mas01cr@416: bool operator() (const adb_result_t &r1, const adb_result_t &r2) { mas01cr@416: return r1.dist > r2.dist; mas01cr@416: } mas01cr@416: } adb_result_dist_gt; mas01cr@416: mas01cr@416: typedef struct { mas01cr@416: bool operator() (const adb_result_t &r1, const adb_result_t &r2) { mas01cr@416: return ((r1.ipos < r2.ipos) || mas01cr@416: ((r1.ipos == r2.ipos) && mas01cr@416: ((r1.qpos < r2.qpos) || mas01cr@416: ((r1.qpos == r2.qpos) && (strcmp(r1.key, r2.key) < 0))))); mas01cr@416: } mas01cr@416: } adb_result_triple_lt; mas01cr@416: mas01cr@401: /* We could go gcc-specific here and use typeof() instead of passing mas01cr@401: * in an explicit type. Answers on a postcard as to whether that's a mas01cr@401: * good plan or not. */ mas01cr@401: #define mmap_or_goto_error(type, var, start, length) \ mas01cr@401: { void *tmp = mmap(0, length, PROT_READ, MAP_SHARED, adb->fd, (start)); \ mas01cr@401: if(tmp == (void *) -1) { \ mas01cr@401: goto error; \ mas01cr@401: } \ mas01cr@401: var = (type) tmp; \ mas01cr@401: } mas01cr@401: mas01cr@401: #define maybe_munmap(table, length) \ mas01cr@401: { if(table) { \ mas01cr@401: munmap(table, length); \ mas01cr@401: } \ mas01cr@401: } mas01cr@401: mas01cr@410: #define write_or_goto_error(fd, buffer, size) \ mas01cr@410: { ssize_t tmp = size; \ mas01cr@410: if(write(fd, buffer, size) != tmp) { \ mas01cr@410: goto error; \ mas01cr@410: } \ mas01cr@410: } mas01cr@410: mas01cr@410: #define read_or_goto_error(fd, buffer, size) \ mas01cr@410: { ssize_t tmp = size; \ mas01cr@410: if(read(fd, buffer, size) != tmp) { \ mas01cr@410: goto error; \ mas01cr@410: } \ mas01cr@410: } mas01cr@410: mas01cr@401: static inline int audiodb_sync_header(adb_t *adb) { mas01cr@401: off_t pos; mas01cr@401: pos = lseek(adb->fd, (off_t) 0, SEEK_CUR); mas01cr@401: if(pos == (off_t) -1) { mas01cr@401: goto error; mas01cr@401: } mas01cr@401: if(lseek(adb->fd, (off_t) 0, SEEK_SET) == (off_t) -1) { mas01cr@401: goto error; mas01cr@401: } mas01cr@401: if(write(adb->fd, adb->header, O2_HEADERSIZE) != O2_HEADERSIZE) { mas01cr@401: goto error; mas01cr@401: } mas01cr@401: mas01cr@401: /* can be fsync() if fdatasync() is racily exciting and new */ mas01cr@401: fdatasync(adb->fd); mas01cr@401: if(lseek(adb->fd, pos, SEEK_SET) == (off_t) -1) { mas01cr@401: goto error; mas01cr@401: } mas01cr@401: return 0; mas01cr@401: mas01cr@401: error: mas01cr@401: return 1; mas01cr@401: } mas01cr@425: mas01cr@425: static inline double audiodb_dot_product(double *p, double *q, size_t count) { mas01cr@425: double result = 0; mas01cr@425: while(count--) { mas01cr@425: result += *p++ * *q++; mas01cr@425: } mas01cr@425: return result; mas01cr@425: } mas01cr@426: mas01cr@426: static inline void audiodb_l2norm_buffer(double *d, size_t dim, size_t nvectors, double *l) { mas01cr@426: while(nvectors--) { mas01cr@426: double *d1 = d; mas01cr@426: double *d2 = d; mas01cr@426: *l++ = audiodb_dot_product(d1, d2, dim); mas01cr@426: d += dim; mas01cr@426: } mas01cr@426: } mas01cr@427: mas01cr@427: // This is a common pattern in sequence queries: what we are doing is mas01cr@427: // taking a window of length seqlen over a buffer of length length, mas01cr@427: // and placing the sum of the elements in that window in the first mas01cr@427: // element of the window: thus replacing all but the last seqlen mas01cr@427: // elements in the buffer with the corresponding windowed sum. mas01cr@427: static inline void audiodb_sequence_sum(double *buffer, int length, int seqlen) { mas01cr@427: double tmp1, tmp2, *ps; mas01cr@427: int j, w; mas01cr@427: mas01cr@427: tmp1 = *buffer; mas01cr@427: j = 1; mas01cr@427: w = seqlen - 1; mas01cr@427: while(w--) { mas01cr@427: *buffer += buffer[j++]; mas01cr@427: } mas01cr@427: ps = buffer + 1; mas01cr@427: w = length - seqlen; // +1 - 1 mas01cr@427: while(w--) { mas01cr@427: tmp2 = *ps; mas01cr@427: if(isfinite(tmp1)) { mas01cr@427: *ps = *(ps - 1) - tmp1 + *(ps + seqlen - 1); mas01cr@427: } else { mas01cr@427: for(int i = 1; i < seqlen; i++) { mas01cr@427: *ps += *(ps + i); mas01cr@427: } mas01cr@427: } mas01cr@427: tmp1 = tmp2; mas01cr@427: ps++; mas01cr@427: } mas01cr@427: } mas01cr@427: mas01cr@427: // In contrast to audiodb_sequence_sum() above, mas01cr@427: // audiodb_sequence_sqrt() and audiodb_sequence_average() below are mas01cr@427: // simple mappers across the sequence. mas01cr@427: static inline void audiodb_sequence_sqrt(double *buffer, int length, int seqlen) { mas01cr@427: int w = length - seqlen + 1; mas01cr@427: while(w--) { mas01cr@427: *buffer = sqrt(*buffer); mas01cr@427: buffer++; mas01cr@427: } mas01cr@427: } mas01cr@427: mas01cr@427: static inline void audiodb_sequence_average(double *buffer, int length, int seqlen) { mas01cr@427: int w = length - seqlen + 1; mas01cr@427: while(w--) { mas01cr@427: *buffer /= seqlen; mas01cr@427: buffer++; mas01cr@427: } mas01cr@427: } mas01cr@430: mas01cr@430: static inline uint32_t audiodb_key_index(adb_t *adb, const char *key) { mas01cr@430: std::map::iterator it; mas01cr@453: it = adb->keymap->find(key); mas01cr@453: if(it == adb->keymap->end()) { mas01cr@430: return (uint32_t) -1; mas01cr@430: } else { mas01cr@430: return (*it).second; mas01cr@430: } mas01cr@430: } mas01cr@433: mas01cr@469: static inline const char *audiodb_index_key(adb_t *adb, uint32_t index) { mas01cr@469: return (*adb->keys)[index].c_str(); mas01cr@469: } mas01cr@469: mas01cr@458: static inline uint32_t audiodb_index_to_track_id(uint32_t lshid, uint32_t n_point_bits) { mas01cr@458: return (lshid >> n_point_bits); mas01cr@458: } mas01cr@458: mas01cr@458: static inline uint32_t audiodb_index_to_track_pos(uint32_t lshid, uint32_t n_point_bits) { mas01cr@458: return (lshid & ((1 << n_point_bits) - 1)); mas01cr@458: } mas01cr@458: mas01cr@458: static inline uint32_t audiodb_index_from_trackinfo(uint32_t track_id, uint32_t track_pos, uint32_t n_point_bits) { mas01cr@458: return ((track_id << n_point_bits) | track_pos); mas01cr@458: } mas01cr@458: mas01cr@458: static inline uint32_t audiodb_lsh_n_point_bits(adb_t *adb) { mas01cr@458: uint32_t nbits = adb->header->flags >> 28; mas01cr@458: return (nbits ? nbits : O2_DEFAULT_LSH_N_POINT_BITS); mas01cr@458: } mas01cr@458: mas01cr@433: int audiodb_read_data(adb_t *, int, int, double **, size_t *); mas01cr@443: int audiodb_insert_create_datum(adb_insert_t *, adb_datum_t *); mas01cr@461: int audiodb_track_id_datum(adb_t *, uint32_t, adb_datum_t *); mas01cr@443: int audiodb_free_datum(adb_datum_t *); mas01cr@461: int audiodb_datum_qpointers(adb_datum_t *, uint32_t, double **, double **, adb_qpointers_internal_t *); mas01cr@473: int audiodb_query_spec_qpointers(adb_t *, const adb_query_spec_t *, double **, double **, adb_qpointers_internal_t *); mas01cr@473: int audiodb_query_queue_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *, double *, adb_qpointers_internal_t *); mas01cr@473: int audiodb_query_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *); mas01cr@460: char *audiodb_index_get_name(const char *, double, uint32_t); mas01cr@460: bool audiodb_index_exists(const char *, double, uint32_t); mas01cr@473: int audiodb_index_query_loop(adb_t *, const adb_query_spec_t *, adb_qstate_internal_t *);