comparison create.cpp @ 324:c93be2f3a674

Merge of branches/large_adb -r 514:524 onto the trunk. No conflicts. Added LARGE_ADB support: turn it on with --ntracks 20001 or greater, and use --adb_feature_root to locate feature files at QUERY time. Also fixes a bug in LSH indexing that was incorrectly thresholding large numbers of shingles.
author mas01mc
date Thu, 21 Aug 2008 21:28:33 +0000
parents 896679d8cc39
children 521812d63516
comparison
equal deleted inserted replaced
315:d2c56d4f841e 324:c93be2f3a674
1 #include "audioDB.h" 1 #include "audioDB.h"
2 2
3 /* Make a new database. 3 /* Make a new database.
4 4
5 IF ntracks < O2_LARGE_ADB_NTRACKS AND size(featuredata) < O2_LARGE_ADB_SIZE
5 The database consists of: 6 The database consists of:
6 7
7 * a header (see dbTableHeader struct definition); 8 * a header (see dbTableHeader struct definition);
8 * keyTable: list of keys of tracks; 9 * keyTable: list of keys of tracks;
9 * trackTable: Maps implicit feature index to a feature vector 10 * trackTable: Maps implicit feature index to a feature vector
10 matrix (sizes of tracks) 11 matrix (sizes of tracks)
11 * featureTable: Lots of doubles; 12 * featureTable: Lots of doubles;
12 * timesTable: (start,end) time points for each feature vector; 13 * timesTable: (start,end) time points for each feature vector;
13 * powerTable: associated power for each feature vector; 14 * powerTable: associated power for each feature vector;
14 * l2normTable: squared l2norms for each feature vector. 15 * l2normTable: squared l2norms for each feature vector.
16
17 ELSE the database consists of:
18
19 * a header (see dbTableHeader struct definition);
20 * keyTable: list of keys of tracks
21 * trackTable: sizes of tracks
22 * featureTable: list of feature file names
23 * timesTable: list of times file names
24 * powerTable: list of power file names
25
15 */ 26 */
16 27
17 void audioDB::create(const char* dbName){ 28 void audioDB::create(const char* dbName){
18 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0) 29 if ((dbfid = open (dbName, O_RDWR|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)) < 0)
19 error("Can't create database file", dbName, "open"); 30 error("Can't create database file", dbName, "open");
39 dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLE_ENTRY_SIZE*ntracks); 50 dbH->dataOffset = ALIGN_PAGE_UP(dbH->trackTableOffset + O2_TRACKTABLE_ENTRY_SIZE*ntracks);
40 51
41 off_t databytes = ((off_t) datasize) * 1024 * 1024; 52 off_t databytes = ((off_t) datasize) * 1024 * 1024;
42 off_t auxbytes = databytes / datadim; 53 off_t auxbytes = databytes / datadim;
43 54
44 dbH->timesTableOffset = ALIGN_PAGE_UP(dbH->dataOffset + databytes); 55 // For backward-compatibility, Record the point-encoding parameter for LSH indexing in the adb header
45 dbH->powerTableOffset = ALIGN_PAGE_UP(dbH->timesTableOffset + 2*auxbytes); 56 // If this value is 0 then it will be set to 14
46 dbH->l2normTableOffset = ALIGN_PAGE_UP(dbH->powerTableOffset + auxbytes); 57
47 dbH->dbSize = ALIGN_PAGE_UP(dbH->l2normTableOffset + auxbytes); 58 #if O2_LSH_N_POINT_BITS > 15
59 #error "AudioDB Compile ERROR: consistency check of O2_LSH_POINT_BITS failed (>15)"
60 #endif
61
62 dbH->flags |= LSH_N_POINT_BITS << 28;
63
64 // If database will fit in a single file the vectors are copied into the AudioDB instance
65 // Else all the vectors are left on the FileSystem and we use the dataOffset as storage
66 // for the location of the features, powers and times files (assuming that arbitrary keys are used for the fileTable)
67 if(ntracks<O2_LARGE_ADB_NTRACKS && datasize<O2_LARGE_ADB_SIZE){
68 dbH->timesTableOffset = ALIGN_PAGE_UP(dbH->dataOffset + databytes);
69 dbH->powerTableOffset = ALIGN_PAGE_UP(dbH->timesTableOffset + 2*auxbytes);
70 dbH->l2normTableOffset = ALIGN_PAGE_UP(dbH->powerTableOffset + auxbytes);
71 dbH->dbSize = ALIGN_PAGE_UP(dbH->l2normTableOffset + auxbytes);
72 }
73 else{ // Create LARGE_ADB, features and powers kept on filesystem
74 dbH->flags |= O2_FLAG_LARGE_ADB;
75 dbH->timesTableOffset = ALIGN_PAGE_UP(dbH->dataOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
76 dbH->powerTableOffset = ALIGN_PAGE_UP(dbH->timesTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
77 dbH->l2normTableOffset = ALIGN_PAGE_UP(dbH->powerTableOffset + O2_FILETABLE_ENTRY_SIZE*ntracks);
78 dbH->dbSize = dbH->l2normTableOffset;
79 }
48 80
49 write(dbfid, dbH, O2_HEADERSIZE); 81 write(dbfid, dbH, O2_HEADERSIZE);
50 82
51 // go to the location corresponding to the last byte 83 // go to the location corresponding to the last byte
52 if (lseek (dbfid, dbH->dbSize - 1, SEEK_SET) == -1) 84 if (lseek (dbfid, dbH->dbSize - 1, SEEK_SET) == -1)