mas01cr@239
|
1 #include "audioDB.h"
|
mas01cr@239
|
2
|
mas01cr@239
|
3 bool audioDB::enough_data_space_free(off_t size) {
|
mas01cr@239
|
4 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
|
mas01cr@239
|
5 }
|
mas01cr@239
|
6
|
mas01cr@239
|
7 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
|
mas01cr@239
|
8 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET);
|
mas01cr@239
|
9 write(dbfid, buffer, size);
|
mas01cr@239
|
10 }
|
mas01cr@239
|
11
|
mas01cr@239
|
12 void audioDB::insert(const char* dbName, const char* inFile) {
|
mas01cr@239
|
13 forWrite = true;
|
mas01cr@239
|
14 initTables(dbName, inFile);
|
mas01cr@239
|
15
|
mas01cr@239
|
16 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01cr@239
|
17 error("Must use timestamps with timestamped database","use --times");
|
mas01cr@239
|
18
|
mas01cr@239
|
19 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01cr@239
|
20 error("Must use power with power-enabled database", dbName);
|
mas01cr@239
|
21
|
mas01cr@239
|
22 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
|
mas01cr@239
|
23 error("Insert failed: no more room in database", inFile);
|
mas01cr@239
|
24 }
|
mas01cr@239
|
25
|
mas01cr@239
|
26 if(!key)
|
mas01cr@239
|
27 key=inFile;
|
mas01cr@239
|
28 // Linear scan of filenames check for pre-existing feature
|
mas01cr@239
|
29 unsigned alreadyInserted=0;
|
mas01cr@239
|
30 for(unsigned k=0; k<dbH->numFiles; k++)
|
mas01cr@239
|
31 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){
|
mas01cr@239
|
32 alreadyInserted=1;
|
mas01cr@239
|
33 break;
|
mas01cr@239
|
34 }
|
mas01cr@239
|
35
|
mas01cr@239
|
36 if(alreadyInserted) {
|
mas01cr@239
|
37 VERB_LOG(0, "key already exists in database; ignoring: %s\n", inFile);
|
mas01cr@239
|
38 return;
|
mas01cr@239
|
39 }
|
mas01cr@239
|
40
|
mas01cr@239
|
41 // Make a track index table of features to file indexes
|
mas01cr@239
|
42 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01cr@239
|
43 if(!numVectors) {
|
mas01cr@239
|
44 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", key);
|
mas01cr@239
|
45
|
mas01cr@239
|
46 // CLEAN UP
|
mas01cr@239
|
47 munmap(indata,statbuf.st_size);
|
mas01cr@239
|
48 munmap(db,dbH->dbSize);
|
mas01cr@239
|
49 close(infid);
|
mas01cr@239
|
50 return;
|
mas01cr@239
|
51 }
|
mas01cr@239
|
52
|
mas01cr@239
|
53 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
|
mas01cr@239
|
54
|
mas01cr@239
|
55 off_t insertoffset = dbH->length;// Store current state
|
mas01cr@239
|
56
|
mas01cr@239
|
57 // Check times status and insert times from file
|
mas01cr@239
|
58 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
|
mas01cr@239
|
59 double *timesdata = timesTable + 2*indexoffset;
|
mas01cr@239
|
60
|
mas01cr@239
|
61 if(2*(indexoffset + numVectors) > timesTableLength) {
|
mas01cr@239
|
62 error("out of space for times", key);
|
mas01cr@239
|
63 }
|
mas01cr@239
|
64
|
mas01cr@239
|
65 if (usingTimes) {
|
mas01cr@239
|
66 insertTimeStamps(numVectors, timesFile, timesdata);
|
mas01cr@239
|
67 }
|
mas01cr@239
|
68
|
mas01cr@239
|
69 double *powerdata = powerTable + indexoffset;
|
mas01cr@239
|
70 insertPowerData(numVectors, powerfd, powerdata);
|
mas01cr@239
|
71
|
mas01cr@239
|
72 // Increment file count
|
mas01cr@239
|
73 dbH->numFiles++;
|
mas01cr@239
|
74
|
mas01cr@239
|
75 // Update Header information
|
mas01cr@239
|
76 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01cr@239
|
77
|
mas01cr@239
|
78 // Update track to file index map
|
mas01cr@239
|
79 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
|
mas01cr@239
|
80
|
mas01cr@239
|
81 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
|
mas01cr@239
|
82
|
mas01cr@239
|
83 // Norm the vectors on input if the database is already L2 normed
|
mas01cr@239
|
84 if(dbH->flags & O2_FLAG_L2NORM)
|
mas01cr@239
|
85 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
|
mas01cr@239
|
86
|
mas01cr@239
|
87 // Report status
|
mas01cr@239
|
88 status(dbName);
|
mas01cr@239
|
89 VERB_LOG(0, "%s %s %u vectors %jd bytes.\n", COM_INSERT, dbName, numVectors, (intmax_t) (statbuf.st_size - sizeof(int)));
|
mas01cr@239
|
90
|
mas01cr@239
|
91 // Copy the header back to the database
|
mas01cr@239
|
92 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01cr@239
|
93
|
mas01cr@239
|
94 // CLEAN UP
|
mas01cr@239
|
95 munmap(indata,statbuf.st_size);
|
mas01cr@239
|
96 close(infid);
|
mas01cr@239
|
97 }
|
mas01cr@239
|
98
|
mas01cr@239
|
99 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
|
mas01cr@239
|
100 assert(usingTimes);
|
mas01cr@239
|
101
|
mas01cr@239
|
102 unsigned numtimes = 0;
|
mas01cr@239
|
103
|
mas01cr@239
|
104 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
|
mas01cr@239
|
105 dbH->flags=dbH->flags|O2_FLAG_TIMES;
|
mas01cr@239
|
106 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
|
mas01cr@239
|
107 error("Timestamp file used with non-timestamped database", timesFileName);
|
mas01cr@239
|
108 }
|
mas01cr@239
|
109
|
mas01cr@239
|
110 if(!timesFile->is_open()) {
|
mas01cr@239
|
111 error("problem opening times file on timestamped database", timesFileName);
|
mas01cr@239
|
112 }
|
mas01cr@239
|
113
|
mas01cr@239
|
114 double timepoint, next;
|
mas01cr@239
|
115 *timesFile >> timepoint;
|
mas01cr@239
|
116 if (timesFile->eof()) {
|
mas01cr@239
|
117 error("no entries in times file", timesFileName);
|
mas01cr@239
|
118 }
|
mas01cr@239
|
119 numtimes++;
|
mas01cr@239
|
120 do {
|
mas01cr@239
|
121 *timesFile >> next;
|
mas01cr@239
|
122 if (timesFile->eof()) {
|
mas01cr@239
|
123 break;
|
mas01cr@239
|
124 }
|
mas01cr@239
|
125 numtimes++;
|
mas01cr@239
|
126 timesdata[0] = timepoint;
|
mas01cr@239
|
127 timepoint = (timesdata[1] = next);
|
mas01cr@239
|
128 timesdata += 2;
|
mas01cr@239
|
129 } while (numtimes < numVectors + 1);
|
mas01cr@239
|
130
|
mas01cr@239
|
131 if (numtimes < numVectors + 1) {
|
mas01cr@239
|
132 error("too few timepoints in times file", timesFileName);
|
mas01cr@239
|
133 }
|
mas01cr@239
|
134
|
mas01cr@239
|
135 *timesFile >> next;
|
mas01cr@239
|
136 if (!timesFile->eof()) {
|
mas01cr@239
|
137 error("too many timepoints in times file", timesFileName);
|
mas01cr@239
|
138 }
|
mas01cr@239
|
139 }
|
mas01cr@239
|
140
|
mas01cr@239
|
141 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
|
mas01cr@239
|
142 if (usingPower) {
|
mas01cr@239
|
143 if (!(dbH->flags & O2_FLAG_POWER)) {
|
mas01cr@239
|
144 error("Cannot insert power data on non-power DB", dbName);
|
mas01cr@239
|
145 }
|
mas01cr@239
|
146
|
mas01cr@239
|
147 int one;
|
mas01cr@239
|
148 unsigned int count;
|
mas01cr@239
|
149
|
mas01cr@239
|
150 count = read(powerfd, &one, sizeof(unsigned int));
|
mas01cr@239
|
151 if (count != sizeof(unsigned int)) {
|
mas01cr@239
|
152 error("powerfd read failed", "int", "read");
|
mas01cr@239
|
153 }
|
mas01cr@239
|
154 if (one != 1) {
|
mas01cr@239
|
155 error("dimensionality of power file not 1", powerFileName);
|
mas01cr@239
|
156 }
|
mas01cr@239
|
157
|
mas01cr@239
|
158 // FIXME: should check that the powerfile is the right size for
|
mas01cr@239
|
159 // this. -- CSR, 2007-10-30
|
mas01cr@239
|
160 count = read(powerfd, powerdata, numVectors * sizeof(double));
|
mas01cr@239
|
161 if (count != numVectors * sizeof(double)) {
|
mas01cr@239
|
162 error("powerfd read failed", "double", "read");
|
mas01cr@239
|
163 }
|
mas01cr@239
|
164 }
|
mas01cr@239
|
165 }
|
mas01cr@239
|
166
|
mas01cr@239
|
167 void audioDB::batchinsert(const char* dbName, const char* inFile) {
|
mas01cr@239
|
168
|
mas01cr@239
|
169 forWrite = true;
|
mas01cr@239
|
170 initDBHeader(dbName);
|
mas01cr@239
|
171
|
mas01cr@239
|
172 if(!key)
|
mas01cr@239
|
173 key=inFile;
|
mas01cr@239
|
174 std::ifstream *filesIn = 0;
|
mas01cr@239
|
175 std::ifstream *keysIn = 0;
|
mas01cr@239
|
176 std::ifstream* thisTimesFile = 0;
|
mas01cr@239
|
177 int thispowerfd = 0;
|
mas01cr@239
|
178
|
mas01cr@239
|
179 if(!(filesIn = new std::ifstream(inFile)))
|
mas01cr@239
|
180 error("Could not open batch in file", inFile);
|
mas01cr@239
|
181 if(key && key!=inFile)
|
mas01cr@239
|
182 if(!(keysIn = new std::ifstream(key)))
|
mas01cr@239
|
183 error("Could not open batch key file",key);
|
mas01cr@239
|
184
|
mas01cr@239
|
185 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01cr@239
|
186 error("Must use timestamps with timestamped database","use --times");
|
mas01cr@239
|
187
|
mas01cr@239
|
188 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01cr@239
|
189 error("Must use power with power-enabled database", dbName);
|
mas01cr@239
|
190
|
mas01cr@239
|
191 unsigned totalVectors=0;
|
mas01cr@239
|
192 char *thisKey = new char[MAXSTR];
|
mas01cr@239
|
193 char *thisFile = new char[MAXSTR];
|
mas01cr@239
|
194 char *thisTimesFileName = new char[MAXSTR];
|
mas01cr@239
|
195 char *thisPowerFileName = new char[MAXSTR];
|
mas01cr@239
|
196
|
mas01cr@239
|
197 do{
|
mas01cr@239
|
198 filesIn->getline(thisFile,MAXSTR);
|
mas01cr@239
|
199 if(key && key!=inFile)
|
mas01cr@239
|
200 keysIn->getline(thisKey,MAXSTR);
|
mas01cr@239
|
201 else
|
mas01cr@239
|
202 thisKey = thisFile;
|
mas01cr@239
|
203 if(usingTimes)
|
mas01cr@239
|
204 timesFile->getline(thisTimesFileName,MAXSTR);
|
mas01cr@239
|
205 if(usingPower)
|
mas01cr@239
|
206 powerFile->getline(thisPowerFileName, MAXSTR);
|
mas01cr@239
|
207
|
mas01cr@239
|
208 if(filesIn->eof())
|
mas01cr@239
|
209 break;
|
mas01cr@239
|
210
|
mas01cr@239
|
211 initInputFile(thisFile);
|
mas01cr@239
|
212
|
mas01cr@239
|
213 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
|
mas01cr@239
|
214 error("batchinsert failed: no more room in database", thisFile);
|
mas01cr@239
|
215 }
|
mas01cr@239
|
216
|
mas01cr@239
|
217 // Linear scan of filenames check for pre-existing feature
|
mas01cr@239
|
218 unsigned alreadyInserted=0;
|
mas01cr@239
|
219
|
mas01cr@239
|
220 for(unsigned k=0; k<dbH->numFiles; k++)
|
mas01cr@239
|
221 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){
|
mas01cr@239
|
222 alreadyInserted=1;
|
mas01cr@239
|
223 break;
|
mas01cr@239
|
224 }
|
mas01cr@239
|
225
|
mas01cr@239
|
226 if(alreadyInserted) {
|
mas01cr@239
|
227 VERB_LOG(0, "key already exists in database: %s\n", thisKey);
|
mas01cr@239
|
228 } else {
|
mas01cr@239
|
229 // Make a track index table of features to file indexes
|
mas01cr@239
|
230 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01cr@239
|
231 if(!numVectors) {
|
mas01cr@239
|
232 VERB_LOG(0, "ignoring zero-length feature vector file: %s\n", thisKey);
|
mas01cr@239
|
233 }
|
mas01cr@239
|
234 else{
|
mas01cr@239
|
235 if(usingTimes){
|
mas01cr@239
|
236 if(timesFile->eof()) {
|
mas01cr@239
|
237 error("not enough timestamp files in timesList", timesFileName);
|
mas01cr@239
|
238 }
|
mas01cr@239
|
239 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
|
mas01cr@239
|
240 if(!thisTimesFile->is_open()) {
|
mas01cr@239
|
241 error("Cannot open timestamp file", thisTimesFileName);
|
mas01cr@239
|
242 }
|
mas01cr@239
|
243 off_t insertoffset = dbH->length;
|
mas01cr@239
|
244 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
|
mas01cr@239
|
245 double *timesdata = timesTable + 2*indexoffset;
|
mas01cr@239
|
246 if(2*(indexoffset + numVectors) > timesTableLength) {
|
mas01cr@239
|
247 error("out of space for times", key);
|
mas01cr@239
|
248 }
|
mas01cr@239
|
249 insertTimeStamps(numVectors, thisTimesFile, timesdata);
|
mas01cr@239
|
250 if(thisTimesFile)
|
mas01cr@239
|
251 delete thisTimesFile;
|
mas01cr@239
|
252 }
|
mas01cr@239
|
253
|
mas01cr@239
|
254 if (usingPower) {
|
mas01cr@239
|
255 if(powerFile->eof()) {
|
mas01cr@239
|
256 error("not enough power files in powerList", powerFileName);
|
mas01cr@239
|
257 }
|
mas01cr@239
|
258 thispowerfd = open(thisPowerFileName, O_RDONLY);
|
mas01cr@239
|
259 if (thispowerfd < 0) {
|
mas01cr@239
|
260 error("failed to open power file", thisPowerFileName);
|
mas01cr@239
|
261 }
|
mas01cr@239
|
262 off_t insertoffset = dbH->length;
|
mas01cr@239
|
263 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
|
mas01cr@239
|
264 double *powerdata = powerTable + poweroffset;
|
mas01cr@239
|
265 insertPowerData(numVectors, thispowerfd, powerdata);
|
mas01cr@239
|
266 if (0 < thispowerfd) {
|
mas01cr@239
|
267 close(thispowerfd);
|
mas01cr@239
|
268 }
|
mas01cr@239
|
269 }
|
mas01cr@239
|
270 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
|
mas01cr@239
|
271
|
mas01cr@239
|
272 off_t insertoffset = dbH->length;// Store current state
|
mas01cr@239
|
273
|
mas01cr@239
|
274 // Increment file count
|
mas01cr@239
|
275 dbH->numFiles++;
|
mas01cr@239
|
276
|
mas01cr@239
|
277 // Update Header information
|
mas01cr@239
|
278 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01cr@239
|
279
|
mas01cr@239
|
280 // Update track to file index map
|
mas01cr@239
|
281 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
|
mas01cr@239
|
282
|
mas01cr@239
|
283 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
|
mas01cr@239
|
284
|
mas01cr@239
|
285 // Norm the vectors on input if the database is already L2 normed
|
mas01cr@239
|
286 if(dbH->flags & O2_FLAG_L2NORM)
|
mas01cr@239
|
287 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
|
mas01cr@239
|
288
|
mas01cr@239
|
289 totalVectors+=numVectors;
|
mas01cr@239
|
290
|
mas01cr@239
|
291 // Copy the header back to the database
|
mas01cr@239
|
292 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01cr@239
|
293 }
|
mas01cr@239
|
294 }
|
mas01cr@239
|
295 // CLEAN UP
|
mas01cr@239
|
296 munmap(indata,statbuf.st_size);
|
mas01cr@239
|
297 close(infid);
|
mas01cr@239
|
298 } while(!filesIn->eof());
|
mas01cr@239
|
299
|
mas01cr@239
|
300 VERB_LOG(0, "%s %s %u vectors %ju bytes.\n", COM_BATCHINSERT, dbName, totalVectors, (intmax_t) (totalVectors * dbH->dim * sizeof(double)));
|
mas01cr@239
|
301
|
mas01cr@239
|
302 // Report status
|
mas01cr@239
|
303 status(dbName);
|
mas01cr@239
|
304 }
|