mas01cr@204
|
1 #include "audioDB.h"
|
mas01cr@204
|
2
|
mas01cr@204
|
3 bool audioDB::enough_data_space_free(off_t size) {
|
mas01cr@204
|
4 return(dbH->timesTableOffset > dbH->dataOffset + dbH->length + size);
|
mas01cr@204
|
5 }
|
mas01cr@204
|
6
|
mas01cr@204
|
7 void audioDB::insert_data_vectors(off_t offset, void *buffer, size_t size) {
|
mas01cr@204
|
8 lseek(dbfid, dbH->dataOffset + offset, SEEK_SET);
|
mas01cr@204
|
9 write(dbfid, buffer, size);
|
mas01cr@204
|
10 }
|
mas01cr@204
|
11
|
mas01cr@204
|
12 void audioDB::insert(const char* dbName, const char* inFile) {
|
mas01cr@204
|
13 forWrite = true;
|
mas01cr@204
|
14 initTables(dbName, inFile);
|
mas01cr@204
|
15
|
mas01cr@204
|
16 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01cr@204
|
17 error("Must use timestamps with timestamped database","use --times");
|
mas01cr@204
|
18
|
mas01cr@204
|
19 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01cr@204
|
20 error("Must use power with power-enabled database", dbName);
|
mas01cr@204
|
21
|
mas01cr@204
|
22 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
|
mas01cr@204
|
23 error("Insert failed: no more room in database", inFile);
|
mas01cr@204
|
24 }
|
mas01cr@204
|
25
|
mas01cr@204
|
26 if(!key)
|
mas01cr@204
|
27 key=inFile;
|
mas01cr@204
|
28 // Linear scan of filenames check for pre-existing feature
|
mas01cr@204
|
29 unsigned alreadyInserted=0;
|
mas01cr@204
|
30 for(unsigned k=0; k<dbH->numFiles; k++)
|
mas01cr@204
|
31 if(strncmp(fileTable + k*O2_FILETABLESIZE, key, strlen(key)+1)==0){
|
mas01cr@204
|
32 alreadyInserted=1;
|
mas01cr@204
|
33 break;
|
mas01cr@204
|
34 }
|
mas01cr@204
|
35
|
mas01cr@204
|
36 if(alreadyInserted){
|
mas01cr@204
|
37 if(verbosity) {
|
mas01cr@204
|
38 std::cerr << "Warning: key already exists in database, ignoring: " <<inFile << std::endl;
|
mas01cr@204
|
39 }
|
mas01cr@204
|
40 return;
|
mas01cr@204
|
41 }
|
mas01cr@204
|
42
|
mas01cr@204
|
43 // Make a track index table of features to file indexes
|
mas01cr@204
|
44 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01cr@204
|
45 if(!numVectors){
|
mas01cr@204
|
46 if(verbosity) {
|
mas01cr@204
|
47 std::cerr << "Warning: ignoring zero-length feature vector file:" << key << std::endl;
|
mas01cr@204
|
48 }
|
mas01cr@204
|
49 // CLEAN UP
|
mas01cr@204
|
50 munmap(indata,statbuf.st_size);
|
mas01cr@204
|
51 munmap(db,dbH->dbSize);
|
mas01cr@204
|
52 close(infid);
|
mas01cr@204
|
53 return;
|
mas01cr@204
|
54 }
|
mas01cr@204
|
55
|
mas01cr@204
|
56 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, key, strlen(key));
|
mas01cr@204
|
57
|
mas01cr@204
|
58 off_t insertoffset = dbH->length;// Store current state
|
mas01cr@204
|
59
|
mas01cr@204
|
60 // Check times status and insert times from file
|
mas01cr@204
|
61 unsigned indexoffset = insertoffset/(dbH->dim*sizeof(double));
|
mas01cr@204
|
62 double *timesdata = timesTable + 2*indexoffset;
|
mas01cr@204
|
63
|
mas01cr@204
|
64 if(2*(indexoffset + numVectors) > timesTableLength) {
|
mas01cr@204
|
65 error("out of space for times", key);
|
mas01cr@204
|
66 }
|
mas01cr@204
|
67
|
mas01cr@204
|
68 if (usingTimes) {
|
mas01cr@204
|
69 insertTimeStamps(numVectors, timesFile, timesdata);
|
mas01cr@204
|
70 }
|
mas01cr@204
|
71
|
mas01cr@204
|
72 double *powerdata = powerTable + indexoffset;
|
mas01cr@204
|
73 insertPowerData(numVectors, powerfd, powerdata);
|
mas01cr@204
|
74
|
mas01cr@204
|
75 // Increment file count
|
mas01cr@204
|
76 dbH->numFiles++;
|
mas01cr@204
|
77
|
mas01cr@204
|
78 // Update Header information
|
mas01cr@204
|
79 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01cr@204
|
80
|
mas01cr@204
|
81 // Update track to file index map
|
mas01cr@204
|
82 memcpy(trackTable + dbH->numFiles - 1, &numVectors, sizeof(unsigned));
|
mas01cr@204
|
83
|
mas01cr@204
|
84 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
|
mas01cr@204
|
85
|
mas01cr@204
|
86 // Norm the vectors on input if the database is already L2 normed
|
mas01cr@204
|
87 if(dbH->flags & O2_FLAG_L2NORM)
|
mas01cr@204
|
88 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
|
mas01cr@204
|
89
|
mas01cr@204
|
90 // Report status
|
mas01cr@204
|
91 status(dbName);
|
mas01cr@204
|
92 if(verbosity) {
|
mas01cr@204
|
93 std::cerr << COM_INSERT << " " << dbName << " " << numVectors << " vectors "
|
mas01cr@204
|
94 << (statbuf.st_size-sizeof(int)) << " bytes." << std::endl;
|
mas01cr@204
|
95 }
|
mas01cr@204
|
96
|
mas01cr@204
|
97 // Copy the header back to the database
|
mas01cr@204
|
98 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01cr@204
|
99
|
mas01cr@204
|
100 // CLEAN UP
|
mas01cr@204
|
101 munmap(indata,statbuf.st_size);
|
mas01cr@204
|
102 close(infid);
|
mas01cr@204
|
103 }
|
mas01cr@204
|
104
|
mas01cr@204
|
105 void audioDB::insertTimeStamps(unsigned numVectors, std::ifstream *timesFile, double *timesdata) {
|
mas01cr@204
|
106 assert(usingTimes);
|
mas01cr@204
|
107
|
mas01cr@204
|
108 unsigned numtimes = 0;
|
mas01cr@204
|
109
|
mas01cr@204
|
110 if(!(dbH->flags & O2_FLAG_TIMES) && !dbH->numFiles) {
|
mas01cr@204
|
111 dbH->flags=dbH->flags|O2_FLAG_TIMES;
|
mas01cr@204
|
112 } else if(!(dbH->flags & O2_FLAG_TIMES)) {
|
mas01cr@204
|
113 error("Timestamp file used with non-timestamped database", timesFileName);
|
mas01cr@204
|
114 }
|
mas01cr@204
|
115
|
mas01cr@204
|
116 if(!timesFile->is_open()) {
|
mas01cr@204
|
117 error("problem opening times file on timestamped database", timesFileName);
|
mas01cr@204
|
118 }
|
mas01cr@204
|
119
|
mas01cr@204
|
120 double timepoint, next;
|
mas01cr@204
|
121 *timesFile >> timepoint;
|
mas01cr@204
|
122 if (timesFile->eof()) {
|
mas01cr@204
|
123 error("no entries in times file", timesFileName);
|
mas01cr@204
|
124 }
|
mas01cr@204
|
125 numtimes++;
|
mas01cr@204
|
126 do {
|
mas01cr@204
|
127 *timesFile >> next;
|
mas01cr@204
|
128 if (timesFile->eof()) {
|
mas01cr@204
|
129 break;
|
mas01cr@204
|
130 }
|
mas01cr@204
|
131 numtimes++;
|
mas01cr@204
|
132 timesdata[0] = timepoint;
|
mas01cr@204
|
133 timepoint = (timesdata[1] = next);
|
mas01cr@204
|
134 timesdata += 2;
|
mas01cr@204
|
135 } while (numtimes < numVectors + 1);
|
mas01cr@204
|
136
|
mas01cr@204
|
137 if (numtimes < numVectors + 1) {
|
mas01cr@204
|
138 error("too few timepoints in times file", timesFileName);
|
mas01cr@204
|
139 }
|
mas01cr@204
|
140
|
mas01cr@204
|
141 *timesFile >> next;
|
mas01cr@204
|
142 if (!timesFile->eof()) {
|
mas01cr@204
|
143 error("too many timepoints in times file", timesFileName);
|
mas01cr@204
|
144 }
|
mas01cr@204
|
145 }
|
mas01cr@204
|
146
|
mas01cr@204
|
147 void audioDB::insertPowerData(unsigned numVectors, int powerfd, double *powerdata) {
|
mas01cr@204
|
148 if (usingPower) {
|
mas01cr@204
|
149 if (!(dbH->flags & O2_FLAG_POWER)) {
|
mas01cr@204
|
150 error("Cannot insert power data on non-power DB", dbName);
|
mas01cr@204
|
151 }
|
mas01cr@204
|
152
|
mas01cr@204
|
153 int one;
|
mas01cr@204
|
154 unsigned int count;
|
mas01cr@204
|
155
|
mas01cr@204
|
156 count = read(powerfd, &one, sizeof(unsigned int));
|
mas01cr@204
|
157 if (count != sizeof(unsigned int)) {
|
mas01cr@204
|
158 error("powerfd read failed", "int", "read");
|
mas01cr@204
|
159 }
|
mas01cr@204
|
160 if (one != 1) {
|
mas01cr@204
|
161 error("dimensionality of power file not 1", powerFileName);
|
mas01cr@204
|
162 }
|
mas01cr@204
|
163
|
mas01cr@204
|
164 // FIXME: should check that the powerfile is the right size for
|
mas01cr@204
|
165 // this. -- CSR, 2007-10-30
|
mas01cr@204
|
166 count = read(powerfd, powerdata, numVectors * sizeof(double));
|
mas01cr@204
|
167 if (count != numVectors * sizeof(double)) {
|
mas01cr@204
|
168 error("powerfd read failed", "double", "read");
|
mas01cr@204
|
169 }
|
mas01cr@204
|
170 }
|
mas01cr@204
|
171 }
|
mas01cr@204
|
172
|
mas01cr@204
|
173 void audioDB::batchinsert(const char* dbName, const char* inFile) {
|
mas01cr@204
|
174
|
mas01cr@204
|
175 forWrite = true;
|
mas01cr@204
|
176 initDBHeader(dbName);
|
mas01cr@204
|
177
|
mas01cr@204
|
178 if(!key)
|
mas01cr@204
|
179 key=inFile;
|
mas01cr@204
|
180 std::ifstream *filesIn = 0;
|
mas01cr@204
|
181 std::ifstream *keysIn = 0;
|
mas01cr@204
|
182 std::ifstream* thisTimesFile = 0;
|
mas01cr@204
|
183 int thispowerfd = 0;
|
mas01cr@204
|
184
|
mas01cr@204
|
185 if(!(filesIn = new std::ifstream(inFile)))
|
mas01cr@204
|
186 error("Could not open batch in file", inFile);
|
mas01cr@204
|
187 if(key && key!=inFile)
|
mas01cr@204
|
188 if(!(keysIn = new std::ifstream(key)))
|
mas01cr@204
|
189 error("Could not open batch key file",key);
|
mas01cr@204
|
190
|
mas01cr@204
|
191 if(!usingTimes && (dbH->flags & O2_FLAG_TIMES))
|
mas01cr@204
|
192 error("Must use timestamps with timestamped database","use --times");
|
mas01cr@204
|
193
|
mas01cr@204
|
194 if(!usingPower && (dbH->flags & O2_FLAG_POWER))
|
mas01cr@204
|
195 error("Must use power with power-enabled database", dbName);
|
mas01cr@204
|
196
|
mas01cr@204
|
197 unsigned totalVectors=0;
|
mas01cr@204
|
198 char *thisKey = new char[MAXSTR];
|
mas01cr@204
|
199 char *thisFile = new char[MAXSTR];
|
mas01cr@204
|
200 char *thisTimesFileName = new char[MAXSTR];
|
mas01cr@204
|
201 char *thisPowerFileName = new char[MAXSTR];
|
mas01cr@204
|
202
|
mas01cr@204
|
203 do{
|
mas01cr@204
|
204 filesIn->getline(thisFile,MAXSTR);
|
mas01cr@204
|
205 if(key && key!=inFile)
|
mas01cr@204
|
206 keysIn->getline(thisKey,MAXSTR);
|
mas01cr@204
|
207 else
|
mas01cr@204
|
208 thisKey = thisFile;
|
mas01cr@204
|
209 if(usingTimes)
|
mas01cr@204
|
210 timesFile->getline(thisTimesFileName,MAXSTR);
|
mas01cr@204
|
211 if(usingPower)
|
mas01cr@204
|
212 powerFile->getline(thisPowerFileName, MAXSTR);
|
mas01cr@204
|
213
|
mas01cr@204
|
214 if(filesIn->eof())
|
mas01cr@204
|
215 break;
|
mas01cr@204
|
216
|
mas01cr@204
|
217 initInputFile(thisFile);
|
mas01cr@204
|
218
|
mas01cr@204
|
219 if(!enough_data_space_free(statbuf.st_size - sizeof(int))) {
|
mas01cr@204
|
220 error("batchinsert failed: no more room in database", thisFile);
|
mas01cr@204
|
221 }
|
mas01cr@204
|
222
|
mas01cr@204
|
223 // Linear scan of filenames check for pre-existing feature
|
mas01cr@204
|
224 unsigned alreadyInserted=0;
|
mas01cr@204
|
225
|
mas01cr@204
|
226 for(unsigned k=0; k<dbH->numFiles; k++)
|
mas01cr@204
|
227 if(strncmp(fileTable + k*O2_FILETABLESIZE, thisKey, strlen(thisKey)+1)==0){
|
mas01cr@204
|
228 alreadyInserted=1;
|
mas01cr@204
|
229 break;
|
mas01cr@204
|
230 }
|
mas01cr@204
|
231
|
mas01cr@204
|
232 if(alreadyInserted){
|
mas01cr@204
|
233 if(verbosity) {
|
mas01cr@204
|
234 std::cerr << "Warning: key already exists in database:" << thisKey << std::endl;
|
mas01cr@204
|
235 }
|
mas01cr@204
|
236 }
|
mas01cr@204
|
237 else{
|
mas01cr@204
|
238
|
mas01cr@204
|
239 // Make a track index table of features to file indexes
|
mas01cr@204
|
240 unsigned numVectors = (statbuf.st_size-sizeof(int))/(sizeof(double)*dbH->dim);
|
mas01cr@204
|
241 if(!numVectors){
|
mas01cr@204
|
242 if(verbosity) {
|
mas01cr@204
|
243 std::cerr << "Warning: ignoring zero-length feature vector file:" << thisKey << std::endl;
|
mas01cr@204
|
244 }
|
mas01cr@204
|
245 }
|
mas01cr@204
|
246 else{
|
mas01cr@204
|
247 if(usingTimes){
|
mas01cr@204
|
248 if(timesFile->eof()) {
|
mas01cr@204
|
249 error("not enough timestamp files in timesList", timesFileName);
|
mas01cr@204
|
250 }
|
mas01cr@204
|
251 thisTimesFile = new std::ifstream(thisTimesFileName,std::ios::in);
|
mas01cr@204
|
252 if(!thisTimesFile->is_open()) {
|
mas01cr@204
|
253 error("Cannot open timestamp file", thisTimesFileName);
|
mas01cr@204
|
254 }
|
mas01cr@204
|
255 off_t insertoffset = dbH->length;
|
mas01cr@204
|
256 unsigned indexoffset = insertoffset / (dbH->dim*sizeof(double));
|
mas01cr@204
|
257 double *timesdata = timesTable + 2*indexoffset;
|
mas01cr@204
|
258 if(2*(indexoffset + numVectors) > timesTableLength) {
|
mas01cr@204
|
259 error("out of space for times", key);
|
mas01cr@204
|
260 }
|
mas01cr@204
|
261 insertTimeStamps(numVectors, thisTimesFile, timesdata);
|
mas01cr@204
|
262 if(thisTimesFile)
|
mas01cr@204
|
263 delete thisTimesFile;
|
mas01cr@204
|
264 }
|
mas01cr@204
|
265
|
mas01cr@204
|
266 if (usingPower) {
|
mas01cr@204
|
267 if(powerFile->eof()) {
|
mas01cr@204
|
268 error("not enough power files in powerList", powerFileName);
|
mas01cr@204
|
269 }
|
mas01cr@204
|
270 thispowerfd = open(thisPowerFileName, O_RDONLY);
|
mas01cr@204
|
271 if (thispowerfd < 0) {
|
mas01cr@204
|
272 error("failed to open power file", thisPowerFileName);
|
mas01cr@204
|
273 }
|
mas01cr@204
|
274 unsigned insertoffset = dbH->length;
|
mas01cr@204
|
275 unsigned poweroffset = insertoffset / (dbH->dim * sizeof(double));
|
mas01cr@204
|
276 double *powerdata = powerTable + poweroffset;
|
mas01cr@204
|
277 insertPowerData(numVectors, thispowerfd, powerdata);
|
mas01cr@204
|
278 if (0 < thispowerfd) {
|
mas01cr@204
|
279 close(thispowerfd);
|
mas01cr@204
|
280 }
|
mas01cr@204
|
281 }
|
mas01cr@204
|
282 strncpy(fileTable + dbH->numFiles*O2_FILETABLESIZE, thisKey, strlen(thisKey));
|
mas01cr@204
|
283
|
mas01cr@204
|
284 off_t insertoffset = dbH->length;// Store current state
|
mas01cr@204
|
285
|
mas01cr@204
|
286 // Increment file count
|
mas01cr@204
|
287 dbH->numFiles++;
|
mas01cr@204
|
288
|
mas01cr@204
|
289 // Update Header information
|
mas01cr@204
|
290 dbH->length+=(statbuf.st_size-sizeof(int));
|
mas01cr@204
|
291
|
mas01cr@204
|
292 // Update track to file index map
|
mas01cr@204
|
293 memcpy (trackTable+dbH->numFiles-1, &numVectors, sizeof(unsigned));
|
mas01cr@204
|
294
|
mas01cr@204
|
295 insert_data_vectors(insertoffset, indata + sizeof(int), statbuf.st_size - sizeof(int));
|
mas01cr@204
|
296
|
mas01cr@204
|
297 // Norm the vectors on input if the database is already L2 normed
|
mas01cr@204
|
298 if(dbH->flags & O2_FLAG_L2NORM)
|
mas01cr@204
|
299 unitNormAndInsertL2((double *)(indata + sizeof(int)), dbH->dim, numVectors, 1); // append
|
mas01cr@204
|
300
|
mas01cr@204
|
301 totalVectors+=numVectors;
|
mas01cr@204
|
302
|
mas01cr@204
|
303 // Copy the header back to the database
|
mas01cr@204
|
304 memcpy (db, dbH, sizeof(dbTableHeaderT));
|
mas01cr@204
|
305 }
|
mas01cr@204
|
306 }
|
mas01cr@204
|
307 // CLEAN UP
|
mas01cr@204
|
308 munmap(indata,statbuf.st_size);
|
mas01cr@204
|
309 close(infid);
|
mas01cr@204
|
310 }while(!filesIn->eof());
|
mas01cr@204
|
311
|
mas01cr@204
|
312 if(verbosity) {
|
mas01cr@204
|
313 std::cerr << COM_BATCHINSERT << " " << dbName << " " << totalVectors << " vectors "
|
mas01cr@204
|
314 << totalVectors*dbH->dim*sizeof(double) << " bytes." << std::endl;
|
mas01cr@204
|
315 }
|
mas01cr@204
|
316
|
mas01cr@204
|
317 // Report status
|
mas01cr@204
|
318 status(dbName);
|
mas01cr@204
|
319 }
|