00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "IDMap_AMOS.hh"
00011 #include <cstdio>
00012 #include <string>
00013 #include <sstream>
00014 #include <cstring>
00015 using namespace AMOS;
00016 using namespace std;
00017
00018 const int MAX_EID_LENGTH = 2048;
00019
00020
00021
00022
00023
00024 IDMap_t::HashTriple_t & IDMap_t::HashTriple_t::operator=
00025 (const HashTriple_t & s)
00026 {
00027 if ( this != &s )
00028 {
00029 c = s.c;
00030 iid = s.iid;
00031 bid = s.bid;
00032 eid = s.eid;
00033 }
00034 return *this;
00035 }
00036
00037
00038
00039
00040
00041
00042 void IDMap_t::HashNode_t::clear ( )
00043 {
00044 if ( triple != NULL )
00045 if ( -- (triple -> c) == 0 )
00046 delete triple;
00047 delete next;
00048
00049 triple = NULL;
00050 next = NULL;
00051 }
00052
00053
00054
00055
00056
00057 const NCode_t IDMap_t::NCODE = M_IDMAP;
00058 const Size_t IDMap_t::DEFAULT_NUM_BUCKETS = 1000;
00059
00060
00061
00062 void IDMap_t::clear ( )
00063 {
00064 if ( size_m > 0 )
00065 {
00066 vector<HashNode_t>::iterator hni;
00067 for ( hni = iid_bucs_m . begin( ); hni != iid_bucs_m . end( ); hni ++ )
00068 hni -> clearchain( );
00069 for ( hni = eid_bucs_m . begin( ); hni != eid_bucs_m . end( ); hni ++ )
00070 hni -> clearchain( );
00071 size_m = 0;
00072 }
00073 type_m = NULL_NCODE;
00074 }
00075
00076
00077
00078 Size_t IDMap_t::minbuckets (Size_t min)
00079 {
00080
00081 const uint8_t NUM_BUCKET_SIZES = 27;
00082 const Size_t BUCKET_SIZES [NUM_BUCKET_SIZES] =
00083 {
00084 53l, 97l, 193l, 389l, 769l,
00085 1543l, 3079l, 6151l, 12289l, 24593l,
00086 49157l, 98317l, 196613l, 393241l, 786433l,
00087 1572869l, 3145739l, 6291469l, 12582917l, 25165843l,
00088 50331653l, 100663319l, 201326611l, 402653189l, 805306457l,
00089 1610612741l, 2147354947l
00090 };
00091
00092 uint8_t first = 0;
00093 uint8_t len = NUM_BUCKET_SIZES;
00094 uint8_t half, middle;
00095
00096
00097 while ( len > 0 )
00098 {
00099 half = len >> 1;
00100 middle = first + half;
00101 if ( BUCKET_SIZES [middle] < min )
00102 {
00103 first = middle + 1;
00104 len = len - half - 1;
00105 }
00106 else
00107 len = half;
00108 }
00109 if ( first == NUM_BUCKET_SIZES )
00110 first --;
00111
00112 return BUCKET_SIZES [first];
00113 }
00114
00115
00116
00117 bool IDMap_t::lookupnode (ID_t key, HashNode_t * & node) const
00118 {
00119 if ( key == NULL_ID )
00120 {
00121 node = NULL;
00122 return false;
00123 }
00124 for ( node = hashfunc (key); node -> next != NULL; node = node -> next )
00125 if ( node -> triple -> iid == key )
00126 return true;
00127 if ( node -> triple == NULL || node -> triple -> iid != key )
00128 return false;
00129 else
00130 return true;
00131 }
00132
00133
00134
00135 bool IDMap_t::lookupnode (const string & key, HashNode_t * & node) const
00136 {
00137 if ( key . empty( ) )
00138 {
00139 node = NULL;
00140 return false;
00141 }
00142 for ( node = hashfunc (key); node -> next != NULL; node = node -> next )
00143 if ( node -> triple -> eid == key )
00144 return true;
00145 if ( node -> triple == NULL || node -> triple -> eid != key )
00146 return false;
00147 else
00148 return true;
00149 }
00150
00151
00152
00153 void IDMap_t::removenode (HashNode_t * curr, HashNode_t * prev)
00154 {
00155 if ( prev == NULL )
00156
00157 {
00158 if ( curr -> next == NULL )
00159 curr -> clear( );
00160 else
00161 {
00162 prev = curr -> next;
00163 curr -> next = NULL;
00164 curr -> clear( );
00165 *curr = *prev;
00166 delete prev;
00167 }
00168 }
00169 else
00170
00171 {
00172 prev -> next = curr -> next;
00173 curr -> next = NULL;
00174 curr -> clear( );
00175 delete curr;
00176 }
00177 }
00178
00179
00180
00181 void IDMap_t::concat (const IDMap_t & s)
00182 {
00183 if ( this != &s )
00184 {
00185 resize (getSize( ) + s . getSize( ));
00186
00187
00188 for ( const_iterator itr = s . begin( ); itr != s . end( ); ++ itr )
00189 insert (itr -> iid, itr -> eid, itr -> bid);
00190 }
00191 }
00192
00193
00194
00195 const IDMap_t::HashTriple_t * IDMap_t::insert (ID_t iid,
00196 const string & eid,
00197 ID_t bid)
00198 {
00199 if ( iid == NULL_ID && eid . empty( ) ) return NULL;
00200
00201 HashNode_t * curri, * curre;
00202
00203
00204
00205 if ( lookupnode (iid, curri) )
00206 {
00207 ostringstream ss;
00208 ss << "Cannot insert int key '" << iid << "' multiple times";
00209 AMOS_THROW_ARGUMENT (ss . str( ));
00210 }
00211 if ( lookupnode (eid, curre) )
00212 {
00213 ostringstream ss;
00214 ss << "Cannot insert string key '" << eid << "' multiple times";
00215 AMOS_THROW_ARGUMENT (ss . str( ));
00216 }
00217
00218 HashTriple_t * currt = new HashTriple_t (iid, eid, bid);
00219
00220 if ( curri != NULL )
00221 {
00222 if ( curri -> triple != NULL )
00223 curri = curri -> next = new HashNode_t( );
00224 curri -> triple = currt;
00225 currt -> c ++;
00226 }
00227 if ( curre != NULL )
00228 {
00229 if ( curre -> triple != NULL )
00230 curre = curre -> next = new HashNode_t( );
00231 curre -> triple = currt;
00232 currt -> c ++;
00233 }
00234
00235 if ( ++ size_m >= getBuckets( ) )
00236 resize (size_m + 1);
00237
00238 return currt;
00239 }
00240
00241
00242
00243 void IDMap_t::readMessage (const Message_t & msg)
00244 {
00245 clear( );
00246
00247 try {
00248 Size_t size = -1;
00249 istringstream ss;
00250
00251 if ( msg . exists (F_OBJECT) )
00252 {
00253 string str = msg . getField (F_OBJECT);
00254
00255 if ( str . length( ) != NCODE_SIZE )
00256 AMOS_THROW_ARGUMENT ("Invalid object type format");
00257 type_m = Encode (str);
00258 }
00259
00260 if ( msg . exists (F_SIZE) )
00261 {
00262 ss . str (msg . getField (F_SIZE));
00263 ss >> size;
00264 if ( !ss )
00265 AMOS_THROW_ARGUMENT ("Invalid size format");
00266 ss . clear( );
00267
00268 resize (size);
00269 }
00270
00271 if ( msg . exists (F_MAP) )
00272 {
00273 string eid;
00274 ID_t bid, iid;
00275 ss . str (msg . getField (F_MAP));
00276
00277 while ( ss )
00278 {
00279 ss >> bid >> iid;
00280 ss . ignore( );
00281 getline (ss, eid);
00282 if ( ! ss . fail( ) )
00283 insert (iid, eid, bid);
00284 }
00285
00286 if ( ! ss . eof( ) )
00287 AMOS_THROW_ARGUMENT ("Invalid map format");
00288 ss . clear( );
00289
00290 if ( size >= 0 && size != size_m )
00291 AMOS_THROW_ARGUMENT ("map and size fields do not agree");
00292 }
00293 }
00294 catch (ArgumentException_t) {
00295
00296 clear( );
00297 throw;
00298 }
00299 }
00300
00301
00302
00303 void IDMap_t::remove (ID_t key)
00304 {
00305 if ( key == NULL_ID ) return;
00306 string eid;
00307
00308 HashNode_t * prev = NULL;
00309 HashNode_t * curr = hashfunc (key);
00310 if ( curr -> triple != NULL )
00311 for ( ; curr != NULL; curr = curr -> next )
00312 {
00313 if ( curr -> triple -> iid == key )
00314 {
00315 size_m --;
00316 if ( curr -> triple -> c > 1 )
00317 eid = curr -> triple -> eid;
00318 removenode (curr, prev);
00319 break;
00320 }
00321 prev = curr;
00322 }
00323
00324 if ( eid . empty( ) )
00325 return;
00326
00327 prev = NULL;
00328 curr = hashfunc (eid);
00329 if ( curr -> triple != NULL )
00330 for ( ; curr != NULL; curr = curr -> next )
00331 {
00332 if ( curr -> triple -> eid == eid )
00333 {
00334 removenode (curr, prev);
00335 break;
00336 }
00337 prev = curr;
00338 }
00339 }
00340
00341
00342
00343 void IDMap_t::remove (const string & key)
00344 {
00345 if ( key . empty( ) ) return;
00346 ID_t iid = NULL_ID;
00347
00348 HashNode_t * prev = NULL;
00349 HashNode_t * curr = hashfunc (key);
00350 if ( curr -> triple != NULL )
00351 for ( ; curr != NULL; curr = curr -> next )
00352 {
00353 if ( curr -> triple -> eid == key )
00354 {
00355 size_m --;
00356 if ( curr -> triple -> c > 1 )
00357 iid = curr -> triple -> iid;
00358 removenode (curr, prev);
00359 break;
00360 }
00361 prev = curr;
00362 }
00363
00364 if ( iid == NULL_ID )
00365 return;
00366
00367 prev = NULL;
00368 curr = hashfunc (iid);
00369 if ( curr -> triple != NULL )
00370 for ( ; curr != NULL; curr = curr -> next )
00371 {
00372 if ( curr -> triple -> iid == iid )
00373 {
00374 removenode (curr, prev);
00375 break;
00376 }
00377 prev = curr;
00378 }
00379 }
00380
00381
00382
00383 void IDMap_t::resize (Size_t min)
00384 {
00385 if ( (min = minbuckets (min)) == getBuckets( ) )
00386 return;
00387
00388 vector<HashTriple_t *> triples (size_m);
00389 long int pos = 0;
00390
00391 if ( size_m > 0 )
00392 {
00393
00394 for ( iterator itr = begin( ); itr != end ( ); ++ itr )
00395 {
00396 itr -> c ++;
00397 triples [pos ++] = itr;
00398 }
00399
00400 Size_t size = size_m;
00401 NCode_t type = type_m;
00402 clear( );
00403 size_m = size;
00404 type_m = type;
00405 }
00406
00407
00408 if ( size_m != pos )
00409 AMOS_THROW ("Unknown fatal error during hash resize");
00410 iid_bucs_m . resize (min);
00411 eid_bucs_m . resize (min);
00412
00413
00414 HashNode_t * curri, * curre;
00415 vector<HashTriple_t *>::iterator tpi;
00416 for ( tpi = triples . begin( ); tpi != triples . end( ); tpi ++ )
00417 {
00418 lookupnode ((*tpi) -> iid, curri);
00419 lookupnode ((*tpi) -> eid, curre);
00420
00421 if ( curri != NULL )
00422 {
00423 if ( curri -> triple != NULL )
00424 curri = curri -> next = new HashNode_t( );
00425 curri -> triple = (*tpi);
00426 curri -> triple -> c ++;
00427 }
00428 if ( curre != NULL )
00429 {
00430 if ( curre -> triple != NULL )
00431 curre = curre -> next = new HashNode_t( );
00432 curre -> triple = (*tpi);
00433 curre -> triple -> c ++;
00434 }
00435
00436 (*tpi) -> c --;
00437 }
00438 }
00439
00440
00441
00442 void IDMap_t::writeMessage (Message_t & msg) const
00443 {
00444 msg . clear( );
00445
00446 try {
00447 ostringstream ss;
00448
00449 msg . setMessageCode (IDMap_t::NCODE);
00450
00451 if ( type_m != NULL_NCODE )
00452 msg . setField (F_OBJECT, Decode (type_m));
00453
00454 ss << size_m;
00455 msg . setField (F_SIZE, ss . str( ));
00456 ss . str (NULL_STRING);
00457
00458 if (size_m != 0 )
00459 {
00460
00461 string str;
00462 for ( const_iterator itr = begin( ); itr != end( ); ++ itr )
00463 {
00464 ss << itr -> bid << '\t' << itr -> iid << '\t';
00465 str . append (ss . str( ));
00466 ss . str (NULL_STRING);
00467 str . append (itr -> eid);
00468 str . push_back (NL_CHAR);
00469 }
00470
00471 msg . setField (F_MAP, str);
00472 }
00473 }
00474 catch (ArgumentException_t) {
00475
00476 msg . clear( );
00477 throw;
00478 }
00479 }
00480
00481
00482
00483 IDMap_t & IDMap_t::operator= (const IDMap_t & s)
00484 {
00485 if ( this != &s )
00486 {
00487 clear( );
00488 type_m = s . type_m;
00489 concat (s);
00490 }
00491
00492 return *this;
00493 }
00494
00495
00496
00497 void IDMap_t::read(const std::string & path)
00498 {
00499 Size_t size;
00500 ID_t bid, iid;
00501 char buffer [MAX_EID_LENGTH+1];
00502
00503 clear( );
00504
00505 FILE * fp = fopen(path.c_str(), "r");
00506
00507 if (!fp)
00508 { AMOS_THROW_IO("Could not open bank partition, " + path); }
00509
00510 if (fscanf(fp, "%s %d", buffer, &size) != 2)
00511 { AMOS_THROW_IO("Invalid Header, Map file is corrupted for " + path); }
00512
00513 type_m = Encode (buffer);
00514 resize (size);
00515
00516 while (fscanf(fp, "%d\t%d", &bid, &iid) == 2)
00517 {
00518 int c = getc(fp);
00519 if (c != '\t')
00520 { AMOS_THROW_IO("Map file is corrupted for " + path); }
00521
00522
00523 int i;
00524 bool done = false;
00525 for (i = 0; i <= MAX_EID_LENGTH; i++)
00526 {
00527 c = getc(fp);
00528 buffer[i] = c;
00529
00530 if (c == '\n' || c == EOF)
00531 {
00532 buffer[i] = '\0';
00533 done = true; break;
00534 }
00535 }
00536
00537 if (!done)
00538 {
00539 buffer[MAX_EID_LENGTH] = '\0';
00540 int chop = 0;
00541 while (!(c == '\n' || c == EOF))
00542 {
00543 chop++;
00544 c = getc(fp);
00545 }
00546
00547 cerr << "WARNING: Truncated EID for IID " << iid << " to " << MAX_EID_LENGTH << " characters (chopped last " << chop <<")" << endl;
00548 }
00549
00550 insert (iid, buffer, bid);
00551 }
00552
00553 if (size_m != size)
00554 {
00555 AMOS_THROW_IO ("Bank Corrupted: Number of named elements doesn't match map header in " + path);
00556 }
00557
00558 fclose(fp);
00559 }
00560
00561
00562
00563 void IDMap_t::write (ostream & out) const
00564 {
00565 out << Decode(type_m) << ' ' << size_m << NL_CHAR;
00566
00567 for ( const_iterator itr = begin( ); itr != end( ); ++ itr )
00568 out << itr -> bid << '\t' << itr -> iid << '\t' << itr -> eid << NL_CHAR;
00569 }
00570
00571
00572
00573
00574
00575
00576
00577 IDMap_t::iterator::iterator (vector<HashNode_t> * iid_bucs_p,
00578 vector<HashNode_t> * eid_bucs_p)
00579 : iid_bucs (iid_bucs_p), eid_bucs (eid_bucs_p)
00580 {
00581 if ( iid_bucs -> empty( ) )
00582 curr = NULL;
00583 else
00584 {
00585 iids = true;
00586 buc = iid_bucs -> begin( );
00587 curr = &(*buc);
00588 if ( curr -> triple == NULL )
00589 this->operator++();
00590 }
00591 }
00592
00593
00594
00595 IDMap_t::iterator & IDMap_t::iterator::operator++ ( )
00596 {
00597 while ( curr != NULL )
00598 {
00599 curr = curr -> next;
00600 while ( curr == NULL )
00601 {
00602 ++ buc;
00603
00604 if ( iids && buc >= iid_bucs -> end( ) )
00605 {
00606 iids = false;
00607 buc = eid_bucs -> begin( );
00608 }
00609
00610 if ( !iids && buc >= eid_bucs -> end( ) )
00611 return *this;
00612
00613 if ( buc -> triple != NULL )
00614 curr = &(*buc);
00615 }
00616
00617 if ( iids || curr -> triple -> c == 1 )
00618 break;
00619 }
00620 return *this;
00621 }
00622
00623
00624
00625 IDMap_t::const_iterator::const_iterator (const vector<HashNode_t> * iid_bucs_p,
00626 const vector<HashNode_t> * eid_bucs_p)
00627 : iid_bucs (iid_bucs_p), eid_bucs (eid_bucs_p)
00628 {
00629 if ( iid_bucs -> empty( ) )
00630 curr = NULL;
00631 else
00632 {
00633 iids = true;
00634 buc = iid_bucs -> begin( );
00635 curr = &(*buc);
00636 if ( curr -> triple == NULL )
00637 this->operator++();
00638 }
00639 }
00640
00641
00642
00643 IDMap_t::const_iterator & IDMap_t::const_iterator::operator++ ( )
00644 {
00645 while ( curr != NULL )
00646 {
00647 curr = curr -> next;
00648 while ( curr == NULL )
00649 {
00650 ++ buc;
00651
00652 if ( iids && buc >= iid_bucs -> end( ) )
00653 {
00654 iids = false;
00655 buc = eid_bucs -> begin( );
00656 }
00657
00658 if ( !iids && buc >= eid_bucs -> end( ) )
00659 return *this;
00660
00661 if ( buc -> triple != NULL )
00662 curr = &(*buc);
00663 }
00664
00665 if ( iids || curr -> triple -> c == 1 )
00666 break;
00667 }
00668 return *this;
00669 }