IDMap_AMOS.cc

Go to the documentation of this file.
00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 #include "IDMap_AMOS.hh"
00011 #include <cstdio>
00012 #include <string>
00013 #include <sstream>
00014 #include <cstring>
00015 using namespace AMOS;
00016 using namespace std;
00017 
00018 const int MAX_EID_LENGTH = 2048; 
00019 
00020 
00021 
00022 //================================================ HashTriple_t ================
00023 //----------------------------------------------------- operator= --------------
00024 IDMap_t::HashTriple_t & IDMap_t::HashTriple_t::operator=
00025 (const HashTriple_t & s)
00026 {
00027   if ( this != &s )
00028     {
00029       c = s.c;
00030       iid = s.iid;
00031       bid = s.bid;
00032       eid = s.eid;
00033     }
00034   return *this;
00035 }
00036 
00037 
00038 
00039 
00040 //================================================ HashNode_t ==================
00041 //----------------------------------------------------- clear ------------------
00042 void IDMap_t::HashNode_t::clear ( )
00043 {
00044   if ( triple != NULL )
00045     if ( -- (triple -> c) == 0 )
00046       delete triple;
00047   delete next;
00048   
00049   triple = NULL;
00050   next = NULL;
00051 }
00052 
00053 
00054 
00055 
00056 //================================================ IDMap_t =====================
00057 const NCode_t IDMap_t::NCODE = M_IDMAP;
00058 const Size_t IDMap_t::DEFAULT_NUM_BUCKETS = 1000;
00059 
00060 
00061 //----------------------------------------------------- clear ------------------
00062 void IDMap_t::clear ( )
00063 {
00064   if ( size_m > 0 )
00065     {
00066       vector<HashNode_t>::iterator hni;
00067       for ( hni = iid_bucs_m . begin( ); hni != iid_bucs_m . end( ); hni ++ )
00068         hni -> clearchain( );
00069       for ( hni = eid_bucs_m . begin( ); hni != eid_bucs_m . end( ); hni ++ )
00070         hni -> clearchain( );
00071       size_m = 0;
00072     }
00073   type_m = NULL_NCODE;
00074 }
00075 
00076 
00077 //----------------------------------------------------- minbuckets -------------
00078 Size_t IDMap_t::minbuckets (Size_t min)
00079 {
00080   //-- Bucket size constants
00081   const uint8_t NUM_BUCKET_SIZES = 27;
00082   const Size_t BUCKET_SIZES [NUM_BUCKET_SIZES] =
00083     {
00084       53l,         97l,         193l,       389l,       769l,
00085       1543l,       3079l,       6151l,      12289l,     24593l,
00086       49157l,      98317l,      196613l,    393241l,    786433l,
00087       1572869l,    3145739l,    6291469l,   12582917l,  25165843l,
00088       50331653l,   100663319l,  201326611l, 402653189l, 805306457l, 
00089       1610612741l, 2147354947l
00090     };
00091 
00092   uint8_t first = 0;
00093   uint8_t len = NUM_BUCKET_SIZES;
00094   uint8_t half, middle;
00095   
00096   //-- Binary search for a bucket size greater than hint
00097   while ( len > 0 )
00098     {
00099       half = len >> 1;
00100       middle = first + half;
00101       if ( BUCKET_SIZES [middle] < min )
00102         {
00103           first = middle + 1;
00104           len = len - half - 1;
00105         }
00106       else
00107         len = half;
00108     }
00109   if ( first == NUM_BUCKET_SIZES )
00110     first --;
00111 
00112   return BUCKET_SIZES [first];
00113 }
00114 
00115 
00116 //----------------------------------------------------- lookupnode -------------
00117 bool IDMap_t::lookupnode (ID_t key, HashNode_t * & node) const
00118 {
00119   if ( key == NULL_ID )
00120     {
00121       node = NULL;
00122       return false;
00123     }
00124   for ( node = hashfunc (key); node -> next != NULL; node = node -> next )
00125     if ( node -> triple -> iid == key )
00126       return true;
00127   if ( node -> triple == NULL  ||  node -> triple -> iid != key )
00128     return false;
00129   else
00130     return true;
00131 }
00132 
00133 
00134 //----------------------------------------------------- lookupnode -------------
00135 bool IDMap_t::lookupnode (const string & key, HashNode_t * & node) const
00136 {
00137   if ( key . empty( ) )
00138     {
00139       node = NULL;
00140       return false;
00141     }
00142   for ( node = hashfunc (key); node -> next != NULL; node = node -> next )
00143     if ( node -> triple -> eid == key )
00144       return true;
00145   if ( node -> triple == NULL  ||  node -> triple -> eid != key )
00146     return false;
00147   else
00148     return true;
00149 }
00150 
00151 
00152 //----------------------------------------------------- removenode -------------
00153 void IDMap_t::removenode (HashNode_t * curr, HashNode_t * prev)
00154 {
00155   if ( prev == NULL )
00156     //-- If a root node
00157     {
00158       if ( curr -> next == NULL )
00159         curr -> clear( );                  // free curr's triple
00160       else
00161         {
00162           prev = curr -> next;             // this will be the new root
00163           curr -> next = NULL;             // as to not delete next on clear
00164           curr -> clear( );                // free curr's triple
00165           *curr = *prev;                   // overwrite root node
00166           delete prev;                     // free duplicate node
00167         }
00168     }
00169   else
00170     //-- If not a root node
00171     {
00172       prev -> next = curr -> next;         // bypass curr
00173       curr -> next = NULL;                 // as to not delete next on clear
00174       curr -> clear( );                    // free curr's triple
00175       delete curr;                         // free removed node
00176     }
00177 }
00178 
00179 
00180 //----------------------------------------------------- concat -----------------
00181 void IDMap_t::concat (const IDMap_t & s)
00182 {
00183   if ( this != &s )
00184     {
00185       resize (getSize( ) + s . getSize( ));
00186 
00187       //-- Copy all the triples
00188       for ( const_iterator itr = s . begin( ); itr != s . end( ); ++ itr )
00189         insert (itr -> iid, itr -> eid, itr -> bid);
00190     }
00191 }
00192 
00193 
00194 //----------------------------------------------------- insert -----------------
00195 const IDMap_t::HashTriple_t * IDMap_t::insert (ID_t iid,
00196                                                const string & eid,
00197                                                ID_t bid)
00198 {
00199   if ( iid == NULL_ID  &&  eid . empty( ) ) return NULL;
00200 
00201   HashNode_t * curri, * curre;
00202 
00203   //cerr << "insert bid: " << bid << "\tiid:" << iid << "\teid: \"" << eid << "\"" << endl;
00204 
00205   if ( lookupnode (iid, curri) )
00206     {
00207       ostringstream ss;
00208       ss << "Cannot insert int key '" << iid << "' multiple times";
00209       AMOS_THROW_ARGUMENT (ss . str( ));
00210     }
00211   if ( lookupnode (eid, curre) )
00212     {
00213       ostringstream ss;
00214       ss << "Cannot insert string key '" << eid << "' multiple times";
00215       AMOS_THROW_ARGUMENT (ss . str( ));
00216     }
00217 
00218   HashTriple_t * currt = new HashTriple_t (iid, eid, bid);
00219 
00220   if ( curri != NULL )
00221     {
00222       if ( curri -> triple != NULL )
00223         curri = curri -> next = new HashNode_t( );
00224       curri -> triple = currt;
00225       currt -> c ++;
00226     }
00227   if ( curre != NULL )
00228     {
00229       if ( curre -> triple != NULL )
00230         curre = curre -> next = new HashNode_t( );
00231       curre -> triple = currt;
00232       currt -> c ++;
00233     }
00234 
00235   if ( ++ size_m >= getBuckets( ) )
00236     resize (size_m + 1);
00237 
00238   return currt;
00239 }
00240 
00241 
00242 //----------------------------------------------------- readMessage ------------
00243 void IDMap_t::readMessage (const Message_t & msg)
00244 {
00245   clear( );
00246 
00247   try {
00248     Size_t size = -1;
00249     istringstream ss;
00250 
00251     if ( msg . exists (F_OBJECT) )
00252       {
00253         string str = msg . getField (F_OBJECT);
00254 
00255         if ( str . length( ) != NCODE_SIZE )
00256           AMOS_THROW_ARGUMENT ("Invalid object type format");
00257         type_m = Encode (str);
00258       }
00259 
00260     if ( msg . exists (F_SIZE) )
00261       {
00262         ss . str (msg . getField (F_SIZE));
00263         ss >> size;
00264         if ( !ss )
00265           AMOS_THROW_ARGUMENT ("Invalid size format");
00266         ss . clear( );
00267 
00268         resize (size);
00269       }
00270 
00271     if ( msg . exists (F_MAP) )
00272       {
00273         string eid;
00274         ID_t bid, iid;
00275         ss . str (msg . getField (F_MAP));
00276 
00277         while ( ss )
00278           {
00279             ss >> bid >> iid;
00280             ss . ignore( );
00281             getline (ss, eid);
00282             if ( ! ss . fail( ) )
00283               insert (iid, eid, bid);
00284           }
00285 
00286         if ( ! ss . eof( ) )
00287           AMOS_THROW_ARGUMENT ("Invalid map format");
00288         ss . clear( );
00289 
00290         if ( size >= 0  &&  size != size_m )
00291           AMOS_THROW_ARGUMENT ("map and size fields do not agree");
00292       }
00293   }
00294   catch (ArgumentException_t) {
00295 
00296     clear( );
00297     throw;
00298   }
00299 }
00300 
00301 
00302 //----------------------------------------------------- remove -----------------
00303 void IDMap_t::remove (ID_t key)
00304 {
00305   if ( key == NULL_ID ) return;
00306   string eid;
00307 
00308   HashNode_t * prev = NULL;
00309   HashNode_t * curr = hashfunc (key);
00310   if ( curr -> triple != NULL )
00311     for ( ; curr != NULL; curr = curr -> next )
00312       {
00313         if ( curr -> triple -> iid == key )
00314           {
00315             size_m --;
00316             if ( curr -> triple -> c > 1 )
00317               eid = curr -> triple -> eid;
00318             removenode (curr, prev);
00319             break;
00320           }
00321         prev = curr;
00322       }
00323   
00324   if ( eid . empty( ) )
00325     return;
00326 
00327   prev = NULL;
00328   curr = hashfunc (eid);
00329   if ( curr -> triple != NULL )
00330     for ( ; curr != NULL; curr = curr -> next )
00331       {
00332         if ( curr -> triple -> eid == eid )
00333           {
00334             removenode (curr, prev);
00335             break;
00336           }
00337         prev = curr;
00338       }
00339 }
00340 
00341 
00342 //----------------------------------------------------- remove -----------------
00343 void IDMap_t::remove (const string & key)
00344 {
00345   if ( key . empty( ) ) return;
00346   ID_t iid = NULL_ID;
00347 
00348   HashNode_t * prev = NULL;
00349   HashNode_t * curr = hashfunc (key);
00350   if ( curr -> triple != NULL )
00351     for ( ; curr != NULL; curr = curr -> next )
00352       {
00353         if ( curr -> triple -> eid == key )
00354           {
00355             size_m --;
00356             if ( curr -> triple -> c > 1 )
00357               iid = curr -> triple -> iid;
00358             removenode (curr, prev);
00359             break;
00360           }
00361         prev = curr;
00362       }
00363   
00364   if ( iid == NULL_ID )
00365     return;
00366 
00367   prev = NULL;
00368   curr = hashfunc (iid);
00369   if ( curr -> triple != NULL )
00370     for ( ; curr != NULL; curr = curr -> next )
00371       {
00372         if ( curr -> triple -> iid == iid )
00373           {
00374             removenode (curr, prev);
00375             break;
00376           }
00377         prev = curr;
00378       }
00379 }
00380 
00381 
00382 //----------------------------------------------------- resize -----------------
00383 void IDMap_t::resize (Size_t min)
00384 {
00385   if ( (min = minbuckets (min)) == getBuckets( ) )
00386     return;
00387 
00388   vector<HashTriple_t *> triples (size_m);
00389   long int pos = 0;
00390 
00391   if ( size_m > 0 )
00392     {
00393       //-- Collect all the triples
00394       for ( iterator itr = begin( ); itr != end ( ); ++ itr )
00395         {
00396           itr -> c ++;
00397           triples [pos ++] = itr;
00398         }
00399 
00400       Size_t  size = size_m;
00401       NCode_t type = type_m;
00402       clear( );
00403       size_m = size;
00404       type_m = type;
00405     }
00406 
00407   //-- Resize the bucket vectors
00408   if ( size_m != pos )
00409     AMOS_THROW ("Unknown fatal error during hash resize");
00410   iid_bucs_m . resize (min);
00411   eid_bucs_m . resize (min);
00412 
00413   //-- Put the triples back in the map via modified insert
00414   HashNode_t * curri, * curre;
00415   vector<HashTriple_t *>::iterator tpi;
00416   for ( tpi = triples . begin( ); tpi != triples . end( ); tpi ++ )
00417     {
00418       lookupnode ((*tpi) -> iid, curri);
00419       lookupnode ((*tpi) -> eid, curre);
00420 
00421       if ( curri != NULL )
00422         {
00423           if ( curri -> triple != NULL )
00424             curri = curri -> next = new HashNode_t( );
00425           curri -> triple = (*tpi);
00426           curri -> triple -> c ++;
00427         }
00428       if ( curre != NULL )
00429         {
00430           if ( curre -> triple != NULL )
00431             curre = curre -> next = new HashNode_t( );
00432           curre -> triple = (*tpi);
00433           curre -> triple -> c ++;
00434         }
00435 
00436       (*tpi) -> c --;
00437     }
00438 }
00439 
00440 
00441 //----------------------------------------------------- writeMessage -----------
00442 void IDMap_t::writeMessage (Message_t & msg) const
00443 {
00444   msg . clear( );
00445 
00446   try {
00447     ostringstream ss;
00448 
00449     msg . setMessageCode (IDMap_t::NCODE);
00450     
00451     if ( type_m != NULL_NCODE )
00452       msg . setField (F_OBJECT, Decode (type_m));
00453     
00454     ss << size_m;
00455     msg . setField (F_SIZE, ss . str( ));
00456     ss . str (NULL_STRING);
00457     
00458     if (size_m != 0 )
00459       {
00460         //-- Output all the triples
00461         string str;
00462         for ( const_iterator itr = begin( ); itr != end( ); ++ itr )
00463           {
00464             ss << itr -> bid << '\t' << itr -> iid << '\t';
00465             str . append (ss . str( ));
00466             ss . str (NULL_STRING);
00467             str . append (itr -> eid);
00468             str . push_back (NL_CHAR);
00469           }
00470         
00471         msg . setField (F_MAP, str);
00472       }
00473   }
00474   catch (ArgumentException_t) {
00475 
00476     msg . clear( );
00477     throw;
00478   }
00479 }
00480 
00481 
00482 //----------------------------------------------------- operator= --------------
00483 IDMap_t & IDMap_t::operator= (const IDMap_t & s)
00484 {
00485   if ( this != &s )
00486     {
00487       clear( );
00488       type_m = s . type_m;
00489       concat (s);
00490     }
00491 
00492   return *this;
00493 }
00494 
00495 
00496 //----------------------------------------------------- read -------------------
00497 void IDMap_t::read(const std::string & path)
00498 {
00499   Size_t size;
00500   ID_t bid, iid;
00501   char buffer [MAX_EID_LENGTH+1];
00502 
00503   clear( );
00504 
00505   FILE * fp = fopen(path.c_str(), "r");
00506 
00507   if (!fp)
00508     { AMOS_THROW_IO("Could not open bank partition, " + path); } 
00509 
00510   if (fscanf(fp, "%s %d", buffer, &size) != 2)
00511     { AMOS_THROW_IO("Invalid Header, Map file is corrupted for " + path); }
00512 
00513   type_m = Encode (buffer);
00514   resize (size);
00515 
00516   while (fscanf(fp, "%d\t%d", &bid, &iid) == 2)
00517   {
00518     int c = getc(fp);
00519     if (c != '\t') 
00520       { AMOS_THROW_IO("Map file is corrupted for " + path); }
00521 
00522     // Go to <= MAX_EID_LENGTH so that we know when at least 1 char is chopped
00523     int i;
00524     bool done = false;
00525     for (i = 0; i <= MAX_EID_LENGTH; i++)
00526     {
00527       c = getc(fp);
00528       buffer[i] = c;
00529 
00530       if (c == '\n' || c == EOF) 
00531       { 
00532         buffer[i] = '\0';
00533         done = true; break; 
00534       }
00535     }
00536 
00537     if (!done)
00538     {
00539       buffer[MAX_EID_LENGTH] = '\0';
00540       int chop = 0; // This immediately goes to 1
00541       while (!(c == '\n' || c == EOF))
00542       {
00543         chop++;
00544         c = getc(fp);
00545       }
00546 
00547       cerr << "WARNING: Truncated EID for IID " << iid << " to " << MAX_EID_LENGTH << " characters (chopped last " << chop <<")" << endl;
00548     }
00549 
00550     insert (iid, buffer, bid);
00551   }
00552 
00553   if (size_m != size)
00554   {
00555     AMOS_THROW_IO ("Bank Corrupted: Number of named elements doesn't match map header in " + path);
00556   }
00557 
00558   fclose(fp);
00559 }
00560 
00561 
00562 //----------------------------------------------------- write ------------------
00563 void IDMap_t::write (ostream & out) const
00564 {
00565   out << Decode(type_m) << ' ' << size_m << NL_CHAR;
00566 
00567   for ( const_iterator itr = begin( ); itr != end( ); ++ itr )
00568     out << itr -> bid << '\t' << itr -> iid << '\t' << itr -> eid << NL_CHAR;
00569 }
00570 
00571 
00572 
00573 
00574 
00575 //================================================ iterator ====================
00576 //------------------------------------------------ iterator --------------------
00577 IDMap_t::iterator::iterator (vector<HashNode_t> * iid_bucs_p,
00578                              vector<HashNode_t> * eid_bucs_p)
00579   : iid_bucs (iid_bucs_p), eid_bucs (eid_bucs_p)
00580 {
00581   if ( iid_bucs -> empty( ) )
00582     curr = NULL;
00583   else
00584     {
00585       iids = true;
00586       buc = iid_bucs -> begin( );
00587       curr = &(*buc);
00588       if ( curr -> triple == NULL )
00589         this->operator++();
00590     }
00591 }
00592 
00593 
00594 //------------------------------------------------ operator++ ------------------
00595 IDMap_t::iterator & IDMap_t::iterator::operator++ ( )
00596 {
00597   while ( curr != NULL )
00598     {
00599       curr = curr -> next;
00600       while ( curr == NULL )
00601         {
00602           ++ buc;
00603 
00604           if ( iids  &&  buc >= iid_bucs -> end( ) )
00605             {
00606               iids = false;
00607               buc = eid_bucs -> begin( );
00608             }
00609                 
00610           if ( !iids  &&  buc >= eid_bucs -> end( ) )
00611             return *this;
00612 
00613           if ( buc -> triple != NULL )
00614             curr = &(*buc);
00615         }
00616 
00617       if ( iids  ||  curr -> triple -> c == 1 )
00618         break;
00619     }
00620   return *this;
00621 }
00622 
00623 
00624 //------------------------------------------------ const_iterator --------------
00625 IDMap_t::const_iterator::const_iterator (const vector<HashNode_t> * iid_bucs_p,
00626                                          const vector<HashNode_t> * eid_bucs_p)
00627   : iid_bucs (iid_bucs_p), eid_bucs (eid_bucs_p)
00628 {
00629   if ( iid_bucs -> empty( ) )
00630     curr = NULL;
00631   else
00632     {
00633       iids = true;
00634       buc = iid_bucs -> begin( );
00635       curr = &(*buc);
00636       if ( curr -> triple == NULL )
00637         this->operator++();
00638     }
00639 }
00640 
00641 
00642 //------------------------------------------------ operator++ ------------------
00643 IDMap_t::const_iterator & IDMap_t::const_iterator::operator++ ( )
00644 {
00645   while ( curr != NULL )
00646     {
00647       curr = curr -> next;
00648       while ( curr == NULL )
00649         {
00650           ++ buc;
00651 
00652           if ( iids  &&  buc >= iid_bucs -> end( ) )
00653             {
00654               iids = false;
00655               buc = eid_bucs -> begin( );
00656             }
00657                 
00658           if ( !iids  &&  buc >= eid_bucs -> end( ) )
00659             return *this;
00660 
00661           if ( buc -> triple != NULL )
00662             curr = &(*buc);
00663         }
00664 
00665       if ( iids  ||  curr -> triple -> c == 1 )
00666         break;
00667     }
00668   return *this;
00669 }

Generated on Mon Feb 22 17:36:27 2010 for libAMOS by  doxygen 1.4.7