Kmer_AMOS.cc

Go to the documentation of this file.
00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 #include "Kmer_AMOS.hh"
00011 using namespace AMOS;
00012 using namespace std;
00013 
00014 
00015 
00016 
00017 //================================================ Kmer_t ======================
00018 const NCode_t Kmer_t::NCODE = M_KMER;
00019 const uint8_t Kmer_t::MAX_LENGTH    = 255;
00020 
00021 
00022 //----------------------------------------------------- clear ------------------
00023 void Kmer_t::clear ( )
00024 {
00025   Universal_t::clear( );
00026   free (seq_m);
00027   seq_m = NULL;
00028   count_m = length_m = 0;
00029   reads_m . clear( );
00030 }
00031 
00032 
00033 //----------------------------------------------------- getSeqString -----------
00034 string Kmer_t::getSeqString ( ) const
00035 {
00036   string retval (length_m, NULL_CHAR);
00037 
00038   //-- See developer comments for getBase
00039   Pos_t ci = -1;
00040   uint8_t byte = 0;
00041   for ( Pos_t ui = 0; ui < length_m; ui ++ )
00042     {
00043       if ( ui % 4 == 0 )
00044         byte = seq_m [++ ci];
00045 
00046       retval [ui] = uncompress (byte);
00047       byte <<= 2;
00048     }
00049 
00050   return retval;
00051 }
00052 
00053 
00054 //----------------------------------------------------- readMessage-------------
00055 void Kmer_t::readMessage (const Message_t & msg)
00056 {
00057   Universal_t::readMessage (msg);
00058 
00059   try {
00060 
00061     istringstream ss;
00062 
00063     if ( msg . exists (F_COUNT) )
00064       {
00065         ss . str (msg . getField (F_COUNT));
00066         ss >> count_m;
00067         if ( !ss )
00068           AMOS_THROW_ARGUMENT ("Invalid count format");
00069         ss . clear( );
00070       }
00071 
00072     if ( msg . exists (F_SEQUENCE) )
00073       setSeqString (msg . getField (F_SEQUENCE));
00074 
00075     if ( msg . exists (F_READS) )
00076       {
00077         ID_t iid;
00078 
00079         ss . str (msg . getField (F_READS));
00080 
00081         while ( ss )
00082           {
00083             ss >> iid;
00084             if ( ! ss . fail( ) )
00085               reads_m . push_back (iid);
00086           }
00087 
00088         if ( !ss . eof( ) )
00089           AMOS_THROW_ARGUMENT ("Invalid read link list format");
00090         ss . clear( );
00091       }
00092   }
00093   catch (ArgumentException_t) {
00094     
00095     clear( );
00096     throw;
00097   }
00098 }
00099 
00100 
00101 //----------------------------------------------------- readRecord -------------
00102 void Kmer_t::readRecord (istream & fix, istream & var)
00103 {
00104   Universal_t::readRecord (fix, var);
00105 
00106   Size_t size;
00107   readLE (fix, &count_m);
00108   readLE (fix, &length_m);
00109   readLE (fix, &size);
00110 
00111   reads_m . resize (size, NULL_ID);
00112   for ( Pos_t i = 0; i < size; i ++ )
00113     readLE (var, &(reads_m [i]));
00114 
00115   size = length_m / 4 + (length_m % 4 ? 1 : 0);
00116   seq_m = (uint8_t *) SafeRealloc (seq_m, size);
00117   var . read ((char *)seq_m, size);
00118 }
00119 
00120 
00121 //----------------------------------------------------- readRecordFix ----------
00122 void Kmer_t::readRecordFix (istream & fix)
00123 {
00124   Universal_t::readRecordFix (fix);
00125 
00126   Size_t size;
00127   readLE (fix, &count_m);
00128   readLE (fix, &length_m);
00129   readLE (fix, &size);
00130 
00131   reads_m.clear();
00132 
00133   free(seq_m);
00134   seq_m = NULL;
00135 }
00136 
00137 
00138 //----------------------------------------------------- setSeqString -----------
00139 void Kmer_t::setSeqString (const string & seq)
00140 {
00141   Size_t osize = seq . size( );
00142   Size_t size = osize;
00143   if ( size > Kmer_t::MAX_LENGTH )
00144     AMOS_THROW_ARGUMENT ("Invalid kmer sequence is too long");
00145 
00146   size = size / 4 + (size % 4 ? 1 : 0);
00147   seq_m = (uint8_t *) SafeRealloc (seq_m, size);
00148 
00149   //-- See developer comments for setBase
00150   Pos_t ci = -1;
00151   int offset = 8;
00152   length_m = 0;
00153   for ( Size_t ui = 0; ui < osize; ui ++ )
00154     {
00155       if ( seq [ui] == NL_CHAR )
00156         continue;
00157 
00158       length_m ++;
00159       if ( offset >= 8 )
00160         {
00161           offset = 0;
00162           seq_m [++ ci] = 0;
00163         }
00164       seq_m [ci] |= compress (seq [ui]) >> offset;
00165       offset += 2;
00166     }
00167 
00168   if ( length_m != osize )
00169     {
00170       size = length_m / 4 + (length_m % 4 ? 1 : 0);
00171       seq_m = (uint8_t *) SafeRealloc (seq_m, size);
00172     }
00173 }
00174 
00175 
00176 //----------------------------------------------------- writeMessage -----------
00177 void Kmer_t::writeMessage (Message_t & msg) const
00178 {
00179   Universal_t::writeMessage (msg);
00180 
00181   try {
00182 
00183     ostringstream ss;
00184 
00185     msg . setMessageCode (Kmer_t::NCODE);
00186 
00187     ss << count_m;
00188     msg . setField (F_COUNT, ss . str( ));
00189     ss . str (NULL_STRING);
00190 
00191     if ( length_m != 0 )
00192       msg . setField (F_SEQUENCE, getSeqString( ));
00193 
00194     if ( !reads_m . empty( ) )
00195       {
00196         vector<ID_t>::const_iterator vi;
00197 
00198         for ( vi = reads_m . begin( ); vi != reads_m . end( ); vi ++ )
00199           ss << *vi << endl;
00200         msg . setField (F_READS, ss . str( ));
00201         ss . str (NULL_STRING);
00202       }
00203   }
00204   catch (ArgumentException_t) {
00205 
00206     msg . clear( );
00207     throw;
00208   }
00209 }
00210 
00211 
00212 //----------------------------------------------------- writeRecord ------------
00213 void Kmer_t::writeRecord (ostream & fix, ostream & var) const
00214 {
00215   Universal_t::writeRecord (fix, var);
00216 
00217   Size_t size = reads_m . size( );
00218   writeLE (fix, &count_m);
00219   writeLE (fix, &length_m);
00220   writeLE (fix, &size);
00221 
00222   for ( Pos_t i = 0; i < size; i ++ )
00223     writeLE (var, &(reads_m [i]));
00224 
00225   size = length_m / 4 + (length_m % 4 ? 1 : 0);
00226   var . write ((char *)seq_m, size);
00227 }
00228 
00229 
00230 //----------------------------------------------------- operator= --------------
00231 Kmer_t & Kmer_t::operator= (const Kmer_t & source)
00232 {
00233   if ( this != &source )
00234     {
00235       Universal_t::operator= (source);
00236 
00237       Size_t size = source . length_m / 4 + (source . length_m % 4 ? 1 : 0);
00238       seq_m = (uint8_t *) SafeRealloc (seq_m, size);
00239       memcpy (seq_m, source . seq_m, size);
00240 
00241       count_m = source . count_m;
00242       length_m = source . length_m;
00243       reads_m = source . reads_m;
00244     }
00245 
00246   return *this;
00247 }

Generated on Mon Feb 22 17:36:27 2010 for libAMOS by  doxygen 1.4.7