Contig_AMOS.cc

Go to the documentation of this file.
00001 
00002 
00010 
00011 #include "Contig_AMOS.hh"
00012 #include <cctype>
00013 using namespace AMOS;
00014 using namespace std;
00015 
00016 
00017 
00018 
00019 //================================================ Contig_t ====================
00020 const NCode_t Contig_t::NCODE = M_CONTIG;
00021 
00022 
00023 //----------------------------------------------------- indexGaps --------------
00024 void Contig_t::indexGaps()
00025 {
00026   if (gapsvalid_m) { return; }
00027   gapsvalid_m = true;
00028 
00029   gaps_m.clear();
00030 
00031   Pos_t len = getLength();
00032 
00033   for (Pos_t i = 0; i < len; i++)
00034   {
00035     if (getBase(i).first == '-')
00036     {
00037       gaps_m.push_back(i);
00038     }
00039   }
00040 }
00041 
00042 
00043 //----------------------------------------------------- gap2ungap --------------
00044 Pos_t Contig_t::gap2ungap (Pos_t gap)
00045 {
00046   indexGaps();
00047 
00048   int l = gaps_m.size();
00049 
00050   Pos_t retval = gap;
00051 
00052   for (int i = 0; (i < l) && (gaps_m[i] <= gap); i++)
00053   {
00054     retval--;
00055   }
00056 
00057   retval++;
00058 
00059   return retval;
00060 }
00061 
00062 
00063 //----------------------------------------------------- ungap2gap --------------
00064 Size_t Contig_t::ungap2gap (Pos_t ungap)
00065 {
00066   indexGaps();
00067 
00068   int l = gaps_m.size();
00069 
00070   Pos_t retval = ungap-1;
00071 
00072   for (int i = 0; (i < l) && (gaps_m[i] <= retval); i++)
00073   {
00074     retval++;
00075   }
00076 
00077   return retval;
00078 }
00079 
00080 
00081 //----------------------------------------------------- getSpan ----------------
00082 Size_t Contig_t::getSpan ( ) const
00083 {
00084   Pos_t hi,lo;
00085 
00086 
00087   if ( reads_m . empty( ) )
00088     {
00089       lo = hi = 0;
00090     }
00091   else
00092     {
00093       vector<Tile_t>::const_iterator ti = reads_m . begin( );
00094 
00095       lo = ti -> offset;
00096       hi = ti -> offset + ti -> range . getLength( );
00097 
00098       for ( ++ ti; ti != reads_m . end( ); ++ ti )
00099         {
00100           if ( ti -> offset < lo )
00101             lo = ti -> offset;
00102           if ( ti -> offset + ti -> getGappedLength( ) > hi )
00103             hi = ti -> offset + ti -> getGappedLength( );
00104         }
00105     }
00106 
00107   return hi - lo;
00108 }
00109 
00110 
00111 //----------------------------------------------------- getUngappedLength ------
00112 Size_t Contig_t::getUngappedLength( ) const
00113 {
00114   Size_t retval = length_m;
00115 
00116   for (Pos_t i = 0; i < length_m; ++ i )
00117     if ( ! isalpha (getBase (i) . first) )
00118       -- retval;
00119 
00120   return retval;
00121 }
00122 
00123 
00124 //----------------------------------------------------- getUngappedQualString --
00125 string Contig_t::getUngappedQualString (Range_t range) const
00126 {
00127   Pos_t lo = range . getLo( );
00128   Pos_t hi = range . getHi( );
00129 
00130   //-- Check preconditions
00131   if ( lo < 0  ||  hi > getLength( ) )
00132     AMOS_THROW_ARGUMENT ("Invalid quality subrange");
00133 
00134   string retval;
00135   retval . reserve (hi - lo);
00136   pair<char, char> seqqualc;
00137 
00138   //-- Skip the gaps in the sequence and populate the retval
00139   for ( ; lo < hi; lo ++ )
00140     {
00141       seqqualc = getBase (lo);
00142       if ( isalpha (seqqualc . first) )
00143         retval . push_back (seqqualc . second);
00144     }
00145 
00146   if ( range . isReverse( ) )
00147     AMOS::Reverse (retval);
00148 
00149   return retval;
00150 }
00151 
00152 
00153 //----------------------------------------------------- getUngappedSeqString ---
00154 string Contig_t::getUngappedSeqString (Range_t range) const
00155 {
00156   Pos_t lo = range . getLo( );
00157   Pos_t hi = range . getHi( );
00158 
00159   //-- Check preconditions
00160   if ( lo < 0  ||  hi > getLength( ) )
00161     AMOS_THROW_ARGUMENT ("Invalid sequence subrange");
00162 
00163   string retval;
00164   retval . reserve (hi - lo);
00165   char seqc;
00166 
00167   //-- Skip the gaps in the sequence and populate the retval
00168   for ( ; lo < hi; lo ++ )
00169     {
00170       seqc = getBase (lo) . first;
00171       if ( isalpha (seqc) )
00172         retval . push_back (seqc);
00173     }
00174   
00175   if ( range . isReverse( ) )
00176     AMOS::ReverseComplement (retval);
00177 
00178   return retval;
00179 }
00180 
00181 
00182 //--------------------------------------------------- getCovStat ---------------
00183 double Contig_t::getCovStat(const double globalArrivalRate) const
00184 {
00185   const float ln2=0.69314718055994530941723212145818;
00186 
00187   assert(globalArrivalRate != -1);
00188   return (getAvgRho() * globalArrivalRate) - (ln2 * (reads_m.size() -1));
00189 }
00190 
00191 
00192 //--------------------------------------------------- getAvgRho ---------------
00193 double Contig_t::getAvgRho( ) const
00194 {
00195   double avgRho = 0;
00196   Pos_t  lo, hi;
00197   Size_t lenLo, lenHi;
00198 
00199   if ( !reads_m . empty( ) )
00200   {
00201     vector<Tile_t>::const_iterator ti = reads_m . begin( );
00202 
00203     lo = ti -> offset;
00204     lenLo = ti->range.getLength();
00205     hi = ti -> offset + ti -> range . getLength( );
00206     lenHi = ti->range.getLength();
00207 
00208     for ( ++ ti; ti != reads_m . end( ); ++ ti )
00209     {
00210       if ( ti -> offset < lo )
00211         lo = ti -> offset;
00212         lenLo = ti->range.getLength();
00213       if ( ti -> offset + ti -> range.getLength( ) > hi )
00214         hi = ti -> offset + ti -> range.getLength();
00215         lenHi = ti->range.getLength();
00216     }
00217 
00218     double avgLen = (lenLo + lenHi) / 2;
00219       if (avgLen < getLength()) {
00220         avgRho = getLength() - avgLen;
00221     }
00222   }
00223       
00224   return avgRho;
00225 }
00226 
00227 
00228 //----------------------------------------------------- insertGapColumn --------
00229 void Contig_t::insertGapColumn (Pos_t gindex)
00230 {
00231   // Insert gap into the consensus
00232   string qual(getQualString());
00233   string seq(getSeqString());
00234 
00235   if (gindex > seq.length())
00236   {
00237     AMOS_THROW_ARGUMENT("Invalid position specified for inserting gap column");
00238   }
00239 
00240   seq.insert(gindex, "-", 1); 
00241   qual.insert(gindex, "N", 1); // TODO: Set qv more intelligently
00242   setSequence(seq, qual);
00243 
00244   // Adjust the reads
00245   vector<Tile_t>::iterator i;
00246   for (i =  reads_m.begin();
00247        i != reads_m.end();
00248        i++)
00249   {
00250     if (i->offset >= gindex)
00251     {
00252       // insert before read, shift over 1
00253       // Don't insert starting gaps
00254       i->offset++;
00255     }
00256     else if (i->getRightOffset() < gindex)
00257     {
00258       // insert after read, nothing to do
00259     }
00260     else
00261     {
00262       // gap inserted into read
00263       int gseqpos = gindex - i->offset;
00264 
00265       // create a new vector of gaps, with a gap at gseqpos
00266       vector<Pos_t> newgaps; 
00267 
00268       // count gaps that occur before gap we are inserting
00269       int gapcount = 0;
00270       vector<Pos_t>::iterator g;
00271       for (g =  i->gaps.begin();
00272            g != i->gaps.end();
00273            g++, gapcount++)
00274       {
00275         int cgseqpos = *g+gapcount;
00276         if (cgseqpos > gseqpos) { break; }
00277 
00278         newgaps.push_back(*g);
00279       }
00280 
00281       // Add new gap
00282       newgaps.push_back(gseqpos-gapcount);
00283 
00284       // Copy gaps following the inserted gap
00285       while (g != i->gaps.end())
00286       {
00287         newgaps.push_back(*g);
00288         g++;
00289       }
00290 
00291       i->gaps = newgaps;
00292     }
00293   }
00294 }
00295 
00296 
00297 
00298 //----------------------------------------------------- readMessage ------------
00299 void Contig_t::readMessage (const Message_t & msg)
00300 {
00301   gapsvalid_m = false;
00302   Sequence_t::readMessage (msg);
00303 
00304   try {
00305     vector<Message_t>::const_iterator i;
00306 
00307     for ( i  = msg . getSubMessages( ) . begin( );
00308           i != msg . getSubMessages( ) . end( ); i ++ )
00309       {
00310         if ( i -> getMessageCode( ) == M_TILE )
00311           {
00312             reads_m . push_back (Tile_t( ));
00313             reads_m . back( ) . readMessage (*i);
00314           }
00315       }
00316   }
00317   catch (ArgumentException_t) {
00318     
00319     clear( );
00320     throw;
00321   }
00322 
00323 }
00324 
00325 
00326 //----------------------------------------------------- readRecord -------------
00327 void Contig_t::readRecord (istream & fix, istream & var)
00328 {
00329   gapsvalid_m = false;
00330   Sequence_t::readRecord (fix, var);
00331 
00332   Size_t sizet;
00333   readLE (fix, &sizet);
00334 
00335   reads_m . resize (sizet);
00336   for ( Pos_t i = 0; i < sizet; i ++ )
00337     reads_m [i] . readRecord (var);
00338 }
00339 
00340 
00341 //----------------------------------------------------- readRecordFix ----------
00342 void Contig_t::readRecordFix (istream & fix)
00343 {
00344   gapsvalid_m = false;
00345   Sequence_t::readRecordFix (fix);
00346 
00347   Size_t sizet;
00348   readLE (fix, &sizet); 
00349 
00350   reads_m.clear();
00351   reads_m . resize (sizet);
00352 }
00353 
00354 
00355 //----------------------------------------------------- readUMD ----------------
00356 bool Contig_t::readUMD (istream & in)
00357 {
00358   gapsvalid_m = false;
00359   string eid;
00360   Tile_t tile;
00361   istringstream ss;
00362   string line;
00363 
00364   while ( line . empty( )  ||  line [0] != 'C' )
00365     {
00366       if ( !in . good( ) )
00367         return false;
00368       getline (in, line);
00369     }
00370 
00371   clear( );
00372 
00373   try {
00374 
00375     ss . str (line);
00376     ss . ignore( );
00377     ss >> eid;
00378     if ( !ss )
00379       AMOS_THROW_ARGUMENT ("Invalid contig ID");
00380     ss . clear( );
00381     setEID (eid);
00382 
00383     while ( true )
00384       {
00385         getline (in, line);
00386         if ( line . empty( ) )
00387           break;
00388 
00389         ss . str (line);
00390         ss >> tile . source;
00391         ss >> tile . range . begin;
00392         ss >> tile . range . end;
00393         if ( !ss )
00394           AMOS_THROW_ARGUMENT ("Invalid read layout");
00395         ss . clear( );
00396         tile . offset = tile . range . begin < tile . range . end ?
00397           tile . range . begin : tile . range . end;
00398         tile . range . begin -= tile . offset;
00399         tile . range . end -= tile . offset;
00400         getReadTiling( ) . push_back (tile);
00401       }
00402 
00403   }
00404   catch (IOException_t) {
00405 
00406     //-- Clean up and rethrow
00407     clear( );
00408     throw;
00409   }
00410 
00411   return true;
00412 }
00413 
00414 void Contig_t::reverseComplement()
00415 {
00416   // Reverse the consensus
00417   string qual(getQualString());
00418   string seq(getSeqString());
00419   int conslen = seq.length();
00420 
00421   AMOS::ReverseComplement(seq);
00422   reverse(qual.begin(), qual.end());
00423   setSequence(seq, qual);
00424 
00425   // Flip the orientation of the reads
00426   vector<Tile_t>::iterator i;
00427   for (i =  reads_m.begin();
00428        i != reads_m.end();
00429        i++)
00430   {
00431     i->range.swap();
00432     i->offset = conslen - (i->offset + i->getGappedLength());
00433 
00434     Pos_t len = i->range.getLength();
00435 
00436     vector<Pos_t>::iterator g;
00437     for (g =  i->gaps.begin();
00438          g != i->gaps.end();
00439          g++)
00440     {
00441       *g = len - *g;
00442     }
00443 
00444     reverse(i->gaps.begin(), i->gaps.end());
00445   }
00446 }
00447 
00448 
00449 //----------------------------------------------------- writeMessage -----------
00450 void Contig_t::writeMessage (Message_t & msg) const
00451 {
00452   Sequence_t::writeMessage (msg);
00453 
00454   try {
00455     Pos_t begin = msg . getSubMessages( ) . size( );
00456     msg . getSubMessages( ) . resize (begin + reads_m . size( ));
00457 
00458     msg . setMessageCode (Contig_t::NCODE);
00459 
00460     if ( !reads_m . empty( ) )
00461       {
00462         vector<Tile_t>::const_iterator tvi;
00463         for ( tvi = reads_m . begin( ); tvi != reads_m . end( ); ++ tvi )
00464           tvi -> writeMessage (msg . getSubMessages( ) [begin ++]);
00465       }
00466   }
00467   catch (ArgumentException_t) {
00468 
00469     msg . clear( );
00470     throw;
00471   }
00472 }
00473 
00474 
00475 //----------------------------------------------------- writeRecord ------------
00476 void Contig_t::writeRecord (ostream & fix, ostream & var) const
00477 {
00478   Sequence_t::writeRecord (fix, var);
00479 
00480   Size_t sizet = reads_m . size( );
00481   writeLE (fix, &sizet);
00482 
00483   for ( Pos_t i = 0; i < sizet; i ++ )
00484     reads_m [i] . writeRecord (var);
00485 }
00486 
00487 
00488 //----------------------------------------------------- writeUMD ---------------
00489 void Contig_t::writeUMD (ostream & out) const
00490 {
00491   vector<Tile_t>::const_iterator ti;
00492 
00493   out << "C " << getEID( ) << endl;
00494 
00495   for ( ti = reads_m . begin( ); ti != reads_m . end( ); ti ++ )
00496     out << ti -> source << ' '
00497         << ti -> range . begin + ti -> offset << ' '
00498         << ti -> range . end + ti -> offset << endl;
00499 
00500   out << endl;
00501 
00502   if ( !out . good( ) )
00503     AMOS_THROW_IO ("UMD message write failure");
00504 }

Generated on Mon Feb 22 17:36:27 2010 for libAMOS by  doxygen 1.4.7