Sequence_AMOS.hh

Go to the documentation of this file.
00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 #ifndef __Sequence_AMOS_HH
00011 #define __Sequence_AMOS_HH 1
00012 
00013 #include "Universal_AMOS.hh"
00014 #include <string>
00015 
00016 
00017 
00018 
00019 namespace AMOS {
00020 
00021 //================================================ Sequence_t ==================
00035 //==============================================================================
00036 class Sequence_t : public Universal_t
00037 {
00038 
00039 protected:
00040 
00041   uint8_t * seq_m;      
00042   uint8_t * qual_m;     
00043   Size_t length_m;      
00044 
00045 
00046   static const uint8_t COMPRESS_BIT  = 0x1;   
00047   static const uint8_t ADENINE_BITS  = 0x0;   
00048   static const uint8_t CYTOSINE_BITS = 0x40;  
00049   static const uint8_t GUANINE_BITS  = 0x80;  
00050   static const uint8_t THYMINE_BITS  = 0xC0;  
00051   static const uint8_t SEQ_BITS      = 0xC0;  
00052   static const uint8_t QUAL_BITS     = 0x3F;  
00053 
00054 
00055   //--------------------------------------------------- compress ---------------
00064   static inline uint8_t compress (char seqchar, char qualchar)
00065   {
00066     //-- Force quality score into its bits
00067     qualchar = Char2Qual (qualchar);
00068 
00069     if ( qualchar & SEQ_BITS )
00070       return 0;
00071 
00072     switch ( toupper (seqchar) )
00073       {
00074       case 'A': return (uint8_t)qualchar | ADENINE_BITS;
00075       case 'C': return (uint8_t)qualchar | CYTOSINE_BITS;
00076       case 'G': return (uint8_t)qualchar | GUANINE_BITS;
00077       case 'T': return (uint8_t)qualchar | THYMINE_BITS;
00078       case 'N': return 0;
00079       default:
00080         return 0;
00081       }
00082   }
00083 
00084 
00085   //--------------------------------------------------- uncompress -------------
00093   static inline std::pair<char, char> uncompress (uint8_t byte)
00094   {
00095     std::pair<char, char> retval;
00096 
00097     switch ( byte & SEQ_BITS )
00098       {
00099       case ADENINE_BITS:  retval . first = 'A'; break;
00100       case CYTOSINE_BITS: retval . first = 'C'; break;
00101       case GUANINE_BITS:  retval . first = 'G'; break;
00102       case THYMINE_BITS:  retval . first = 'T'; break;
00103       }
00104 
00105     byte &= QUAL_BITS;
00106     if ( byte == 0 )
00107       retval . first = 'N';
00108 
00109     retval . second = Qual2Char (byte);
00110 
00111     return retval;
00112   }
00113 
00114 
00115   //--------------------------------------------------- readRecord -------------
00116   virtual void readRecord (std::istream & fix, std::istream & var);
00117 
00118   //--------------------------------------------------- readRecordFix ----------
00119   virtual void readRecordFix (std::istream & fix);
00120 
00121   //--------------------------------------------------- writeRecord ------------
00122   virtual void writeRecord (std::ostream & fix, std::ostream & var) const;
00123 
00124 
00125 public:
00126 
00127   static const NCode_t NCODE;
00129 
00130 
00131   //--------------------------------------------------- Sequence_t -------------
00136   Sequence_t ( )
00137   {
00138     seq_m = qual_m = NULL;
00139     length_m = 0;
00140   }
00141 
00142 
00143   //--------------------------------------------------- Sequence_t -------------
00146   Sequence_t (const Sequence_t & source)
00147   {
00148     seq_m = qual_m = NULL;
00149     *this = source;
00150   }
00151 
00152 
00153   //--------------------------------------------------- ~Sequence_t ------------
00158   virtual ~Sequence_t ( )
00159   {
00160     free (seq_m);
00161     free (qual_m);
00162   }
00163 
00164 
00165   //--------------------------------------------------- clear ------------------
00171   virtual void clear ( );
00172 
00173 
00174   //--------------------------------------------------- compress ---------------
00192   void compress ( );
00193 
00194 
00195   //--------------------------------------------------- getBase ----------------
00205   std::pair<char, char> getBase (Pos_t index) const
00206   {
00207     if (seq_m == NULL)
00208       AMOS_THROW_ARGUMENT("No sequence data");
00209     
00210     if ( index < 0 || index >= length_m )
00211       AMOS_THROW_ARGUMENT ("Requested sequence index is out of range");
00212 
00213     if ( isCompressed( ) )
00214       return uncompress (seq_m [index]);
00215     else
00216       return std::make_pair ((char)(seq_m [index]), (char)(qual_m [index]));
00217   }
00218 
00219 
00220 
00221   //--------------------------------------------------- getGCContent --------------
00226   virtual double getGCContent (const Range_t & rng) const
00227   {
00228     int gc = 0;
00229     int all = 0;
00230 
00231     int start = rng.getLo();
00232     int stop = rng.getHi();
00233 
00234     // skip ambiguities, gaps
00235 
00236     for (int i = start; i < stop; i++)
00237     {
00238       switch(getBase(i).first)
00239       {
00240         case 'A':
00241         case 'a': 
00242         case 'T':
00243         case 't': all++; break;
00244 
00245         case 'C':
00246         case 'c':
00247         case 'G':
00248         case 'g': all++; gc++; break;
00249       };
00250     }
00251 
00252     return (all) ? ((double)gc)/all : 0.0;
00253   }
00254 
00255   //--------------------------------------------------- getGCContent --------------
00260   virtual double getGCContent ( ) const
00261   {
00262     return getGCContent(Range_t(0, length_m));
00263   }
00264 
00265 
00266 
00267   //--------------------------------------------------- getLength --------------
00272   Size_t getLength ( ) const
00273   {
00274     return length_m;
00275   }
00276 
00277 
00278   //--------------------------------------------------- getNCode ---------------
00279   virtual NCode_t getNCode ( ) const
00280   {
00281     return Sequence_t::NCODE;
00282   }
00283 
00284 
00285   //--------------------------------------------------- getQualString ----------
00290   std::string getQualString ( ) const
00291   {
00292     return getQualString (Range_t (0, length_m));
00293   }
00294 
00295 
00296   //--------------------------------------------------- getQualString ----------
00307   std::string getQualString (Range_t range) const;
00308 
00309 
00310   //--------------------------------------------------- getSeqString -----------
00315   std::string getSeqString ( ) const
00316   {
00317     return getSeqString (Range_t (0, length_m));
00318   }
00319 
00320 
00321   //--------------------------------------------------- getSeqString -----------
00332   std::string getSeqString (Range_t range) const;
00333 
00334 
00335   //--------------------------------------------------- isCompressed -----------
00343   bool isCompressed ( ) const
00344   {
00345     return flags_m . nibble & COMPRESS_BIT;
00346   }
00347 
00348 
00349   //--------------------------------------------------- readMessage ------------
00350   virtual void readMessage (const Message_t & msg);
00351 
00352 
00353   //--------------------------------------------------- setBase ----------------
00373   void setBase (char seqchar, char qualchar, Pos_t index)
00374   {  
00375     if (seq_m == NULL)
00376       AMOS_THROW_ARGUMENT("No sequence data");
00377 
00378     if ( index < 0 || index >= length_m )
00379       AMOS_THROW_ARGUMENT ("Requested sequence index is out of range");
00380 
00381     if ( isCompressed( ) )
00382       seq_m [index] = compress (seqchar, qualchar);
00383     else
00384       {
00385         seq_m  [index] = seqchar;
00386         qual_m [index] = qualchar;
00387       }
00388   }
00389 
00390 
00391   //--------------------------------------------------- setSequence ------------
00406   void setSequence (const char * seq, const char * qual);
00407 
00408 
00409   //--------------------------------------------------- setSequence ------------
00424   void setSequence (const std::string & seq, const std::string & qual);
00425 
00426 
00427   //--------------------------------------------------- uncompress -------------
00437   void uncompress ( );
00438 
00439 
00440   //--------------------------------------------------- operator= --------------
00448   Sequence_t & operator= (const Sequence_t & source);
00449 
00450 
00451   //--------------------------------------------------- writeMessage -----------
00452   virtual void writeMessage (Message_t & msg) const;
00453 
00454 };
00455 
00456 } // namespace AMOS
00457 
00458 #endif // #ifndef __Sequence_AMOS_HH

Generated on Mon Feb 22 17:36:27 2010 for libAMOS by  doxygen 1.4.7