Sequence_AMOS.cc

Go to the documentation of this file.
00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 #include "Sequence_AMOS.hh"
00011 using namespace AMOS;
00012 using namespace std;
00013 #define CHARS_PER_LINE 70
00014 
00015 
00016 
00017 
00018 //================================================ Sequence_t ==================
00019 const NCode_t Sequence_t::NCODE = M_SEQUENCE;
00020 
00021 
00022 //----------------------------------------------------- clear ------------------
00023 void Sequence_t::clear ( )
00024 {
00025   bool compress = flags_m . nibble & COMPRESS_BIT;
00026   Universal_t::clear( );
00027   free (seq_m);
00028   free (qual_m);
00029   seq_m = qual_m = NULL;
00030   length_m = 0;
00031   if ( compress )
00032     flags_m . nibble |= COMPRESS_BIT;
00033 }
00034 
00035 
00036 //----------------------------------------------------- compress ---------------
00037 void Sequence_t::compress ( )
00038 {
00039   if ( isCompressed( ) )
00040     return;
00041 
00042   //-- store compression flag in bit COMPRESS_BIT
00043   flags_m . nibble |= COMPRESS_BIT;
00044 
00045   if (seq_m == NULL)
00046     return;
00047 
00048   //-- Store compressed data in seq_m
00049   for ( Pos_t i = 0; i < length_m; i ++ )
00050     seq_m [i] = compress (seq_m [i], qual_m [i]);
00051 
00052   //-- Free qual_m, it is no longer used
00053   free (qual_m);
00054   qual_m = NULL;
00055 }
00056 
00057 
00058 //----------------------------------------------------- getQualString ----------
00059 string Sequence_t::getQualString (Range_t range) const
00060 {
00061   Pos_t lo = range . getLo( );
00062   Pos_t hi = range . getHi( );
00063 
00064   //-- Check preconditions
00065   if ( lo < 0  ||  hi > length_m )
00066     AMOS_THROW_ARGUMENT ("Invalid quality subrange");
00067 
00068   //-- Allocate space for retval
00069   string retval (hi - lo, NULL_CHAR);
00070 
00071   //-- Fill retval
00072   for ( Pos_t i = 0; lo < hi; i ++, lo ++ )
00073     retval [i] = getBase (lo) . second;
00074 
00075   if ( range . isReverse( ) )
00076     AMOS::Reverse (retval);
00077 
00078   return retval;
00079 }
00080 
00081 
00082 //----------------------------------------------------- getSeqString -----------
00083 string Sequence_t::getSeqString (Range_t range) const
00084 {
00085   Pos_t lo = range . getLo( );
00086   Pos_t hi = range . getHi( );
00087 
00088   //-- Check preconditions
00089   if ( lo < 0  ||  hi > length_m )
00090     AMOS_THROW_ARGUMENT ("Invalid sequence subrange");
00091 
00092   //-- Allocate space for retval
00093   string retval (hi - lo, NULL_CHAR);
00094 
00095   //-- Fill retval
00096   for ( Pos_t i = 0; lo < hi; i ++, lo ++ )
00097     retval [i] = getBase (lo) . first;
00098 
00099   if ( range . isReverse( ) )
00100     AMOS::ReverseComplement (retval);
00101 
00102   return retval;
00103 }
00104 
00105 
00106 //--------------------------------------------------- readMessage ------------
00107 void Sequence_t::readMessage (const Message_t & msg)
00108 {
00109   Universal_t::readMessage (msg);
00110 
00111   try {
00112 
00113     if ( msg . exists (F_SEQUENCE)  &&
00114          msg . exists (F_QUALITY) )
00115       setSequence (msg . getField (F_SEQUENCE),
00116                    msg . getField (F_QUALITY));
00117     else if ( msg . exists (F_SEQUENCE)  ||
00118               msg . exists (F_QUALITY) )
00119       AMOS_THROW_ARGUMENT ("Missing sequence or quality field");
00120   }
00121   catch (ArgumentException_t) {
00122     
00123     clear( );
00124     throw;
00125   }
00126 }
00127 
00128 
00129 //----------------------------------------------------- readRecord -------------
00130 void Sequence_t::readRecord (istream & fix, istream & var)
00131 {
00132   Universal_t::readRecord (fix, var);
00133 
00134   readLE (fix, &length_m);
00135 
00136   seq_m = (uint8_t *) SafeRealloc (seq_m, length_m);
00137   var . read ((char *)seq_m, length_m);
00138 
00139   if ( !isCompressed( ) )
00140     {
00141       qual_m = (uint8_t *) SafeRealloc (qual_m, length_m);
00142       var . read ((char *)qual_m, length_m);
00143     }
00144 }
00145 
00146 //----------------------------------------------------- readRecordFix ----------
00147 void Sequence_t::readRecordFix (istream & fix)
00148 {
00149   Universal_t::readRecordFix (fix);
00150 
00151   readLE (fix, &length_m);
00152 
00153   free(seq_m);
00154   free(qual_m);
00155 
00156   seq_m = qual_m = NULL;
00157 }
00158 
00159 
00160 
00161 
00162 //----------------------------------------------------- setSequence ------------
00163 void Sequence_t::setSequence (const char * seq, const char * qual)
00164 {
00165   //-- Check preconditions
00166   Size_t length = strlen (seq);
00167   if ( length != (Size_t)strlen (qual) )
00168     AMOS_THROW_ARGUMENT ("Sequence and quality lengths disagree");
00169 
00170   //-- Set the sequence
00171   seq_m = (uint8_t *) SafeRealloc (seq_m, length);
00172   if ( !isCompressed( ) )
00173     qual_m = (uint8_t *) SafeRealloc (qual_m, length);
00174 
00175   length_m = 0;
00176   for ( Pos_t i = 0; i < length; i ++ )
00177     {
00178       if ( seq [i] == NL_CHAR  &&  qual [i] == NL_CHAR )
00179           continue;
00180       else if ( seq [i] == NL_CHAR  ||  qual [i] == NL_CHAR )
00181         AMOS_THROW_ARGUMENT ("Invalid newline found in seq and qlt data");
00182 
00183       length_m ++;
00184       setBase (seq [i], qual [i], length_m - 1);
00185     }
00186 
00187   if ( length_m != length )
00188     {
00189       seq_m = (uint8_t *) SafeRealloc (seq_m, length_m);
00190       if ( !isCompressed( ) )
00191         qual_m = (uint8_t *) SafeRealloc (qual_m, length_m);
00192     }
00193 }
00194 
00195 
00196 //----------------------------------------------------- setSequence ------------
00197 void Sequence_t::setSequence (const string & seq, const string & qual)
00198 {
00199   //-- Check preconditions
00200   Size_t length = seq . size( );
00201   if ( length != (Size_t)qual . size( ) )
00202     AMOS_THROW_ARGUMENT ("Sequence and quality lengths disagree");
00203 
00204   //-- Set the sequence
00205   seq_m = (uint8_t *) SafeRealloc (seq_m, length);
00206   if ( !isCompressed( ) )
00207     qual_m = (uint8_t *) SafeRealloc (qual_m, length);
00208 
00209   length_m = 0;
00210   for ( Pos_t i = 0; i < length; i ++ )
00211     {
00212       if ( seq [i] == NL_CHAR  &&  qual [i] == NL_CHAR )
00213           continue;
00214       else if ( seq [i] == NL_CHAR  ||  qual [i] == NL_CHAR )
00215         AMOS_THROW_ARGUMENT ("Invalid newline found in seq and qlt data");
00216 
00217       length_m ++;
00218       setBase (seq [i], qual [i], length_m - 1);
00219     }
00220 
00221   if ( length_m != length )
00222     {
00223       seq_m = (uint8_t *) SafeRealloc (seq_m, length_m);
00224       if ( !isCompressed( ) )
00225         qual_m = (uint8_t *) SafeRealloc (qual_m, length_m);
00226     }
00227 }
00228 
00229 
00230 //----------------------------------------------------- uncompress -------------
00231 void Sequence_t::uncompress ( )
00232 {
00233   if ( !isCompressed( ) )
00234     return;
00235 
00236   //-- store compression flag in bit COMPRESS_BIT
00237   flags_m . nibble &= ~COMPRESS_BIT;
00238 
00239   if (seq_m == NULL)
00240     return;
00241 
00242   //-- Uncompress, move quality scores back to qual_m
00243   pair<char, char> retval;
00244   qual_m = (uint8_t *) SafeRealloc (qual_m, length_m);
00245   for ( Pos_t i = 0; i < length_m; i ++ )
00246     {
00247       retval = uncompress (seq_m [i]);
00248       seq_m  [i] = retval . first;
00249       qual_m [i] = retval . second;
00250     }
00251 }
00252 
00253 
00254 //----------------------------------------------------- writeMessage -----------
00255 void Sequence_t::writeMessage (Message_t & msg) const
00256 {
00257   Universal_t::writeMessage (msg);
00258 
00259   try {
00260 
00261     msg . setMessageCode (Sequence_t::NCODE);
00262 
00263     if ( length_m != 0 && seq_m )
00264       {
00265         pair<char, char> cp;
00266         Pos_t i, j, last;
00267         Size_t pos = length_m + ((length_m - 1) / CHARS_PER_LINE) + 1;
00268         string seq (pos, NL_CHAR);
00269         string qlt (pos, NL_CHAR);
00270 
00271         pos = 0;
00272         for ( i = 0; i < length_m; i += CHARS_PER_LINE )
00273           {
00274             last = i + CHARS_PER_LINE;
00275             if ( length_m < last )
00276               last = length_m;
00277             for ( j = i; j < last; ++ j )
00278               {
00279                 cp = getBase (j);
00280                 seq [pos] = cp . first;
00281                 qlt [pos] = cp . second;
00282                 ++ pos;
00283               }
00284             ++ pos;
00285           }
00286 
00287         msg . setField (F_SEQUENCE, seq);
00288         msg . setField (F_QUALITY, qlt);
00289       }
00290   }
00291   catch (ArgumentException_t) {
00292 
00293     msg . clear( );
00294     throw;
00295   }
00296 }
00297 
00298 
00299 //----------------------------------------------------- writeRecord ------------
00300 void Sequence_t::writeRecord (ostream & fix, ostream & var) const
00301 {
00302   Universal_t::writeRecord (fix, var);
00303 
00304   writeLE (fix, &length_m);
00305 
00306   var . write ((char *)seq_m, length_m);
00307 
00308   if ( !isCompressed( ) )
00309     var . write ((char *)qual_m, length_m);
00310 }
00311 
00312 
00313 //----------------------------------------------------- operator= --------------
00314 Sequence_t & Sequence_t::operator= (const Sequence_t & source)
00315 {
00316   if ( this != &source )
00317     {
00318       Universal_t::operator= (source);
00319 
00320       seq_m = (uint8_t *) SafeRealloc (seq_m, source . length_m);
00321       memcpy (seq_m, source . seq_m, source . length_m);
00322 
00323       if ( !source . isCompressed( ) )
00324         {
00325           qual_m = (uint8_t *) SafeRealloc (qual_m, source . length_m);
00326           memcpy (qual_m, source . qual_m, source . length_m);
00327         }
00328       else
00329         qual_m = NULL;
00330 
00331       length_m = source . length_m;
00332     }
00333 
00334   return *this;
00335 }

Generated on Mon Feb 22 17:36:27 2010 for libAMOS by  doxygen 1.4.7