Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

Sequence_AMOS.cc

Go to the documentation of this file.
00001 00002 00003 00004 00005 00006 00007 00008 00009 00010 #include "Sequence_AMOS.hh" 00011 using namespace AMOS; 00012 using namespace std; 00013 #define CHARS_PER_LINE 70 00014 00015 00016 00017 00018 //================================================ Sequence_t ================== 00019 const NCode_t Sequence_t::NCODE = M_SEQUENCE; 00020 00021 00022 //----------------------------------------------------- clear ------------------ 00023 void Sequence_t::clear ( ) 00024 { 00025 bool compress = flags_m . nibble & COMPRESS_BIT; 00026 Universal_t::clear( ); 00027 free (seq_m); 00028 free (qual_m); 00029 seq_m = qual_m = NULL; 00030 length_m = 0; 00031 if ( compress ) 00032 flags_m . nibble |= COMPRESS_BIT; 00033 } 00034 00035 00036 //----------------------------------------------------- compress --------------- 00037 void Sequence_t::compress ( ) 00038 { 00039 if ( isCompressed( ) ) 00040 return; 00041 00042 //-- Store compressed data in seq_m 00043 for ( Pos_t i = 0; i < length_m; i ++ ) 00044 seq_m [i] = compress (seq_m [i], qual_m [i]); 00045 00046 //-- Free qual_m, it is no longer used 00047 free (qual_m); 00048 qual_m = NULL; 00049 00050 //-- store compression flag in bit COMPRESS_BIT 00051 flags_m . nibble |= COMPRESS_BIT; 00052 } 00053 00054 00055 //----------------------------------------------------- getQualString ---------- 00056 string Sequence_t::getQualString (Range_t range) const 00057 { 00058 Pos_t lo = range . getLo( ); 00059 Pos_t hi = range . getHi( ); 00060 00061 //-- Check preconditions 00062 if ( lo < 0 || hi > length_m ) 00063 AMOS_THROW_ARGUMENT ("Invalid quality subrange"); 00064 00065 //-- Allocate space for retval 00066 string retval (hi - lo, NULL_CHAR); 00067 00068 //-- Fill retval 00069 for ( Pos_t i = 0; lo < hi; i ++, lo ++ ) 00070 retval [i] = getBase (lo) . second; 00071 00072 if ( range . isReverse( ) ) 00073 AMOS::Reverse (retval); 00074 00075 return retval; 00076 } 00077 00078 00079 //----------------------------------------------------- getSeqString ----------- 00080 string Sequence_t::getSeqString (Range_t range) const 00081 { 00082 Pos_t lo = range . getLo( ); 00083 Pos_t hi = range . getHi( ); 00084 00085 //-- Check preconditions 00086 if ( lo < 0 || hi > length_m ) 00087 AMOS_THROW_ARGUMENT ("Invalid sequence subrange"); 00088 00089 //-- Allocate space for retval 00090 string retval (hi - lo, NULL_CHAR); 00091 00092 //-- Fill retval 00093 for ( Pos_t i = 0; lo < hi; i ++, lo ++ ) 00094 retval [i] = getBase (lo) . first; 00095 00096 if ( range . isReverse( ) ) 00097 AMOS::ReverseComplement (retval); 00098 00099 return retval; 00100 } 00101 00102 00103 //--------------------------------------------------- readMessage ------------ 00104 void Sequence_t::readMessage (const Message_t & msg) 00105 { 00106 Universal_t::readMessage (msg); 00107 00108 try { 00109 00110 if ( msg . exists (F_SEQUENCE) && 00111 msg . exists (F_QUALITY) ) 00112 setSequence (msg . getField (F_SEQUENCE), 00113 msg . getField (F_QUALITY)); 00114 else if ( msg . exists (F_SEQUENCE) || 00115 msg . exists (F_QUALITY) ) 00116 AMOS_THROW_ARGUMENT ("Missing sequence or quality field"); 00117 } 00118 catch (ArgumentException_t) { 00119 00120 clear( ); 00121 throw; 00122 } 00123 } 00124 00125 00126 //----------------------------------------------------- readRecord ------------- 00127 void Sequence_t::readRecord (istream & fix, istream & var) 00128 { 00129 Universal_t::readRecord (fix, var); 00130 00131 readLE (fix, &length_m); 00132 00133 seq_m = (uint8_t *) SafeRealloc (seq_m, length_m); 00134 var . read ((char *)seq_m, length_m); 00135 00136 if ( !isCompressed( ) ) 00137 { 00138 qual_m = (uint8_t *) SafeRealloc (qual_m, length_m); 00139 var . read ((char *)qual_m, length_m); 00140 } 00141 } 00142 00143 00144 //----------------------------------------------------- setSequence ------------ 00145 void Sequence_t::setSequence (const char * seq, const char * qual) 00146 { 00147 //-- Check preconditions 00148 Size_t length = strlen (seq); 00149 if ( length != (Size_t)strlen (qual) ) 00150 AMOS_THROW_ARGUMENT ("Sequence and quality lengths disagree"); 00151 00152 //-- Set the sequence 00153 seq_m = (uint8_t *) SafeRealloc (seq_m, length); 00154 if ( !isCompressed( ) ) 00155 qual_m = (uint8_t *) SafeRealloc (qual_m, length); 00156 00157 length_m = 0; 00158 for ( Pos_t i = 0; i < length; i ++ ) 00159 { 00160 if ( seq [i] == NL_CHAR && qual [i] == NL_CHAR ) 00161 continue; 00162 else if ( seq [i] == NL_CHAR || qual [i] == NL_CHAR ) 00163 AMOS_THROW_ARGUMENT ("Invalid newline found in seq and qlt data"); 00164 00165 length_m ++; 00166 setBase (seq [i], qual [i], length_m - 1); 00167 } 00168 00169 if ( length_m != length ) 00170 { 00171 seq_m = (uint8_t *) SafeRealloc (seq_m, length_m); 00172 if ( !isCompressed( ) ) 00173 qual_m = (uint8_t *) SafeRealloc (qual_m, length_m); 00174 } 00175 } 00176 00177 00178 //----------------------------------------------------- setSequence ------------ 00179 void Sequence_t::setSequence (const string & seq, const string & qual) 00180 { 00181 //-- Check preconditions 00182 Size_t length = seq . size( ); 00183 if ( length != (Size_t)qual . size( ) ) 00184 AMOS_THROW_ARGUMENT ("Sequence and quality lengths disagree"); 00185 00186 //-- Set the sequence 00187 seq_m = (uint8_t *) SafeRealloc (seq_m, length); 00188 if ( !isCompressed( ) ) 00189 qual_m = (uint8_t *) SafeRealloc (qual_m, length); 00190 00191 length_m = 0; 00192 for ( Pos_t i = 0; i < length; i ++ ) 00193 { 00194 if ( seq [i] == NL_CHAR && qual [i] == NL_CHAR ) 00195 continue; 00196 else if ( seq [i] == NL_CHAR || qual [i] == NL_CHAR ) 00197 AMOS_THROW_ARGUMENT ("Invalid newline found in seq and qlt data"); 00198 00199 length_m ++; 00200 setBase (seq [i], qual [i], length_m - 1); 00201 } 00202 00203 if ( length_m != length ) 00204 { 00205 seq_m = (uint8_t *) SafeRealloc (seq_m, length_m); 00206 if ( !isCompressed( ) ) 00207 qual_m = (uint8_t *) SafeRealloc (qual_m, length_m); 00208 } 00209 } 00210 00211 00212 //----------------------------------------------------- uncompress ------------- 00213 void Sequence_t::uncompress ( ) 00214 { 00215 if ( !isCompressed( ) ) 00216 return; 00217 00218 //-- Uncompress, move quality scores back to qual_m 00219 pair<char, char> retval; 00220 qual_m = (uint8_t *) SafeRealloc (qual_m, length_m); 00221 for ( Pos_t i = 0; i < length_m; i ++ ) 00222 { 00223 retval = uncompress (seq_m [i]); 00224 seq_m [i] = retval . first; 00225 qual_m [i] = retval . second; 00226 } 00227 00228 //-- store compression flag in bit COMPRESS_BIT 00229 flags_m . nibble &= ~COMPRESS_BIT; 00230 } 00231 00232 00233 //----------------------------------------------------- writeMessage ----------- 00234 void Sequence_t::writeMessage (Message_t & msg) const 00235 { 00236 Universal_t::writeMessage (msg); 00237 00238 try { 00239 00240 msg . setMessageCode (Sequence_t::NCODE); 00241 00242 if ( length_m != 0 ) 00243 { 00244 pair<char, char> cp; 00245 Pos_t i, j, last; 00246 Size_t pos = length_m + ((length_m - 1) / CHARS_PER_LINE) + 1; 00247 string seq (pos, NL_CHAR); 00248 string qlt (pos, NL_CHAR); 00249 00250 pos = 0; 00251 for ( i = 0; i < length_m; i += CHARS_PER_LINE ) 00252 { 00253 last = i + CHARS_PER_LINE; 00254 if ( length_m < last ) 00255 last = length_m; 00256 for ( j = i; j < last; ++ j ) 00257 { 00258 cp = getBase (j); 00259 seq [pos] = cp . first; 00260 qlt [pos] = cp . second; 00261 ++ pos; 00262 } 00263 ++ pos; 00264 } 00265 00266 msg . setField (F_SEQUENCE, seq); 00267 msg . setField (F_QUALITY, qlt); 00268 } 00269 } 00270 catch (ArgumentException_t) { 00271 00272 msg . clear( ); 00273 throw; 00274 } 00275 } 00276 00277 00278 //----------------------------------------------------- writeRecord ------------ 00279 void Sequence_t::writeRecord (ostream & fix, ostream & var) const 00280 { 00281 Universal_t::writeRecord (fix, var); 00282 00283 writeLE (fix, &length_m); 00284 00285 var . write ((char *)seq_m, length_m); 00286 00287 if ( !isCompressed( ) ) 00288 var . write ((char *)qual_m, length_m); 00289 } 00290 00291 00292 //----------------------------------------------------- operator= -------------- 00293 Sequence_t & Sequence_t::operator= (const Sequence_t & source) 00294 { 00295 if ( this != &source ) 00296 { 00297 Universal_t::operator= (source); 00298 00299 seq_m = (uint8_t *) SafeRealloc (seq_m, source . length_m); 00300 memcpy (seq_m, source . seq_m, source . length_m); 00301 00302 if ( !source . isCompressed( ) ) 00303 { 00304 qual_m = (uint8_t *) SafeRealloc (qual_m, source . length_m); 00305 memcpy (qual_m, source . qual_m, source . length_m); 00306 } 00307 else 00308 qual_m = NULL; 00309 00310 length_m = source . length_m; 00311 } 00312 00313 return *this; 00314 }

Generated on Tue May 17 15:19:02 2005 for libAMOS by doxygen 1.3.8