00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "Sequence_AMOS.hh"
00011 using namespace AMOS;
00012 using namespace std;
00013 #define CHARS_PER_LINE 70
00014
00015
00016
00017
00018
00019 const NCode_t Sequence_t::NCODE = M_SEQUENCE;
00020
00021
00022
00023 void Sequence_t::clear ( )
00024 {
00025 bool compress = flags_m . nibble & COMPRESS_BIT;
00026 Universal_t::clear( );
00027 free (seq_m);
00028 free (qual_m);
00029 seq_m = qual_m = NULL;
00030 length_m = 0;
00031 if ( compress )
00032 flags_m . nibble |= COMPRESS_BIT;
00033 }
00034
00035
00036
00037 void Sequence_t::compress ( )
00038 {
00039 if ( isCompressed( ) )
00040 return;
00041
00042
00043 flags_m . nibble |= COMPRESS_BIT;
00044
00045 if (seq_m == NULL)
00046 return;
00047
00048
00049 for ( Pos_t i = 0; i < length_m; i ++ )
00050 seq_m [i] = compress (seq_m [i], qual_m [i]);
00051
00052
00053 free (qual_m);
00054 qual_m = NULL;
00055 }
00056
00057
00058
00059 string Sequence_t::getQualString (Range_t range) const
00060 {
00061 Pos_t lo = range . getLo( );
00062 Pos_t hi = range . getHi( );
00063
00064
00065 if ( lo < 0 || hi > length_m )
00066 AMOS_THROW_ARGUMENT ("Invalid quality subrange");
00067
00068
00069 string retval (hi - lo, NULL_CHAR);
00070
00071
00072 for ( Pos_t i = 0; lo < hi; i ++, lo ++ )
00073 retval [i] = getBase (lo) . second;
00074
00075 if ( range . isReverse( ) )
00076 AMOS::Reverse (retval);
00077
00078 return retval;
00079 }
00080
00081
00082
00083 string Sequence_t::getSeqString (Range_t range) const
00084 {
00085 Pos_t lo = range . getLo( );
00086 Pos_t hi = range . getHi( );
00087
00088
00089 if ( lo < 0 || hi > length_m )
00090 AMOS_THROW_ARGUMENT ("Invalid sequence subrange");
00091
00092
00093 string retval (hi - lo, NULL_CHAR);
00094
00095
00096 for ( Pos_t i = 0; lo < hi; i ++, lo ++ )
00097 retval [i] = getBase (lo) . first;
00098
00099 if ( range . isReverse( ) )
00100 AMOS::ReverseComplement (retval);
00101
00102 return retval;
00103 }
00104
00105
00106
00107 void Sequence_t::readMessage (const Message_t & msg)
00108 {
00109 Universal_t::readMessage (msg);
00110
00111 try {
00112
00113 if ( msg . exists (F_SEQUENCE) &&
00114 msg . exists (F_QUALITY) )
00115 setSequence (msg . getField (F_SEQUENCE),
00116 msg . getField (F_QUALITY));
00117 else if ( msg . exists (F_SEQUENCE) ||
00118 msg . exists (F_QUALITY) )
00119 AMOS_THROW_ARGUMENT ("Missing sequence or quality field");
00120 }
00121 catch (ArgumentException_t) {
00122
00123 clear( );
00124 throw;
00125 }
00126 }
00127
00128
00129
00130 void Sequence_t::readRecord (istream & fix, istream & var)
00131 {
00132 Universal_t::readRecord (fix, var);
00133
00134 readLE (fix, &length_m);
00135
00136 seq_m = (uint8_t *) SafeRealloc (seq_m, length_m);
00137 var . read ((char *)seq_m, length_m);
00138
00139 if ( !isCompressed( ) )
00140 {
00141 qual_m = (uint8_t *) SafeRealloc (qual_m, length_m);
00142 var . read ((char *)qual_m, length_m);
00143 }
00144 }
00145
00146
00147 void Sequence_t::readRecordFix (istream & fix)
00148 {
00149 Universal_t::readRecordFix (fix);
00150
00151 readLE (fix, &length_m);
00152
00153 free(seq_m);
00154 free(qual_m);
00155
00156 seq_m = qual_m = NULL;
00157 }
00158
00159
00160
00161
00162
00163 void Sequence_t::setSequence (const char * seq, const char * qual)
00164 {
00165
00166 Size_t length = strlen (seq);
00167 if ( length != (Size_t)strlen (qual) )
00168 AMOS_THROW_ARGUMENT ("Sequence and quality lengths disagree");
00169
00170
00171 seq_m = (uint8_t *) SafeRealloc (seq_m, length);
00172 if ( !isCompressed( ) )
00173 qual_m = (uint8_t *) SafeRealloc (qual_m, length);
00174
00175 length_m = 0;
00176 for ( Pos_t i = 0; i < length; i ++ )
00177 {
00178 if ( seq [i] == NL_CHAR && qual [i] == NL_CHAR )
00179 continue;
00180 else if ( seq [i] == NL_CHAR || qual [i] == NL_CHAR )
00181 AMOS_THROW_ARGUMENT ("Invalid newline found in seq and qlt data");
00182
00183 length_m ++;
00184 setBase (seq [i], qual [i], length_m - 1);
00185 }
00186
00187 if ( length_m != length )
00188 {
00189 seq_m = (uint8_t *) SafeRealloc (seq_m, length_m);
00190 if ( !isCompressed( ) )
00191 qual_m = (uint8_t *) SafeRealloc (qual_m, length_m);
00192 }
00193 }
00194
00195
00196
00197 void Sequence_t::setSequence (const string & seq, const string & qual)
00198 {
00199
00200 Size_t length = seq . size( );
00201 if ( length != (Size_t)qual . size( ) )
00202 AMOS_THROW_ARGUMENT ("Sequence and quality lengths disagree");
00203
00204
00205 seq_m = (uint8_t *) SafeRealloc (seq_m, length);
00206 if ( !isCompressed( ) )
00207 qual_m = (uint8_t *) SafeRealloc (qual_m, length);
00208
00209 length_m = 0;
00210 for ( Pos_t i = 0; i < length; i ++ )
00211 {
00212 if ( seq [i] == NL_CHAR && qual [i] == NL_CHAR )
00213 continue;
00214 else if ( seq [i] == NL_CHAR || qual [i] == NL_CHAR )
00215 AMOS_THROW_ARGUMENT ("Invalid newline found in seq and qlt data");
00216
00217 length_m ++;
00218 setBase (seq [i], qual [i], length_m - 1);
00219 }
00220
00221 if ( length_m != length )
00222 {
00223 seq_m = (uint8_t *) SafeRealloc (seq_m, length_m);
00224 if ( !isCompressed( ) )
00225 qual_m = (uint8_t *) SafeRealloc (qual_m, length_m);
00226 }
00227 }
00228
00229
00230
00231 void Sequence_t::uncompress ( )
00232 {
00233 if ( !isCompressed( ) )
00234 return;
00235
00236
00237 flags_m . nibble &= ~COMPRESS_BIT;
00238
00239 if (seq_m == NULL)
00240 return;
00241
00242
00243 pair<char, char> retval;
00244 qual_m = (uint8_t *) SafeRealloc (qual_m, length_m);
00245 for ( Pos_t i = 0; i < length_m; i ++ )
00246 {
00247 retval = uncompress (seq_m [i]);
00248 seq_m [i] = retval . first;
00249 qual_m [i] = retval . second;
00250 }
00251 }
00252
00253
00254
00255 void Sequence_t::writeMessage (Message_t & msg) const
00256 {
00257 Universal_t::writeMessage (msg);
00258
00259 try {
00260
00261 msg . setMessageCode (Sequence_t::NCODE);
00262
00263 if ( length_m != 0 && seq_m )
00264 {
00265 pair<char, char> cp;
00266 Pos_t i, j, last;
00267 Size_t pos = length_m + ((length_m - 1) / CHARS_PER_LINE) + 1;
00268 string seq (pos, NL_CHAR);
00269 string qlt (pos, NL_CHAR);
00270
00271 pos = 0;
00272 for ( i = 0; i < length_m; i += CHARS_PER_LINE )
00273 {
00274 last = i + CHARS_PER_LINE;
00275 if ( length_m < last )
00276 last = length_m;
00277 for ( j = i; j < last; ++ j )
00278 {
00279 cp = getBase (j);
00280 seq [pos] = cp . first;
00281 qlt [pos] = cp . second;
00282 ++ pos;
00283 }
00284 ++ pos;
00285 }
00286
00287 msg . setField (F_SEQUENCE, seq);
00288 msg . setField (F_QUALITY, qlt);
00289 }
00290 }
00291 catch (ArgumentException_t) {
00292
00293 msg . clear( );
00294 throw;
00295 }
00296 }
00297
00298
00299
00300 void Sequence_t::writeRecord (ostream & fix, ostream & var) const
00301 {
00302 Universal_t::writeRecord (fix, var);
00303
00304 writeLE (fix, &length_m);
00305
00306 var . write ((char *)seq_m, length_m);
00307
00308 if ( !isCompressed( ) )
00309 var . write ((char *)qual_m, length_m);
00310 }
00311
00312
00313
00314 Sequence_t & Sequence_t::operator= (const Sequence_t & source)
00315 {
00316 if ( this != &source )
00317 {
00318 Universal_t::operator= (source);
00319
00320 seq_m = (uint8_t *) SafeRealloc (seq_m, source . length_m);
00321 memcpy (seq_m, source . seq_m, source . length_m);
00322
00323 if ( !source . isCompressed( ) )
00324 {
00325 qual_m = (uint8_t *) SafeRealloc (qual_m, source . length_m);
00326 memcpy (qual_m, source . qual_m, source . length_m);
00327 }
00328 else
00329 qual_m = NULL;
00330
00331 length_m = source . length_m;
00332 }
00333
00334 return *this;
00335 }