00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
#include "Sequence_AMOS.hh"
00011
using namespace AMOS;
00012
using namespace std;
00013 #define CHARS_PER_LINE 70
00014
00015
00016
00017
00018
00019 const NCode_t Sequence_t::NCODE =
M_SEQUENCE;
00020
00021
00022
00023 void Sequence_t::clear ( )
00024 {
00025
bool compress = flags_m . nibble &
COMPRESS_BIT;
00026
Universal_t::clear( );
00027 free (
seq_m);
00028 free (
qual_m);
00029
seq_m =
qual_m = NULL;
00030
length_m = 0;
00031
if ( compress )
00032 flags_m . nibble |= COMPRESS_BIT;
00033 }
00034
00035
00036
00037 void Sequence_t::compress ( )
00038 {
00039
if (
isCompressed( ) )
00040
return;
00041
00042
00043
for (
Pos_t i = 0; i <
length_m; i ++ )
00044
seq_m [i] =
compress (
seq_m [i],
qual_m [i]);
00045
00046
00047 free (
qual_m);
00048
qual_m = NULL;
00049
00050
00051 flags_m . nibble |=
COMPRESS_BIT;
00052 }
00053
00054
00055
00056 string
Sequence_t::getQualString (
Range_t range)
const
00057
{
00058
Pos_t lo = range . getLo( );
00059
Pos_t hi = range . getHi( );
00060
00061
00062
if ( lo < 0 || hi >
length_m )
00063
AMOS_THROW_ARGUMENT (
"Invalid quality subrange");
00064
00065
00066 string retval (hi - lo,
NULL_CHAR);
00067
00068
00069
for (
Pos_t i = 0; lo < hi; i ++, lo ++ )
00070 retval [i] =
getBase (lo) . second;
00071
00072
if ( range . isReverse( ) )
00073
AMOS::Reverse (retval);
00074
00075
return retval;
00076 }
00077
00078
00079
00080 string
Sequence_t::getSeqString (
Range_t range)
const
00081
{
00082
Pos_t lo = range . getLo( );
00083
Pos_t hi = range . getHi( );
00084
00085
00086
if ( lo < 0 || hi >
length_m )
00087
AMOS_THROW_ARGUMENT (
"Invalid sequence subrange");
00088
00089
00090 string retval (hi - lo,
NULL_CHAR);
00091
00092
00093
for (
Pos_t i = 0; lo < hi; i ++, lo ++ )
00094 retval [i] =
getBase (lo) . first;
00095
00096
if ( range . isReverse( ) )
00097
AMOS::ReverseComplement (retval);
00098
00099
return retval;
00100 }
00101
00102
00103
00104 void Sequence_t::readMessage (
const Message_t & msg)
00105 {
00106
Universal_t::readMessage (msg);
00107
00108
try {
00109
00110
if ( msg . exists (
F_SEQUENCE) &&
00111 msg . exists (
F_QUALITY) )
00112
setSequence (msg . getField (
F_SEQUENCE),
00113 msg . getField (
F_QUALITY));
00114
else if ( msg . exists (
F_SEQUENCE) ||
00115 msg . exists (
F_QUALITY) )
00116
AMOS_THROW_ARGUMENT (
"Missing sequence or quality field");
00117 }
00118
catch (
ArgumentException_t) {
00119
00120
clear( );
00121
throw;
00122 }
00123 }
00124
00125
00126
00127 void Sequence_t::readRecord (istream & fix, istream & var)
00128 {
00129
Universal_t::readRecord (fix, var);
00130
00131
readLE (fix, &
length_m);
00132
00133
seq_m = (uint8_t *)
SafeRealloc (
seq_m,
length_m);
00134 var . read ((
char *)
seq_m,
length_m);
00135
00136
if ( !
isCompressed( ) )
00137 {
00138
qual_m = (uint8_t *)
SafeRealloc (
qual_m,
length_m);
00139 var . read ((
char *)
qual_m,
length_m);
00140 }
00141 }
00142
00143
00144
00145 void Sequence_t::setSequence (
const char * seq,
const char * qual)
00146 {
00147
00148
Size_t length = strlen (seq);
00149
if ( length != (
Size_t)strlen (qual) )
00150
AMOS_THROW_ARGUMENT (
"Sequence and quality lengths disagree");
00151
00152
00153
seq_m = (uint8_t *)
SafeRealloc (
seq_m, length);
00154
if ( !
isCompressed( ) )
00155
qual_m = (uint8_t *)
SafeRealloc (
qual_m, length);
00156
00157
length_m = 0;
00158
for (
Pos_t i = 0; i < length; i ++ )
00159 {
00160
if ( seq [i] ==
NL_CHAR && qual [i] ==
NL_CHAR )
00161
continue;
00162
else if ( seq [i] ==
NL_CHAR || qual [i] ==
NL_CHAR )
00163
AMOS_THROW_ARGUMENT (
"Invalid newline found in seq and qlt data");
00164
00165
length_m ++;
00166
setBase (seq [i], qual [i],
length_m - 1);
00167 }
00168
00169
if (
length_m != length )
00170 {
00171
seq_m = (uint8_t *)
SafeRealloc (
seq_m,
length_m);
00172
if ( !
isCompressed( ) )
00173
qual_m = (uint8_t *)
SafeRealloc (
qual_m,
length_m);
00174 }
00175 }
00176
00177
00178
00179 void Sequence_t::setSequence (
const string & seq,
const string & qual)
00180 {
00181
00182
Size_t length = seq . size( );
00183
if ( length != (
Size_t)qual . size( ) )
00184
AMOS_THROW_ARGUMENT (
"Sequence and quality lengths disagree");
00185
00186
00187
seq_m = (uint8_t *)
SafeRealloc (
seq_m, length);
00188
if ( !
isCompressed( ) )
00189
qual_m = (uint8_t *)
SafeRealloc (
qual_m, length);
00190
00191
length_m = 0;
00192
for (
Pos_t i = 0; i < length; i ++ )
00193 {
00194
if ( seq [i] ==
NL_CHAR && qual [i] ==
NL_CHAR )
00195
continue;
00196
else if ( seq [i] ==
NL_CHAR || qual [i] ==
NL_CHAR )
00197
AMOS_THROW_ARGUMENT (
"Invalid newline found in seq and qlt data");
00198
00199
length_m ++;
00200
setBase (seq [i], qual [i],
length_m - 1);
00201 }
00202
00203
if (
length_m != length )
00204 {
00205
seq_m = (uint8_t *)
SafeRealloc (
seq_m,
length_m);
00206
if ( !
isCompressed( ) )
00207
qual_m = (uint8_t *)
SafeRealloc (
qual_m,
length_m);
00208 }
00209 }
00210
00211
00212
00213 void Sequence_t::uncompress ( )
00214 {
00215
if ( !
isCompressed( ) )
00216
return;
00217
00218
00219 pair<char, char> retval;
00220
qual_m = (uint8_t *)
SafeRealloc (
qual_m,
length_m);
00221
for (
Pos_t i = 0; i <
length_m; i ++ )
00222 {
00223 retval =
uncompress (
seq_m [i]);
00224
seq_m [i] = retval . first;
00225
qual_m [i] = retval . second;
00226 }
00227
00228
00229 flags_m . nibble &= ~
COMPRESS_BIT;
00230 }
00231
00232
00233
00234 void Sequence_t::writeMessage (
Message_t & msg)
const
00235
{
00236
Universal_t::writeMessage (msg);
00237
00238
try {
00239
00240 msg . setMessageCode (Sequence_t::NCODE);
00241
00242
if (
length_m != 0 )
00243 {
00244 pair<char, char> cp;
00245
Pos_t i, j, last;
00246
Size_t pos =
length_m + ((
length_m - 1) /
CHARS_PER_LINE) + 1;
00247 string seq (pos,
NL_CHAR);
00248 string qlt (pos,
NL_CHAR);
00249
00250 pos = 0;
00251
for ( i = 0; i <
length_m; i +=
CHARS_PER_LINE )
00252 {
00253 last = i +
CHARS_PER_LINE;
00254
if ( length_m < last )
00255 last = length_m;
00256
for ( j = i; j < last; ++ j )
00257 {
00258 cp =
getBase (j);
00259 seq [pos] = cp . first;
00260 qlt [pos] = cp . second;
00261 ++ pos;
00262 }
00263 ++ pos;
00264 }
00265
00266 msg . setField (
F_SEQUENCE, seq);
00267 msg . setField (
F_QUALITY, qlt);
00268 }
00269 }
00270
catch (
ArgumentException_t) {
00271
00272 msg .
clear( );
00273
throw;
00274 }
00275 }
00276
00277
00278
00279 void Sequence_t::writeRecord (ostream & fix, ostream & var)
const
00280
{
00281
Universal_t::writeRecord (fix, var);
00282
00283
writeLE (fix, &
length_m);
00284
00285 var . write ((
char *)
seq_m,
length_m);
00286
00287
if ( !
isCompressed( ) )
00288 var . write ((
char *)
qual_m,
length_m);
00289 }
00290
00291
00292
00293 Sequence_t &
Sequence_t::operator= (
const Sequence_t & source)
00294 {
00295
if (
this != &source )
00296 {
00297 Universal_t::operator= (source);
00298
00299
seq_m = (uint8_t *)
SafeRealloc (
seq_m, source .
length_m);
00300 memcpy (
seq_m, source .
seq_m, source . length_m);
00301
00302
if ( !source .
isCompressed( ) )
00303 {
00304
qual_m = (uint8_t *)
SafeRealloc (
qual_m, source . length_m);
00305 memcpy (
qual_m, source .
qual_m, source . length_m);
00306 }
00307
else
00308
qual_m = NULL;
00309
00310 length_m = source . length_m;
00311 }
00312
00313
return *
this;
00314 }