00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
#ifndef __Sequence_AMOS_HH
00011 #define __Sequence_AMOS_HH 1
00012
00013
#include "Universal_AMOS.hh"
00014
#include <string>
00015
00016
00017
00018
00019
namespace AMOS {
00020
00021
00035
00036 class Sequence_t :
public Universal_t
00037 {
00038
00039
protected:
00040
00041 uint8_t *
seq_m;
00042 uint8_t *
qual_m;
00043 Size_t length_m;
00044
00045
00046 static const uint8_t
COMPRESS_BIT = 0x1;
00047 static const uint8_t
ADENINE_BITS = 0x0;
00048 static const uint8_t
CYTOSINE_BITS = 0x40;
00049 static const uint8_t
GUANINE_BITS = 0x80;
00050 static const uint8_t
THYMINE_BITS = 0xC0;
00051 static const uint8_t
SEQ_BITS = 0xC0;
00052 static const uint8_t
QUAL_BITS = 0x3F;
00053
00054
00055
00064 static inline uint8_t
compress (
char seqchar,
char qualchar)
00065 {
00066
00067 qualchar =
Char2Qual (qualchar);
00068
00069
if ( qualchar &
SEQ_BITS )
00070
return 0;
00071
00072
switch ( toupper (seqchar) )
00073 {
00074
case 'A':
return (uint8_t)qualchar |
ADENINE_BITS;
00075
case 'C':
return (uint8_t)qualchar |
CYTOSINE_BITS;
00076
case 'G':
return (uint8_t)qualchar |
GUANINE_BITS;
00077
case 'T':
return (uint8_t)qualchar |
THYMINE_BITS;
00078
case 'N':
return 0;
00079
default:
00080
return 0;
00081 }
00082 }
00083
00084
00085
00093 static inline std::pair<char, char>
uncompress (uint8_t byte)
00094 {
00095 std::pair<char, char> retval;
00096
00097
switch ( byte &
SEQ_BITS )
00098 {
00099
case ADENINE_BITS: retval . first =
'A';
break;
00100
case CYTOSINE_BITS: retval . first =
'C';
break;
00101
case GUANINE_BITS: retval . first =
'G';
break;
00102
case THYMINE_BITS: retval . first =
'T';
break;
00103 }
00104
00105 byte &=
QUAL_BITS;
00106
if ( byte == 0 )
00107 retval . first =
'N';
00108
00109 retval . second =
Qual2Char (byte);
00110
00111
return retval;
00112 }
00113
00114
00115
00116
virtual void readRecord (std::istream & fix, std::istream & var);
00117
00118
00119
00120
virtual void writeRecord (std::ostream & fix, std::ostream & var)
const;
00121
00122
00123
public:
00124
00125
static const NCode_t NCODE;
00127
00128
00129
00134 Sequence_t ( )
00135 {
00136
seq_m =
qual_m = NULL;
00137
length_m = 0;
00138 }
00139
00140
00141
00144 Sequence_t (
const Sequence_t & source)
00145 {
00146
seq_m =
qual_m = NULL;
00147 *
this = source;
00148 }
00149
00150
00151
00156 virtual ~Sequence_t ( )
00157 {
00158 free (
seq_m);
00159 free (
qual_m);
00160 }
00161
00162
00163
00169
virtual void clear ( );
00170
00171
00172
00190
void compress ( );
00191
00192
00193
00203 std::pair<char, char>
getBase (
Pos_t index)
const
00204
{
00205
if ( index < 0 || index >=
length_m )
00206
AMOS_THROW_ARGUMENT (
"Requested sequence index is out of range");
00207
00208
if (
isCompressed( ) )
00209
return uncompress (
seq_m [index]);
00210
else
00211
return std::make_pair ((
char)(
seq_m [index]), (
char)(
qual_m [index]));
00212 }
00213
00214
00215
00220 Size_t getLength ( )
const
00221
{
00222
return length_m;
00223 }
00224
00225
00226
00227 virtual NCode_t getNCode ( )
const
00228
{
00229
return Sequence_t::NCODE;
00230 }
00231
00232
00233
00238 std::string
getQualString ( )
const
00239
{
00240
return getQualString (
Range_t (0,
length_m));
00241 }
00242
00243
00244
00255 std::string
getQualString (
Range_t range)
const;
00256
00257
00258
00263 std::string
getSeqString ( )
const
00264
{
00265
return getSeqString (
Range_t (0,
length_m));
00266 }
00267
00268
00269
00280 std::string
getSeqString (
Range_t range)
const;
00281
00282
00283
00291 bool isCompressed ( )
const
00292
{
00293
return flags_m . nibble &
COMPRESS_BIT;
00294 }
00295
00296
00297
00298
virtual void readMessage (
const Message_t & msg);
00299
00300
00301
00321 void setBase (
char seqchar,
char qualchar,
Pos_t index)
00322 {
00323
if ( index < 0 || index >=
length_m )
00324
AMOS_THROW_ARGUMENT (
"Requested sequence index is out of range");
00325
00326
if (
isCompressed( ) )
00327
seq_m [index] =
compress (seqchar, qualchar);
00328
else
00329 {
00330
seq_m [index] = seqchar;
00331
qual_m [index] = qualchar;
00332 }
00333 }
00334
00335
00336
00351
void setSequence (
const char * seq,
const char * qual);
00352
00353
00354
00369
void setSequence (
const std::string & seq,
const std::string & qual);
00370
00371
00372
00382
void uncompress ( );
00383
00384
00385
00393
Sequence_t &
operator= (
const Sequence_t & source);
00394
00395
00396
00397
virtual void writeMessage (
Message_t & msg)
const;
00398
00399 };
00400
00401 }
00402
00403
#endif // #ifndef __Sequence_AMOS_HH