00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef __Kmer_AMOS_HH
00011 #define __Kmer_AMOS_HH 1
00012
00013 #include "Universal_AMOS.hh"
00014 #include <vector>
00015 #include <string>
00016
00017
00018
00019
00020 namespace AMOS {
00021
00022
00028
00029 class Kmer_t : public Universal_t
00030 {
00031
00032 private:
00033
00034 uint8_t * seq_m;
00035 uint32_t count_m;
00036 uint8_t length_m;
00037 std::vector<ID_t> reads_m;
00038
00039
00040 protected:
00041
00042 static const uint8_t ADENINE_BITS = 0x0;
00043 static const uint8_t CYTOSINE_BITS = 0x40;
00044 static const uint8_t GUANINE_BITS = 0x80;
00045 static const uint8_t THYMINE_BITS = 0xC0;
00046 static const uint8_t SEQ_BITS = 0xC0;
00047
00048
00049
00059 static uint8_t compress (char seqchar)
00060 {
00061 switch ( toupper(seqchar) )
00062 {
00063 case 'A': return ADENINE_BITS;
00064 case 'C': return CYTOSINE_BITS;
00065 case 'G': return GUANINE_BITS;
00066 case 'T': return THYMINE_BITS;
00067 default:
00068 AMOS_THROW_ARGUMENT ((std::string)"Invalid Kmer character " + seqchar);
00069 }
00070 }
00071
00072
00073
00082 static char uncompress (uint8_t byte)
00083 {
00084 switch ( byte & SEQ_BITS )
00085 {
00086 case ADENINE_BITS: return 'A';
00087 case CYTOSINE_BITS: return 'C';
00088 case GUANINE_BITS: return 'G';
00089 case THYMINE_BITS: return 'T';
00090 default:
00091 AMOS_THROW_ARGUMENT ("Unknown logic error");
00092 }
00093 }
00094
00095
00096
00097 virtual void readRecord (std::istream & fix, std::istream & var);
00098
00099
00100
00101 virtual void readRecordFix (std::istream & fix);
00102
00103
00104
00105 virtual void writeRecord (std::ostream & fix, std::ostream & var) const;
00106
00107
00108 public:
00109
00110 static const NCode_t NCODE;
00112
00113 static const uint8_t MAX_LENGTH;
00115
00116
00117
00122 Kmer_t ( )
00123 {
00124 seq_m = NULL;
00125 count_m = length_m = 0;
00126 }
00127
00128
00129
00132 Kmer_t (const Kmer_t & source)
00133 {
00134 seq_m = NULL;
00135 *this = source;
00136 }
00137
00138
00139
00144 ~Kmer_t ( )
00145 {
00146 free (seq_m);
00147 }
00148
00149
00150
00151 virtual void clear ( );
00152
00153
00154
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175 char getBase (Pos_t index) const
00176 {
00177 if ( index < 0 || index >= length_m )
00178 AMOS_THROW_ARGUMENT ("Requested kmer index is out of range");
00179 return uncompress ((seq_m [index / 4]) << (index % 4 * 2));
00180 }
00181
00182
00183
00188 uint32_t getCount ( ) const
00189 {
00190 return count_m;
00191 }
00192
00193
00194
00199 uint8_t getLength ( ) const
00200 {
00201 return length_m;
00202 }
00203
00204
00205
00206 virtual NCode_t getNCode ( ) const
00207 {
00208 return Kmer_t::NCODE;
00209 }
00210
00211
00212
00217 const std::vector<ID_t> & getReads ( ) const
00218 {
00219 return reads_m;
00220 }
00221
00222
00223
00228 std::vector<ID_t> & getReads ( )
00229 {
00230 return reads_m;
00231 }
00232
00233
00234
00241 std::string getSeqString ( ) const;
00242
00243
00244
00245 virtual void readMessage (const Message_t & msg);
00246
00247
00248
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272 void setBase (char seqchar, Pos_t index)
00273 {
00274 if ( index < 0 || index >= length_m )
00275 AMOS_THROW_ARGUMENT ("Requested kmer index is out of range");
00276
00277 int offset = index % 4 * 2;
00278 uint8_t * seqp = seq_m + index / 4;
00279
00280
00281 *seqp &= ~(SEQ_BITS >> offset);
00282 *seqp |= compress (seqchar) >> offset;
00283 }
00284
00285
00286
00292 void setCount (uint32_t count)
00293 {
00294 count_m = count;
00295 }
00296
00297
00298
00304 void setReads (const std::vector<ID_t> & reads)
00305 {
00306 reads_m = reads;
00307 }
00308
00309
00310
00324 void setSeqString (const std::string & seq);
00325
00326
00327
00334 Kmer_t & operator++ (int)
00335 {
00336 count_m ++;
00337 return *this;
00338 }
00339
00340
00341
00342 virtual void writeMessage (Message_t & msg) const;
00343
00344
00345
00353 Kmer_t & operator= (const Kmer_t & source);
00354 };
00355
00356 }
00357
00358 #endif // #ifndef __Kmer_AMOS_HH