00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
#ifndef __Kmer_AMOS_HH
00011 #define __Kmer_AMOS_HH 1
00012
00013
#include "Universal_AMOS.hh"
00014
#include <vector>
00015
#include <string>
00016
00017
00018
00019
00020
namespace AMOS {
00021
00022
00028
00029 class Kmer_t :
public Universal_t
00030 {
00031
00032
private:
00033
00034 uint8_t * seq_m;
00035 uint32_t count_m;
00036 uint8_t length_m;
00037 std::vector<ID_t> reads_m;
00038
00039
00040
protected:
00041
00042 static const uint8_t
ADENINE_BITS = 0x0;
00043 static const uint8_t
CYTOSINE_BITS = 0x40;
00044 static const uint8_t
GUANINE_BITS = 0x80;
00045 static const uint8_t
THYMINE_BITS = 0xC0;
00046 static const uint8_t
SEQ_BITS = 0xC0;
00047
00048
00049
00059 static uint8_t
compress (
char seqchar)
00060 {
00061
switch ( toupper(seqchar) )
00062 {
00063
case 'A':
return ADENINE_BITS;
00064
case 'C':
return CYTOSINE_BITS;
00065
case 'G':
return GUANINE_BITS;
00066
case 'T':
return THYMINE_BITS;
00067
default:
00068
AMOS_THROW_ARGUMENT ((std::string)
"Invalid Kmer character " + seqchar);
00069 }
00070 }
00071
00072
00073
00082 static char uncompress (uint8_t byte)
00083 {
00084
switch ( byte &
SEQ_BITS )
00085 {
00086
case ADENINE_BITS:
return 'A';
00087
case CYTOSINE_BITS:
return 'C';
00088
case GUANINE_BITS:
return 'G';
00089
case THYMINE_BITS:
return 'T';
00090
default:
00091
AMOS_THROW_ARGUMENT (
"Unknown logic error");
00092 }
00093 }
00094
00095
00096
00097
virtual void readRecord (std::istream & fix, std::istream & var);
00098
00099
00100
00101
virtual void writeRecord (std::ostream & fix, std::ostream & var)
const;
00102
00103
00104
public:
00105
00106
static const NCode_t NCODE;
00108
00109
static const uint8_t MAX_LENGTH;
00111
00112
00113
00118 Kmer_t ( )
00119 {
00120 seq_m = NULL;
00121 count_m = length_m = 0;
00122 }
00123
00124
00125
00128 Kmer_t (
const Kmer_t & source)
00129 {
00130 seq_m = NULL;
00131 *
this = source;
00132 }
00133
00134
00135
00140 ~Kmer_t ( )
00141 {
00142 free (seq_m);
00143 }
00144
00145
00146
00147
virtual void clear ( );
00148
00149
00150
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171 char getBase (
Pos_t index)
const
00172
{
00173
if ( index < 0 || index >= length_m )
00174
AMOS_THROW_ARGUMENT (
"Requested kmer index is out of range");
00175
return uncompress ((seq_m [index / 4]) << (index % 4 * 2));
00176 }
00177
00178
00179
00184 uint32_t
getCount ( )
const
00185
{
00186
return count_m;
00187 }
00188
00189
00190
00195 uint8_t
getLength ( )
const
00196
{
00197
return length_m;
00198 }
00199
00200
00201
00202 virtual NCode_t getNCode ( )
const
00203
{
00204
return Kmer_t::NCODE;
00205 }
00206
00207
00208
00213 const std::vector<ID_t> &
getReads ( )
const
00214
{
00215
return reads_m;
00216 }
00217
00218
00219
00224 std::vector<ID_t> &
getReads ( )
00225 {
00226
return reads_m;
00227 }
00228
00229
00230
00237 std::string
getSeqString ( ) const;
00238
00239
00240
00241 virtual
void readMessage (const
Message_t & msg);
00242
00243
00244
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268 void setBase (
char seqchar,
Pos_t index)
00269 {
00270
if ( index < 0 || index >= length_m )
00271
AMOS_THROW_ARGUMENT (
"Requested kmer index is out of range");
00272
00273
int offset = index % 4 * 2;
00274 uint8_t * seqp = seq_m + index / 4;
00275
00276
00277 *seqp &= ~(
SEQ_BITS >> offset);
00278 *seqp |=
compress (seqchar) >> offset;
00279 }
00280
00281
00282
00288 void setCount (uint32_t count)
00289 {
00290 count_m = count;
00291 }
00292
00293
00294
00300 void setReads (
const std::vector<ID_t> & reads)
00301 {
00302 reads_m = reads;
00303 }
00304
00305
00306
00320
void setSeqString (
const std::string & seq);
00321
00322
00323
00330 Kmer_t &
operator++ (
int)
00331 {
00332 count_m ++;
00333
return *
this;
00334 }
00335
00336
00337
00338
virtual void writeMessage (
Message_t & msg)
const;
00339
00340
00341
00349
Kmer_t &
operator= (
const Kmer_t & source);
00350 };
00351
00352 }
00353
00354
#endif // #ifndef __Kmer_AMOS_HH