00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
#ifndef __Kmer_AMOS_HH
00011 #define __Kmer_AMOS_HH 1
00012 
00013 
#include "Universal_AMOS.hh"
00014 
#include <vector>
00015 
#include <string>
00016 
00017 
00018 
00019 
00020 
namespace AMOS {
00021 
00022 
00028 
00029 class Kmer_t : 
public Universal_t
00030 {
00031   
00032 
private:
00033 
00034   uint8_t * seq_m;              
00035   uint32_t count_m;             
00036   uint8_t length_m;             
00037   std::vector<ID_t> reads_m;    
00038 
00039 
00040 
protected:
00041 
00042   static const uint8_t 
ADENINE_BITS  = 0x0;   
00043   static const uint8_t 
CYTOSINE_BITS = 0x40;  
00044   static const uint8_t 
GUANINE_BITS  = 0x80;  
00045   static const uint8_t 
THYMINE_BITS  = 0xC0;  
00046   static const uint8_t 
SEQ_BITS      = 0xC0;  
00047 
00048 
00049   
00059   static uint8_t 
compress (
char seqchar)
00060   {
00061     
switch ( toupper(seqchar) )
00062       {
00063       
case 'A': 
return ADENINE_BITS;
00064       
case 'C': 
return CYTOSINE_BITS;
00065       
case 'G': 
return GUANINE_BITS;
00066       
case 'T': 
return THYMINE_BITS;
00067       
default:
00068         
AMOS_THROW_ARGUMENT ((std::string)
"Invalid Kmer character " + seqchar);
00069       }
00070   }
00071 
00072 
00073   
00082   static char uncompress (uint8_t byte)
00083   {
00084     
switch ( byte & 
SEQ_BITS )
00085       {
00086       
case ADENINE_BITS:  
return 'A';
00087       
case CYTOSINE_BITS: 
return 'C';
00088       
case GUANINE_BITS:  
return 'G';
00089       
case THYMINE_BITS:  
return 'T';
00090       
default:
00091         
AMOS_THROW_ARGUMENT (
"Unknown logic error");
00092       }
00093   }
00094 
00095 
00096   
00097   
virtual void readRecord (std::istream & fix, std::istream & var);
00098 
00099 
00100   
00101   
virtual void writeRecord (std::ostream & fix, std::ostream & var) 
const;
00102 
00103 
00104 
public:
00105 
00106   
static const NCode_t NCODE;
00108 
00109   
static const uint8_t MAX_LENGTH;
00111 
00112 
00113   
00118   Kmer_t ( )
00119   {
00120     seq_m = NULL;
00121     count_m = length_m = 0;
00122   }
00123 
00124 
00125   
00128   Kmer_t (
const Kmer_t & source)
00129   {
00130     seq_m = NULL;
00131     *
this = source;
00132   }
00133 
00134 
00135   
00140   ~Kmer_t ( )
00141   {
00142     free (seq_m);
00143   }
00144 
00145 
00146   
00147   
virtual void clear ( );
00148 
00149 
00150   
00160   
00161   
00162   
00163   
00164   
00165   
00166   
00167   
00168   
00169   
00170   
00171   char getBase (
Pos_t index)
 const
00172 
  {
00173     
if ( index < 0 || index >= length_m )
00174       
AMOS_THROW_ARGUMENT (
"Requested kmer index is out of range");
00175     
return uncompress ((seq_m [index / 4]) << (index % 4 * 2));
00176   }
00177 
00178 
00179   
00184   uint32_t 
getCount ( )
 const
00185 
  {
00186     
return count_m;
00187   }
00188 
00189 
00190   
00195   uint8_t 
getLength ( )
 const
00196 
  {
00197     
return length_m;
00198   }
00199 
00200 
00201   
00202   virtual NCode_t getNCode ( )
 const
00203 
  {
00204     
return Kmer_t::NCODE;
00205   }
00206 
00207 
00208   
00213   const std::vector<ID_t> & 
getReads ( )
 const
00214 
  {
00215     
return reads_m;
00216   }
00217 
00218 
00219   
00224   std::vector<ID_t> & 
getReads ( )
00225   {
00226     
return reads_m;
00227   }
00228 
00229 
00230   
00237   std::string 
getSeqString ( ) const;
00238 
00239 
00240   
00241   virtual 
void readMessage (const 
Message_t & msg);
00242 
00243 
00244   
00257   
00258   
00259   
00260   
00261   
00262   
00263   
00264   
00265   
00266   
00267   
00268   void setBase (
char seqchar, 
Pos_t index)
00269   {
00270     
if ( index < 0 || index >= length_m )
00271       
AMOS_THROW_ARGUMENT (
"Requested kmer index is out of range");
00272 
00273     
int offset = index % 4 * 2;              
00274     uint8_t * seqp = seq_m + index / 4;      
00275 
00276     
00277     *seqp &= ~(
SEQ_BITS >> offset);
00278     *seqp |= 
compress (seqchar) >> offset;
00279   }
00280 
00281 
00282   
00288   void setCount (uint32_t count)
00289   {
00290     count_m = count;
00291   }
00292 
00293 
00294   
00300   void setReads (
const std::vector<ID_t> & reads)
00301   {
00302     reads_m = reads;
00303   }
00304 
00305 
00306   
00320   
void setSeqString (
const std::string & seq);
00321 
00322 
00323   
00330   Kmer_t & 
operator++ (
int)
00331   {
00332     count_m ++;
00333     
return *
this;
00334   }
00335 
00336 
00337   
00338   
virtual void writeMessage (
Message_t & msg) 
const;
00339 
00340 
00341   
00349   
Kmer_t & 
operator= (
const Kmer_t & source);
00350 };
00351 
00352 } 
00353 
00354 
#endif // #ifndef __Kmer_AMOS_HH