00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "Kmer_AMOS.hh"
00011 using namespace AMOS;
00012 using namespace std;
00013
00014
00015
00016
00017
00018 const NCode_t Kmer_t::NCODE = M_KMER;
00019 const uint8_t Kmer_t::MAX_LENGTH = 255;
00020
00021
00022
00023 void Kmer_t::clear ( )
00024 {
00025 Universal_t::clear( );
00026 free (seq_m);
00027 seq_m = NULL;
00028 count_m = length_m = 0;
00029 reads_m . clear( );
00030 }
00031
00032
00033
00034 string Kmer_t::getSeqString ( ) const
00035 {
00036 string retval (length_m, NULL_CHAR);
00037
00038
00039 Pos_t ci = -1;
00040 uint8_t byte = 0;
00041 for ( Pos_t ui = 0; ui < length_m; ui ++ )
00042 {
00043 if ( ui % 4 == 0 )
00044 byte = seq_m [++ ci];
00045
00046 retval [ui] = uncompress (byte);
00047 byte <<= 2;
00048 }
00049
00050 return retval;
00051 }
00052
00053
00054
00055 void Kmer_t::readMessage (const Message_t & msg)
00056 {
00057 Universal_t::readMessage (msg);
00058
00059 try {
00060
00061 istringstream ss;
00062
00063 if ( msg . exists (F_COUNT) )
00064 {
00065 ss . str (msg . getField (F_COUNT));
00066 ss >> count_m;
00067 if ( !ss )
00068 AMOS_THROW_ARGUMENT ("Invalid count format");
00069 ss . clear( );
00070 }
00071
00072 if ( msg . exists (F_SEQUENCE) )
00073 setSeqString (msg . getField (F_SEQUENCE));
00074
00075 if ( msg . exists (F_READS) )
00076 {
00077 ID_t iid;
00078
00079 ss . str (msg . getField (F_READS));
00080
00081 while ( ss )
00082 {
00083 ss >> iid;
00084 if ( ! ss . fail( ) )
00085 reads_m . push_back (iid);
00086 }
00087
00088 if ( !ss . eof( ) )
00089 AMOS_THROW_ARGUMENT ("Invalid read link list format");
00090 ss . clear( );
00091 }
00092 }
00093 catch (ArgumentException_t) {
00094
00095 clear( );
00096 throw;
00097 }
00098 }
00099
00100
00101
00102 void Kmer_t::readRecord (istream & fix, istream & var)
00103 {
00104 Universal_t::readRecord (fix, var);
00105
00106 Size_t size;
00107 readLE (fix, &count_m);
00108 readLE (fix, &length_m);
00109 readLE (fix, &size);
00110
00111 reads_m . resize (size, NULL_ID);
00112 for ( Pos_t i = 0; i < size; i ++ )
00113 readLE (var, &(reads_m [i]));
00114
00115 size = length_m / 4 + (length_m % 4 ? 1 : 0);
00116 seq_m = (uint8_t *) SafeRealloc (seq_m, size);
00117 var . read ((char *)seq_m, size);
00118 }
00119
00120
00121
00122 void Kmer_t::readRecordFix (istream & fix)
00123 {
00124 Universal_t::readRecordFix (fix);
00125
00126 Size_t size;
00127 readLE (fix, &count_m);
00128 readLE (fix, &length_m);
00129 readLE (fix, &size);
00130
00131 reads_m.clear();
00132
00133 free(seq_m);
00134 seq_m = NULL;
00135 }
00136
00137
00138
00139 void Kmer_t::setSeqString (const string & seq)
00140 {
00141 Size_t osize = seq . size( );
00142 Size_t size = osize;
00143 if ( size > Kmer_t::MAX_LENGTH )
00144 AMOS_THROW_ARGUMENT ("Invalid kmer sequence is too long");
00145
00146 size = size / 4 + (size % 4 ? 1 : 0);
00147 seq_m = (uint8_t *) SafeRealloc (seq_m, size);
00148
00149
00150 Pos_t ci = -1;
00151 int offset = 8;
00152 length_m = 0;
00153 for ( Size_t ui = 0; ui < osize; ui ++ )
00154 {
00155 if ( seq [ui] == NL_CHAR )
00156 continue;
00157
00158 length_m ++;
00159 if ( offset >= 8 )
00160 {
00161 offset = 0;
00162 seq_m [++ ci] = 0;
00163 }
00164 seq_m [ci] |= compress (seq [ui]) >> offset;
00165 offset += 2;
00166 }
00167
00168 if ( length_m != osize )
00169 {
00170 size = length_m / 4 + (length_m % 4 ? 1 : 0);
00171 seq_m = (uint8_t *) SafeRealloc (seq_m, size);
00172 }
00173 }
00174
00175
00176
00177 void Kmer_t::writeMessage (Message_t & msg) const
00178 {
00179 Universal_t::writeMessage (msg);
00180
00181 try {
00182
00183 ostringstream ss;
00184
00185 msg . setMessageCode (Kmer_t::NCODE);
00186
00187 ss << count_m;
00188 msg . setField (F_COUNT, ss . str( ));
00189 ss . str (NULL_STRING);
00190
00191 if ( length_m != 0 )
00192 msg . setField (F_SEQUENCE, getSeqString( ));
00193
00194 if ( !reads_m . empty( ) )
00195 {
00196 vector<ID_t>::const_iterator vi;
00197
00198 for ( vi = reads_m . begin( ); vi != reads_m . end( ); vi ++ )
00199 ss << *vi << endl;
00200 msg . setField (F_READS, ss . str( ));
00201 ss . str (NULL_STRING);
00202 }
00203 }
00204 catch (ArgumentException_t) {
00205
00206 msg . clear( );
00207 throw;
00208 }
00209 }
00210
00211
00212
00213 void Kmer_t::writeRecord (ostream & fix, ostream & var) const
00214 {
00215 Universal_t::writeRecord (fix, var);
00216
00217 Size_t size = reads_m . size( );
00218 writeLE (fix, &count_m);
00219 writeLE (fix, &length_m);
00220 writeLE (fix, &size);
00221
00222 for ( Pos_t i = 0; i < size; i ++ )
00223 writeLE (var, &(reads_m [i]));
00224
00225 size = length_m / 4 + (length_m % 4 ? 1 : 0);
00226 var . write ((char *)seq_m, size);
00227 }
00228
00229
00230
00231 Kmer_t & Kmer_t::operator= (const Kmer_t & source)
00232 {
00233 if ( this != &source )
00234 {
00235 Universal_t::operator= (source);
00236
00237 Size_t size = source . length_m / 4 + (source . length_m % 4 ? 1 : 0);
00238 seq_m = (uint8_t *) SafeRealloc (seq_m, size);
00239 memcpy (seq_m, source . seq_m, size);
00240
00241 count_m = source . count_m;
00242 length_m = source . length_m;
00243 reads_m = source . reads_m;
00244 }
00245
00246 return *this;
00247 }