00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
#include "Kmer_AMOS.hh"
00011
using namespace AMOS;
00012
using namespace std;
00013
00014
00015
00016
00017
00018 const NCode_t Kmer_t::NCODE =
M_KMER;
00019 const uint8_t Kmer_t::MAX_LENGTH = 255;
00020
00021
00022
00023 void Kmer_t::clear ( )
00024 {
00025
Universal_t::clear( );
00026 free (seq_m);
00027 seq_m = NULL;
00028 count_m = length_m = 0;
00029 reads_m .
clear( );
00030 }
00031
00032
00033
00034 string
Kmer_t::getSeqString ( )
const
00035
{
00036 string retval (length_m,
NULL_CHAR);
00037
00038
00039
Pos_t ci = -1;
00040 uint8_t byte = 0;
00041
for (
Pos_t ui = 0; ui < length_m; ui ++ )
00042 {
00043
if ( ui % 4 == 0 )
00044 byte = seq_m [++ ci];
00045
00046 retval [ui] =
uncompress (byte);
00047 byte <<= 2;
00048 }
00049
00050
return retval;
00051 }
00052
00053
00054
00055 void Kmer_t::readMessage (
const Message_t & msg)
00056 {
00057
Universal_t::readMessage (msg);
00058
00059
try {
00060
00061 istringstream ss;
00062
00063
if ( msg . exists (
F_COUNT) )
00064 {
00065 ss . str (msg . getField (
F_COUNT));
00066 ss >> count_m;
00067
if ( !ss )
00068
AMOS_THROW_ARGUMENT (
"Invalid count format");
00069 ss .
clear( );
00070 }
00071
00072
if ( msg . exists (
F_SEQUENCE) )
00073
setSeqString (msg . getField (
F_SEQUENCE));
00074
00075
if ( msg . exists (
F_READS) )
00076 {
00077
ID_t iid;
00078
00079 ss . str (msg . getField (
F_READS));
00080
00081
while ( ss )
00082 {
00083 ss >> iid;
00084
if ( ! ss . fail( ) )
00085 reads_m . push_back (iid);
00086 }
00087
00088
if ( !ss . eof( ) )
00089
AMOS_THROW_ARGUMENT (
"Invalid read link list format");
00090 ss .
clear( );
00091 }
00092 }
00093
catch (
ArgumentException_t) {
00094
00095
clear( );
00096
throw;
00097 }
00098 }
00099
00100
00101
00102 void Kmer_t::readRecord (istream & fix, istream & var)
00103 {
00104
Universal_t::readRecord (fix, var);
00105
00106
Size_t size;
00107
readLE (fix, &count_m);
00108
readLE (fix, &length_m);
00109
readLE (fix, &size);
00110
00111 reads_m . resize (size,
NULL_ID);
00112
for (
Pos_t i = 0; i < size; i ++ )
00113
readLE (var, &(reads_m [i]));
00114
00115 size = length_m / 4 + (length_m % 4 ? 1 : 0);
00116 seq_m = (uint8_t *)
SafeRealloc (seq_m, size);
00117 var . read ((
char *)seq_m, size);
00118 }
00119
00120
00121
00122 void Kmer_t::setSeqString (
const string & seq)
00123 {
00124
Size_t osize = seq . size( );
00125
Size_t size = osize;
00126
if ( size > Kmer_t::MAX_LENGTH )
00127
AMOS_THROW_ARGUMENT (
"Invalid kmer sequence is too long");
00128
00129 size = size / 4 + (size % 4 ? 1 : 0);
00130 seq_m = (uint8_t *)
SafeRealloc (seq_m, size);
00131
00132
00133
Pos_t ci = -1;
00134
int offset = 8;
00135 length_m = 0;
00136
for (
Size_t ui = 0; ui < osize; ui ++ )
00137 {
00138
if ( seq [ui] ==
NL_CHAR )
00139
continue;
00140
00141 length_m ++;
00142
if ( offset >= 8 )
00143 {
00144 offset = 0;
00145 seq_m [++ ci] = 0;
00146 }
00147 seq_m [ci] |=
compress (seq [ui]) >> offset;
00148 offset += 2;
00149 }
00150
00151
if ( length_m != osize )
00152 {
00153 size = length_m / 4 + (length_m % 4 ? 1 : 0);
00154 seq_m = (uint8_t *)
SafeRealloc (seq_m, size);
00155 }
00156 }
00157
00158
00159
00160 void Kmer_t::writeMessage (
Message_t & msg)
const
00161
{
00162
Universal_t::writeMessage (msg);
00163
00164
try {
00165
00166 ostringstream ss;
00167
00168 msg . setMessageCode (Kmer_t::NCODE);
00169
00170 ss << count_m;
00171 msg . setField (
F_COUNT, ss . str( ));
00172 ss . str (
NULL_STRING);
00173
00174
if ( length_m != 0 )
00175 msg . setField (
F_SEQUENCE,
getSeqString( ));
00176
00177
if ( !reads_m . empty( ) )
00178 {
00179 vector<ID_t>::const_iterator vi;
00180
00181
for ( vi = reads_m . begin( ); vi != reads_m . end( ); vi ++ )
00182 ss << *vi << endl;
00183 msg . setField (
F_READS, ss . str( ));
00184 ss . str (
NULL_STRING);
00185 }
00186 }
00187
catch (
ArgumentException_t) {
00188
00189 msg .
clear( );
00190
throw;
00191 }
00192 }
00193
00194
00195
00196 void Kmer_t::writeRecord (ostream & fix, ostream & var)
const
00197
{
00198
Universal_t::writeRecord (fix, var);
00199
00200
Size_t size = reads_m . size( );
00201
writeLE (fix, &count_m);
00202
writeLE (fix, &length_m);
00203
writeLE (fix, &size);
00204
00205
for (
Pos_t i = 0; i < size; i ++ )
00206
writeLE (var, &(reads_m [i]));
00207
00208 size = length_m / 4 + (length_m % 4 ? 1 : 0);
00209 var . write ((
char *)seq_m, size);
00210 }
00211
00212
00213
00214 Kmer_t &
Kmer_t::operator= (
const Kmer_t & source)
00215 {
00216
if (
this != &source )
00217 {
00218 Universal_t::operator= (source);
00219
00220
Size_t size = source . length_m / 4 + (source . length_m % 4 ? 1 : 0);
00221 seq_m = (uint8_t *)
SafeRealloc (seq_m, size);
00222 memcpy (seq_m, source . seq_m, size);
00223
00224 count_m = source . count_m;
00225 length_m = source . length_m;
00226 reads_m = source . reads_m;
00227 }
00228
00229
return *
this;
00230 }