00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
#include "Contig_AMOS.hh"
00011
#include <cctype>
00012
using namespace AMOS;
00013
using namespace std;
00014
00015
00016
00017
00018
00019 const NCode_t Contig_t::NCODE =
M_CONTIG;
00020
00021
00022
00023 Pos_t Contig_t::gap2ungap (
Pos_t gap)
const
00024
{
00025
Pos_t retval = gap;
00026
00027
for (
Pos_t i = 0; i < gap; ++ i )
00028
if ( ! isalpha (getBase (i) . first) )
00029 -- retval;
00030
00031
return retval;
00032 }
00033
00034
00035
00036 Size_t Contig_t::ungap2gap (
Pos_t ungap)
const
00037
{
00038
Pos_t retval = ungap;
00039
00040
for (
Pos_t i = 0; i <= retval; ++ i )
00041
if ( ! isalpha (getBase (i) . first) )
00042 ++ retval;
00043
00044
return retval;
00045 }
00046
00047
00048
00049 Size_t Contig_t::getSpan ( )
const
00050
{
00051
Pos_t hi,lo;
00052
00053
00054
if ( reads_m . empty( ) )
00055 {
00056 lo = hi = 0;
00057 }
00058
else
00059 {
00060 vector<Tile_t>::const_iterator ti = reads_m . begin( );
00061
00062 lo = ti -> offset;
00063 hi = ti -> offset + ti -> range .
getLength( );
00064
00065
for ( ++ ti; ti != reads_m . end( ); ++ ti )
00066 {
00067
if ( ti -> offset < lo )
00068 lo = ti -> offset;
00069
if ( ti -> offset + ti -> range .
getLength( ) > hi )
00070 hi = ti -> offset + ti -> range .
getLength( );
00071 }
00072 }
00073
00074
return hi - lo;
00075 }
00076
00077
00078
00079 Size_t Contig_t::getUngappedLength( )
const
00080
{
00081
Size_t retval = length_m;
00082
00083
for (
Pos_t i = 0; i < length_m; ++ i )
00084
if ( ! isalpha (getBase (i) . first) )
00085 -- retval;
00086
00087
return retval;
00088 }
00089
00090
00091
00092 string
Contig_t::getUngappedQualString (
Range_t range)
const
00093
{
00094
Pos_t lo = range . getLo( );
00095
Pos_t hi = range . getHi( );
00096
00097
00098
if ( lo < 0 || hi >
getLength( ) )
00099
AMOS_THROW_ARGUMENT (
"Invalid quality subrange");
00100
00101 string retval;
00102 retval . reserve (hi - lo);
00103 pair<char, char> seqqualc;
00104
00105
00106
for ( ; lo < hi; lo ++ )
00107 {
00108 seqqualc =
getBase (lo);
00109
if ( isalpha (seqqualc . first) )
00110 retval . push_back (seqqualc . second);
00111 }
00112
00113
if ( range . isReverse( ) )
00114
AMOS::Reverse (retval);
00115
00116
return retval;
00117 }
00118
00119
00120
00121 string
Contig_t::getUngappedSeqString (
Range_t range)
const
00122
{
00123
Pos_t lo = range . getLo( );
00124
Pos_t hi = range . getHi( );
00125
00126
00127
if ( lo < 0 || hi >
getLength( ) )
00128
AMOS_THROW_ARGUMENT (
"Invalid sequence subrange");
00129
00130 string retval;
00131 retval . reserve (hi - lo);
00132
char seqc;
00133
00134
00135
for ( ; lo < hi; lo ++ )
00136 {
00137 seqc =
getBase (lo) . first;
00138
if ( isalpha (seqc) )
00139 retval . push_back (seqc);
00140 }
00141
00142
if ( range . isReverse( ) )
00143
AMOS::ReverseComplement (retval);
00144
00145
return retval;
00146 }
00147
00148
00149
00150 void Contig_t::readMessage (
const Message_t & msg)
00151 {
00152
Sequence_t::readMessage (msg);
00153
00154
try {
00155 vector<Message_t>::const_iterator i;
00156
00157
for ( i = msg . getSubMessages( ) . begin( );
00158 i != msg . getSubMessages( ) . end( ); i ++ )
00159 {
00160
if ( i -> getMessageCode( ) ==
M_TILE )
00161 {
00162 reads_m . push_back (
Tile_t( ));
00163 reads_m . back( ) .
readMessage (*i);
00164 }
00165 }
00166 }
00167
catch (
ArgumentException_t) {
00168
00169
clear( );
00170
throw;
00171 }
00172
00173 }
00174
00175
00176
00177 void Contig_t::readRecord (istream & fix, istream & var)
00178 {
00179
Sequence_t::readRecord (fix, var);
00180
00181
Size_t sizet;
00182
readLE (fix, &sizet);
00183
00184 reads_m . resize (sizet);
00185
for (
Pos_t i = 0; i < sizet; i ++ )
00186 reads_m [i] .
readRecord (var);
00187 }
00188
00189
00190
00191 bool Contig_t::readUMD (istream & in)
00192 {
00193 string eid;
00194
Tile_t tile;
00195 istringstream ss;
00196 string line;
00197
00198
while ( line . empty( ) || line [0] !=
'C' )
00199 {
00200
if ( !in . good( ) )
00201
return false;
00202 getline (in, line);
00203 }
00204
00205
clear( );
00206
00207
try {
00208
00209 ss . str (line);
00210 ss . ignore( );
00211 ss >> eid;
00212
if ( !ss )
00213
AMOS_THROW_ARGUMENT (
"Invalid contig ID");
00214 ss .
clear( );
00215
setEID (eid);
00216
00217
while (
true )
00218 {
00219 getline (in, line);
00220
if ( line . empty( ) )
00221
break;
00222
00223 ss . str (line);
00224 ss >> tile . source;
00225 ss >> tile . range . begin;
00226 ss >> tile . range . end;
00227
if ( !ss )
00228
AMOS_THROW_ARGUMENT (
"Invalid read layout");
00229 ss .
clear( );
00230 tile . offset = tile . range . begin < tile . range . end ?
00231 tile . range . begin : tile . range . end;
00232 tile . range . begin -= tile . offset;
00233 tile . range . end -= tile . offset;
00234
getReadTiling( ) . push_back (tile);
00235 }
00236
00237 }
00238
catch (
IOException_t) {
00239
00240
00241
clear( );
00242
throw;
00243 }
00244
00245
return true;
00246 }
00247
00248
00249
00250 void Contig_t::writeMessage (
Message_t & msg)
const
00251
{
00252
Sequence_t::writeMessage (msg);
00253
00254
try {
00255
Pos_t begin = msg . getSubMessages( ) . size( );
00256 msg . getSubMessages( ) . resize (begin + reads_m . size( ));
00257
00258 msg . setMessageCode (Contig_t::NCODE);
00259
00260
if ( !reads_m . empty( ) )
00261 {
00262 vector<Tile_t>::const_iterator tvi;
00263
for ( tvi = reads_m . begin( ); tvi != reads_m . end( ); ++ tvi )
00264 tvi ->
writeMessage (msg . getSubMessages( ) [begin ++]);
00265 }
00266 }
00267
catch (
ArgumentException_t) {
00268
00269 msg .
clear( );
00270
throw;
00271 }
00272 }
00273
00274
00275
00276 void Contig_t::writeRecord (ostream & fix, ostream & var)
const
00277
{
00278
Sequence_t::writeRecord (fix, var);
00279
00280
Size_t sizet = reads_m . size( );
00281
writeLE (fix, &sizet);
00282
00283
for (
Pos_t i = 0; i < sizet; i ++ )
00284 reads_m [i] .
writeRecord (var);
00285 }
00286
00287
00288
00289 void Contig_t::writeUMD (ostream & out)
const
00290
{
00291 vector<Tile_t>::const_iterator ti;
00292
00293 out <<
"C " <<
getEID( ) << endl;
00294
00295
for ( ti = reads_m . begin( ); ti != reads_m . end( ); ti ++ )
00296 out << ti -> source <<
' '
00297 << ti -> range . begin + ti -> offset <<
' '
00298 << ti -> range . end + ti -> offset << endl;
00299
00300 out << endl;
00301
00302
if ( !out . good( ) )
00303
AMOS_THROW_IO (
"UMD message write failure");
00304 }