00001
00002
00003 #include "ContigIterator_AMOS.hh"
00004
00005 using namespace std;
00006 using namespace AMOS;
00007
00008
00009 TiledRead_t::TiledRead_t(Tile_t tile, Read_t red, int readidx)
00010 : m_seq(red.getSeqString(tile.range)),
00011 m_qual(red.getQualString(tile.range)),
00012 m_readidx(readidx),
00013 m_fragid(red.getFragment()),
00014 m_iid(red.getIID()),
00015 m_eid(red.getEID())
00016 {
00017 int gappos;
00018
00019 try
00020 {
00021 for (int i = 0; i < tile.gaps.size(); i++)
00022 {
00023 gappos = tile.gaps[i] + i;
00024 m_seq.insert(gappos, 1, '-');
00025
00026 char lqv = (gappos > 0) ? m_qual[gappos-1] : -1;
00027 char rqv = (gappos < m_qual.size()) ? m_qual[gappos] : -1;
00028
00029 char gapqv = (lqv < rqv)
00030 ? (lqv != -1) ? lqv : rqv
00031 : (rqv != -1) ? rqv : lqv;
00032
00033 m_qual.insert(gappos, 1, gapqv);
00034 }
00035 }
00036 catch(...)
00037 {
00038 AMOS_THROW_ARGUMENT((string)"Error inserting gaps into read: " + red.getEID());
00039 }
00040
00041 m_loffset = tile.offset;
00042 m_roffset = tile.offset + tile.getGappedLength() - 1;
00043 m_isRC = tile.range.isReverse();
00044
00045 int l = m_seq.size();
00046
00047 for (int i = 0; i < l; i++)
00048 {
00049 m_seq[i] = toupper(m_seq[i]);
00050 }
00051 }
00052
00053 int32_t ContigIterator_t::s_tilingreadsoffset(0);
00054
00055 ContigIterator_t::ContigIterator_t(Contig_t & ctg, Bank_t * rdbank)
00056 : m_tilingreadsoffset(s_tilingreadsoffset)
00057 {
00058 m_gindex = -1;
00059 m_uindex = 1;
00060
00061 m_readBank = rdbank;
00062 m_contig = ctg;
00063
00064 m_consensus = m_contig.getSeqString();
00065 m_consqual = m_contig.getQualString();
00066
00067 vector<Tile_t> & tiling = m_contig.getReadTiling();
00068 sort(tiling.begin(), tiling.end(), TileOrderCmp());
00069 m_currenttile = 0;
00070 m_numreads = tiling.size();
00071 s_tilingreadsoffset += m_numreads;
00072 }
00073
00074 Pos_t ContigIterator_t::uindex() const
00075 {
00076 if (m_consensus[m_gindex] == '-' && m_uindex > 1)
00077 {
00078 return m_uindex-1;
00079 }
00080
00081 return m_uindex;
00082 }
00083
00084 void ContigIterator_t::renderTile(Tile_t & tile, int tilingindex)
00085 {
00086 Read_t rd;
00087
00088 m_readBank->fetch(tile.source, rd);
00089 TiledRead_t trd(tile, rd, tilingindex+m_tilingreadsoffset);
00090
00091 TiledReadList_t::iterator ins = m_tilingreads.insert(m_tilingreads.end(), trd);
00092
00093 m_ends.push(ins);
00094 }
00095
00096
00097
00098 bool ContigIterator_t::advanceNext()
00099 {
00100 int len = m_consensus.size();
00101 if (m_gindex >= len) { return false; }
00102
00103 if (m_gindex != -1 && m_consensus[m_gindex] != '-') { m_uindex++; }
00104 m_gindex++;
00105
00106 vector<Tile_t> & tiling = m_contig.getReadTiling();
00107
00108 while ((m_currenttile < m_numreads) && (tiling[m_currenttile].offset <= m_gindex))
00109 {
00110 renderTile(tiling[m_currenttile], m_currenttile);
00111 m_currenttile++;
00112 }
00113
00114 while (!m_ends.empty() && (m_ends.top()->m_roffset < m_gindex))
00115 {
00116 m_tilingreads.erase(m_ends.top());
00117 m_ends.pop();
00118 }
00119
00120 return (m_gindex < len);
00121 }
00122
00123
00124 bool ContigIterator_t::seek(Pos_t gindex)
00125 {
00126 if (gindex >= m_consensus.size()) { return false; }
00127
00128 bool reuseexistingtiling = (gindex >= m_gindex);
00129
00130 if (!reuseexistingtiling)
00131 {
00132
00133 m_tilingreads.clear();
00134 while (!m_ends.empty()) { m_ends.pop(); }
00135 m_currenttile = 0;
00136 }
00137
00138 m_gindex = gindex;
00139 m_uindex = m_contig.gap2ungap(gindex);
00140
00141
00142 vector<Tile_t> & tiling = m_contig.getReadTiling();
00143
00144 while (m_currenttile < m_numreads && tiling[m_currenttile].offset <= m_gindex)
00145 {
00146 Pos_t roffset = tiling[m_currenttile].offset + tiling[m_currenttile].getGappedLength() - 1;
00147 if (roffset >= m_gindex)
00148 {
00149
00150 renderTile(tiling[m_currenttile], m_currenttile);
00151 }
00152
00153 m_currenttile++;
00154 }
00155
00156 if (reuseexistingtiling)
00157 {
00158 while (!m_ends.empty() && m_ends.top()->m_roffset < m_gindex)
00159 {
00160 m_tilingreads.erase(m_ends.top());
00161 m_ends.pop();
00162 }
00163 }
00164
00165 return true;
00166 }
00167
00168
00169 bool ContigIterator_t::hasSNP() const
00170 {
00171 TiledReadList_t::const_iterator tle;
00172 TiledReadList_t::const_iterator end;
00173
00174 char lastbase = ' ';
00175 for (tle = m_tilingreads.begin(); tle != m_tilingreads.end(); tle++)
00176 {
00177 char base = tle->base(m_gindex);
00178
00179 if (lastbase != ' ' && lastbase != base)
00180 {
00181 return true;
00182 }
00183
00184 lastbase = base;
00185 }
00186
00187 return false;
00188 }
00189
00190 BaseStats_t::BaseStats_t(char base)
00191 : m_base(base), m_cumqv(0), m_maxqv(0)
00192 { }
00193
00194 void BaseStats_t::addRead(TiledReadList_t::const_iterator tile, Pos_t gindex)
00195 {
00196 int qv = tile->qv(gindex);
00197 m_cumqv += qv;
00198
00199 if (qv > m_maxqv) { m_maxqv = qv; }
00200
00201 m_reads.push_back(tile);
00202 }
00203
00204 Column_t::Column_t(ContigIterator_t & ci)
00205 : m_gindex(ci.gindex()),
00206 m_uindex(ci.uindex()),
00207 m_cons(ci.cons()),
00208 m_cqv(ci.cqv()),
00209 m_depth(0)
00210 {
00211 TiledReadList_t::const_iterator tle;
00212 map<char, BaseStats_t>::iterator si;
00213
00214 for (tle = ci.getTilingReads().begin(); tle != ci.getTilingReads().end(); tle++)
00215 {
00216 assert(m_gindex >= tle->m_loffset &&
00217 m_gindex <= tle->m_roffset);
00218
00219 m_depth++;
00220
00221 char base = tle->base(m_gindex);
00222
00223 si = m_baseinfo.find(base);
00224 if (si == m_baseinfo.end())
00225 {
00226 si = m_baseinfo.insert(make_pair(base, BaseStats_t(base))).first;
00227 }
00228
00229 si->second.addRead(tle, m_gindex);
00230 }
00231 }
00232
00233 Column_t ContigIterator_t::getColumn()
00234 {
00235 return Column_t(*this);
00236 }
00237
00238 vector<BaseStats_t *> Column_t::getBaseInfo()
00239 {
00240 vector<BaseStats_t *> retval;
00241
00242 for (map<char, BaseStats_t>::iterator si = m_baseinfo.begin();
00243 si != m_baseinfo.end();
00244 si++)
00245 {
00246 retval.push_back(&si->second);
00247 }
00248
00249 sort(retval.begin(), retval.end(), BaseStatsCmp());
00250
00251 return retval;
00252 }
00253