ContigIterator_AMOS.cc

Go to the documentation of this file.
00001 // $Id: ContigIterator_AMOS.cc,v 1.11 2006/04/05 19:03:12 mcschatz Exp $
00002 
00003 #include "ContigIterator_AMOS.hh"
00004 
00005 using namespace std;
00006 using namespace AMOS;
00007 
00008 
00009 TiledRead_t::TiledRead_t(Tile_t tile, Read_t red, int readidx)
00010     : m_seq(red.getSeqString(tile.range)), 
00011       m_qual(red.getQualString(tile.range)), 
00012       m_readidx(readidx),
00013       m_fragid(red.getFragment()),
00014       m_iid(red.getIID()),
00015       m_eid(red.getEID())
00016 {
00017   int gappos;
00018 
00019   try
00020   {
00021     for (int i = 0; i < tile.gaps.size(); i++)
00022     {
00023       gappos = tile.gaps[i] + i;
00024       m_seq.insert(gappos, 1,  '-');
00025 
00026       char lqv = (gappos > 0) ? m_qual[gappos-1] : -1;
00027       char rqv = (gappos < m_qual.size()) ? m_qual[gappos] : -1;
00028 
00029       char gapqv = (lqv < rqv)
00030                    ? (lqv != -1) ? lqv : rqv
00031                    : (rqv != -1) ? rqv : lqv;
00032 
00033       m_qual.insert(gappos, 1, gapqv);
00034     }
00035   }
00036   catch(...)
00037   {
00038     AMOS_THROW_ARGUMENT((string)"Error inserting gaps into read: " + red.getEID());
00039   }
00040 
00041   m_loffset = tile.offset;
00042   m_roffset = tile.offset + tile.getGappedLength() - 1;
00043   m_isRC = tile.range.isReverse();
00044 
00045   int l = m_seq.size();
00046 
00047   for (int i = 0; i < l; i++)
00048   {
00049     m_seq[i] = toupper(m_seq[i]);
00050   }
00051 }
00052 
00053 int32_t ContigIterator_t::s_tilingreadsoffset(0);
00054 
00055 ContigIterator_t::ContigIterator_t(Contig_t & ctg, Bank_t * rdbank)
00056  : m_tilingreadsoffset(s_tilingreadsoffset)
00057 {
00058   m_gindex = -1;
00059   m_uindex = 1;
00060 
00061   m_readBank = rdbank;
00062   m_contig = ctg;
00063 
00064   m_consensus = m_contig.getSeqString();
00065   m_consqual = m_contig.getQualString();
00066 
00067   vector<Tile_t> & tiling = m_contig.getReadTiling();
00068   sort(tiling.begin(), tiling.end(), TileOrderCmp());
00069   m_currenttile = 0;
00070   m_numreads = tiling.size();
00071   s_tilingreadsoffset += m_numreads;
00072 }
00073 
00074 Pos_t ContigIterator_t::uindex() const
00075 {
00076   if (m_consensus[m_gindex] == '-' && m_uindex > 1)
00077   {
00078     return m_uindex-1;
00079   }
00080 
00081   return m_uindex;
00082 }
00083 
00084 void ContigIterator_t::renderTile(Tile_t & tile, int tilingindex)
00085 {
00086   Read_t rd;
00087 
00088   m_readBank->fetch(tile.source, rd);
00089   TiledRead_t trd(tile, rd, tilingindex+m_tilingreadsoffset);
00090 
00091   TiledReadList_t::iterator ins = m_tilingreads.insert(m_tilingreads.end(), trd);
00092 
00093   m_ends.push(ins);
00094 }
00095 
00096 
00097 // Move to the next position
00098 bool ContigIterator_t::advanceNext()
00099 {
00100   int len = m_consensus.size();
00101   if (m_gindex >= len) { return false; }
00102 
00103   if (m_gindex != -1 && m_consensus[m_gindex] != '-') { m_uindex++; }
00104   m_gindex++;
00105 
00106   vector<Tile_t> & tiling = m_contig.getReadTiling();
00107 
00108   while ((m_currenttile < m_numreads) && (tiling[m_currenttile].offset <= m_gindex))
00109   { 
00110     renderTile(tiling[m_currenttile], m_currenttile);
00111     m_currenttile++;
00112   }
00113 
00114   while (!m_ends.empty() && (m_ends.top()->m_roffset < m_gindex))
00115   { 
00116     m_tilingreads.erase(m_ends.top());
00117     m_ends.pop();
00118   }
00119 
00120   return (m_gindex < len);
00121 }
00122 
00123 // Seek to a random position in the contig
00124 bool ContigIterator_t::seek(Pos_t gindex)
00125 {
00126   if (gindex >= m_consensus.size()) { return false; }
00127 
00128   bool reuseexistingtiling = (gindex >= m_gindex);
00129 
00130   if (!reuseexistingtiling)
00131   {
00132     // Can't go backwards, so flush the tiling and reload from scratch
00133     m_tilingreads.clear();
00134     while (!m_ends.empty()) { m_ends.pop(); }
00135     m_currenttile = 0;
00136   }
00137 
00138   m_gindex = gindex;
00139   m_uindex = m_contig.gap2ungap(gindex);
00140 
00141   // load the reads that overlap this position
00142   vector<Tile_t> & tiling = m_contig.getReadTiling();
00143 
00144   while (m_currenttile < m_numreads && tiling[m_currenttile].offset <= m_gindex)
00145   {
00146     Pos_t roffset = tiling[m_currenttile].offset + tiling[m_currenttile].getGappedLength() - 1;
00147     if (roffset >= m_gindex)
00148     {
00149       // This read covers the seek position
00150       renderTile(tiling[m_currenttile], m_currenttile);
00151     }
00152 
00153     m_currenttile++;
00154   }
00155 
00156   if (reuseexistingtiling)
00157   {
00158     while (!m_ends.empty() && m_ends.top()->m_roffset < m_gindex)
00159     { 
00160       m_tilingreads.erase(m_ends.top());
00161       m_ends.pop();
00162     }
00163   }
00164 
00165   return true;
00166 }
00167 
00168 
00169 bool ContigIterator_t::hasSNP() const
00170 {
00171   TiledReadList_t::const_iterator tle;
00172   TiledReadList_t::const_iterator end;
00173 
00174   char lastbase = ' ';
00175   for (tle = m_tilingreads.begin(); tle != m_tilingreads.end(); tle++)
00176   {
00177     char base = tle->base(m_gindex);
00178 
00179     if (lastbase != ' ' && lastbase != base)
00180     {
00181       return true;
00182     }
00183 
00184     lastbase = base;
00185   }
00186 
00187   return false;
00188 }
00189 
00190 BaseStats_t::BaseStats_t(char base)
00191  : m_base(base), m_cumqv(0), m_maxqv(0)
00192 { }
00193 
00194 void BaseStats_t::addRead(TiledReadList_t::const_iterator tile, Pos_t gindex)
00195 {
00196   int qv = tile->qv(gindex);
00197   m_cumqv += qv;
00198 
00199   if (qv > m_maxqv) { m_maxqv = qv; }
00200 
00201   m_reads.push_back(tile);
00202 }
00203 
00204 Column_t::Column_t(ContigIterator_t & ci)
00205  : m_gindex(ci.gindex()),
00206    m_uindex(ci.uindex()),
00207    m_cons(ci.cons()),
00208    m_cqv(ci.cqv()),
00209    m_depth(0)
00210 {
00211   TiledReadList_t::const_iterator tle;
00212   map<char, BaseStats_t>::iterator si;
00213 
00214   for (tle = ci.getTilingReads().begin(); tle != ci.getTilingReads().end(); tle++)
00215   { 
00216     assert(m_gindex >= tle->m_loffset &&
00217            m_gindex <= tle->m_roffset);
00218 
00219     m_depth++;
00220 
00221     char base = tle->base(m_gindex);
00222 
00223     si = m_baseinfo.find(base);
00224     if (si == m_baseinfo.end())
00225     {
00226       si = m_baseinfo.insert(make_pair(base, BaseStats_t(base))).first;
00227     }
00228 
00229     si->second.addRead(tle, m_gindex);
00230   }
00231 }
00232 
00233 Column_t ContigIterator_t::getColumn()
00234 {
00235   return Column_t(*this);
00236 }
00237 
00238 vector<BaseStats_t *> Column_t::getBaseInfo()
00239 {
00240   vector<BaseStats_t *> retval;
00241 
00242   for (map<char, BaseStats_t>::iterator si = m_baseinfo.begin();
00243        si != m_baseinfo.end();
00244        si++)
00245   {
00246     retval.push_back(&si->second);
00247   }
00248 
00249   sort(retval.begin(), retval.end(), BaseStatsCmp());
00250 
00251   return retval;
00252 }
00253 

Generated on Mon Feb 22 17:36:27 2010 for libAMOS by  doxygen 1.4.7