00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <Sequence/shortestPath.hpp>
00025 #include <Sequence/PathwayHelper.hpp>
00026 #include <Sequence/Translate.hpp>
00027 #include <Sequence/Grantham.hpp>
00028 #include <Sequence/SeqConstants.hpp>
00029 #include <Sequence/SeqProperties.hpp>
00030 #include <algorithm>
00031
00032 namespace Sequence
00033 {
00034 #ifndef DOXYGEN_SKIP //doxygen should skip this
00035 struct shortestPathImpl
00036 {
00037 shortestPath::pathType _type;
00038 Sequence::Grantham distances;
00039 double _distance;
00040 std::string t1,t2;
00041 std::vector<std::string> _path;
00042 std::pair<double,shortestPath::pathType>process_path(const std::string &intermediate,
00043 const Sequence::GeneticCodes & code);
00044 std::pair<double,shortestPath::pathType>process_path2(const std::string &intermediate1,
00045 const std::string &intermediate2,
00046 const Sequence::GeneticCodes & code);
00047 shortestPathImpl(const std::string &codon1,
00048 const std::string &codon2,
00049 const Sequence::GeneticCodes & code);
00050 };
00051
00052 shortestPathImpl::shortestPathImpl(const std::string &codon1,
00053 const std::string &codon2,
00054 const Sequence::GeneticCodes & code):
00055 _type(shortestPath::AMBIG),
00056 distances(Sequence::Grantham()),
00057 _distance(0.)
00058 {
00059 if(codon1.length()!=3 || codon2.length() != 3)
00060 {
00061 throw Sequence::SeqException("Codons are not both of length 3");
00062 }
00063 t1 = Sequence::Translate(codon1.begin(),codon1.end(),code);
00064 t2 = Sequence::Translate(codon2.begin(),codon2.end(),code);
00065
00066 _path.push_back(codon1);
00067
00068 unsigned ndiffs = Sequence::NumDiffs(codon1,codon2);
00069 if (ndiffs == 0)
00070 {
00071 _type = shortestPath::NONE;
00072 }
00073 else if (t1 != "X" && t2 != "X" &&
00074 (std::find_if(codon1.begin(),codon1.end(),
00075 ambiguousNucleotide()) == codon1.end()) &&
00076 (std::find_if(codon2.begin(),codon2.end(),
00077 ambiguousNucleotide()) == codon2.end()) )
00078 {
00079 if (ndiffs == 1)
00080 {
00081 _distance = distances(t1[0],t2[0]);
00082 if(t1 != t2)
00083 {
00084 _type = shortestPath::N;
00085 }
00086 else
00087 {
00088 _type = shortestPath::S;
00089 }
00090
00091 }
00092 else if (ndiffs == 2)
00093 {
00094 std::string intermediates[2];
00095 Sequence::Intermediates2(intermediates,codon1,codon2);
00096 std::string t_int1 = Sequence::Translate(intermediates[0].begin(),
00097 intermediates[0].end());
00098 std::string t_int2 = Sequence::Translate(intermediates[1].begin(),
00099 intermediates[1].end());
00100
00101 std::vector< std::pair<double, shortestPath::pathType> >
00102 paths(2,std::pair<double, shortestPath::pathType>(0.,shortestPath::AMBIG));
00103
00104
00105
00106 paths[0] = process_path(intermediates[0],code);
00107
00108 paths[1] = process_path(intermediates[1],code);
00109 if (paths[0].first < paths[1].first)
00110 {
00111 _type = paths[0].second;
00112 _distance = paths[0].first;
00113 _path.push_back(intermediates[0]);
00114 }
00115 else
00116 {
00117 _type = paths[1].second;
00118 _distance = paths[1].first;
00119 _path.push_back(intermediates[1]);
00120 }
00121 }
00122 else if (ndiffs == 3)
00123 {
00124 std::string intermediates[9];
00125 Sequence::Intermediates3(intermediates,codon1,codon2);
00126 std::vector< std::pair<double, shortestPath::pathType> >
00127 paths(6,std::pair<double, shortestPath::pathType>(0.,shortestPath::AMBIG));
00128
00129
00130
00131 paths[0] = process_path2(intermediates[0],intermediates[1],code);
00132 paths[1] = process_path2(intermediates[0],intermediates[2],code);
00133 paths[2] = process_path2(intermediates[3],intermediates[4],code);
00134 paths[3] = process_path2(intermediates[3],intermediates[5],code);
00135 paths[4] = process_path2(intermediates[6],intermediates[7],code);
00136 paths[5] = process_path2(intermediates[6],intermediates[8],code);
00137
00138 size_t shortest = 0;
00139 double shortest_path = paths[0].first;
00140 for(unsigned i = 0 ; i < 6 ; ++i)
00141 {
00142 if (paths[i].first < shortest_path)
00143 {
00144 shortest=i;
00145 shortest_path=paths[i].first;
00146 }
00147 }
00148 _type = paths[shortest].second;
00149 _distance = paths[shortest].first;
00150 switch (shortest)
00151 {
00152 case 0:
00153 _path.push_back(intermediates[0]);
00154 _path.push_back(intermediates[1]);
00155 break;
00156 case 1:
00157 _path.push_back(intermediates[0]);
00158 _path.push_back(intermediates[2]);
00159 break;
00160 case 2:
00161 _path.push_back(intermediates[3]);
00162 _path.push_back(intermediates[4]);
00163 break;
00164 case 3:
00165 _path.push_back(intermediates[3]);
00166 _path.push_back(intermediates[5]);
00167 break;
00168 case 4:
00169 _path.push_back(intermediates[6]);
00170 _path.push_back(intermediates[7]);
00171 break;
00172 case 5:
00173 _path.push_back(intermediates[6]);
00174 _path.push_back(intermediates[8]);
00175 break;
00176 }
00177 }
00178 }
00179 else
00180 {
00181 _type = shortestPath::AMBIG;
00182 }
00183 _path.push_back(codon2);
00184 }
00185
00186 std::pair<double,shortestPath::pathType>
00187 shortestPathImpl::process_path(const std::string &intermediate,
00188 const Sequence::GeneticCodes & code)
00189 {
00190 std::string tint = Sequence::Translate(intermediate.begin(),intermediate.end(),
00191 code);
00192 shortestPath::pathType t = shortestPath::AMBIG;
00193 double d = ( distances(t1[0],tint[0]) + distances(tint[0],t2[0]) );
00194 if ( (t1 != tint) && (tint != t2) )
00195 {
00196 t = shortestPath::NN;
00197 }
00198 else if ( ((t1 != tint) && (tint == t2)) ||
00199 ((t1 == tint) && (tint != t2)) )
00200 {
00201 t = shortestPath::SN;
00202 }
00203 else if ( t1==tint && tint==t2 )
00204 {
00205 t = shortestPath::SS;
00206 }
00207 return std::make_pair(d,t);
00208 }
00209
00210 std::pair<double,shortestPath::pathType>
00211 shortestPathImpl::process_path2(const std::string &intermediate1,
00212 const std::string &intermediate2,
00213 const Sequence::GeneticCodes & code)
00214 {
00215 std::string tint1 = Sequence::Translate(intermediate1.begin(),
00216 intermediate1.end(),code);
00217 std::string tint2 = Sequence::Translate(intermediate2.begin(),
00218 intermediate2.end(),code);
00219 shortestPath::pathType t = shortestPath::AMBIG;
00220 double d = (distances(t1[0],tint1[0]) +
00221 distances(tint1[0],tint2[0]) +
00222 distances(tint2[0],t2[0]));
00223
00224 bool a = (t1==tint1),
00225 b = (tint1==tint2),
00226 c = (tint2==t2);
00227
00228 if ( !a && !b && !c )
00229 {
00230 t = shortestPath::NNN;
00231 }
00232 else if ( (a && b && !c) ||
00233 (a && !b && c) ||
00234 (!a && b && c) )
00235 {
00236 t = shortestPath::SSN;
00237 }
00238 else if ( (!a && !b && c) ||
00239 (!a && b && !c) ||
00240 ( a && !b && !c) )
00241 {
00242 t = shortestPath::SNN;
00243 }
00244
00245 else if (a && b && c)
00246 {
00247 t = shortestPath::SSS;
00248 }
00249 return std::make_pair(d,t);
00250 }
00251 #endif
00252
00253 shortestPath::shortestPath(const std::string &codon1,
00254 const std::string &codon2,
00255 const Sequence::GeneticCodes & code)
00256 throw (Sequence::SeqException)
00265 {
00266 try
00267 {
00268 impl = std::auto_ptr<shortestPathImpl>(new shortestPathImpl(codon1,codon2,code));
00269 }
00270 catch(Sequence::SeqException &e)
00271 {
00272 throw;
00273 }
00274 catch(std::exception &e)
00275 {
00276 throw (Sequence::SeqException(e.what()));
00277 }
00278 catch (...)
00279 {
00280 throw (Sequence::SeqException("caught exception of unknown type"));
00281 }
00282 }
00283
00284 shortestPath::~shortestPath()
00285 {
00286 }
00287
00288 shortestPath::pathType shortestPath::type() const
00293 {
00294 return impl->_type;
00295 }
00296
00297 shortestPath::const_iterator shortestPath::begin() const
00303 {
00304 return impl->_path.begin();
00305 }
00306
00307 shortestPath::const_iterator shortestPath::end() const
00313 {
00314 return impl->_path.end();
00315 }
00316
00317 double shortestPath::path_distance() const
00321 {
00322 return impl->_distance;
00323 }
00324
00325 std::pair<unsigned,unsigned> mutsShortestPath(const std::string &codon1,
00326 const std::string &codon2,
00327 const Sequence::GeneticCodes & code)
00328 throw (Sequence::SeqException)
00354 {
00355 try
00356 {
00357 shortestPath sp(codon1,codon2,code);
00358 shortestPath::pathType type = sp.type();
00359 switch (type)
00360 {
00361 case shortestPath::S :
00362 {
00363 return std::make_pair(1,0);
00364 break;
00365 }
00366 case shortestPath::N :
00367 {
00368 return std::make_pair(0,1);
00369 break;
00370 }
00371 case shortestPath::SS :
00372 {
00373 return std::make_pair(2,0);
00374 break;
00375 }
00376 case shortestPath::SN :
00377 {
00378 return std::make_pair(1,1);
00379 break;
00380 }
00381 case shortestPath::NN :
00382 {
00383 return std::make_pair(0,2);
00384 break;
00385 }
00386 case shortestPath::SSS :
00387 {
00388 return std::make_pair(3,0);
00389 break;
00390 }
00391 case shortestPath::SSN :
00392 {
00393 return std::make_pair(2,1);
00394 break;
00395 }
00396 case shortestPath::SNN :
00397 {
00398 return std::make_pair(1,2);
00399 break;
00400 }
00401 case shortestPath::NNN :
00402 {
00403 return std::make_pair(0,3);
00404 break;
00405 }
00406 case shortestPath::NONE :
00407 {
00408 return std::make_pair(0,0);
00409 break;
00410 }
00411 case shortestPath::AMBIG :
00412 {
00413 return std::make_pair(SEQMAXUNSIGNED,SEQMAXUNSIGNED);
00414 break;
00415 }
00416 }
00417 return std::make_pair(SEQMAXUNSIGNED,SEQMAXUNSIGNED);
00418 }
00419 catch (SeqException &e)
00420 {
00421 throw;
00422 }
00423 }
00424
00425 std::pair<unsigned,shortestPath::pathType> diffType(const std::string &codon1,
00426 const std::string &codon2,
00427 const Sequence::GeneticCodes & code)
00428 throw (Sequence::SeqException)
00445 {
00446 try
00447 {
00448 shortestPath sp(codon1,codon2,code);
00449 shortestPath::pathType type = sp.type();
00450
00451 if (type == shortestPath::AMBIG)
00452 {
00453 return std::make_pair(SEQMAXUNSIGNED,type);
00454 }
00455 unsigned site=0,ndiffs=0;
00456 for(unsigned i = 0 ; i < 3 ; ++i)
00457 {
00458 if (codon1[i] != codon2[i])
00459 {
00460 ++ndiffs;
00461 site = i;
00462 }
00463 }
00464 if( ndiffs != 1 )
00465 {
00466 return std::make_pair(SEQMAXUNSIGNED,type);
00467 }
00468 return std::make_pair(site,type);
00469 }
00470 catch (Sequence::SeqException &e)
00471 {
00472 throw;
00473 }
00474 }
00475
00476 boost::tuple<shortestPath::pathType,shortestPath::pathType,shortestPath::pathType>
00477 diffTypeMulti(const std::string &codon1,
00478 const std::string &codon2,
00479 const Sequence::GeneticCodes &code)
00480 throw (Sequence::SeqException)
00493 {
00494 shortestPath::pathType p[3];
00495 std::string t1(codon1),t2(codon2);
00496 try
00497 {
00498 for(unsigned i = 0; i < 3 ; ++i)
00499 {
00500
00501 std::swap(t1[i],t2[i]);
00502 std::pair<unsigned,shortestPath::pathType> dt1 = diffType(t1,codon1,code);
00503 std::pair<unsigned,shortestPath::pathType> dt2 = diffType(t2,codon2,code);
00504 if (dt1 == dt2)
00505 p[i] = dt1.second;
00506 else
00507 p[i] = shortestPath::AMBIG;
00508
00509 std::swap(t1[i],t2[i]);
00510 }
00511 return boost::make_tuple(p[0],p[1],p[2]);
00512 }
00513 catch(Sequence::SeqException &e)
00514 {
00515 throw;
00516 }
00517 }
00518 }