00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <cmath>
00025 #include <cfloat>
00026 #include <cctype>
00027 #include <memory>
00028 #include <cassert>
00029 #include <algorithm>
00030 #ifdef HAVE_IEEEFP_H
00031 #include <ieeefp.h>
00032 #endif
00033 #include <Sequence/Seq.hpp>
00034 #include <Sequence/SeqProperties.hpp>
00035 #include <Sequence/Grantham.hpp>
00036 #include <Sequence/GranthamWeights.hpp>
00037 #include <Sequence/SingleSub.hpp>
00038 #include <Sequence/TwoSubs.hpp>
00039 #include <Sequence/ThreeSubs.hpp>
00040 #include <Sequence/Sites.hpp>
00041 #include <Sequence/Kimura80.hpp>
00042 #include <Sequence/RedundancyCom95.hpp>
00043 #include <Sequence/SeqExceptions.hpp>
00044 #include <Sequence/Comeron95.hpp>
00045
00046 using std::log;
00047 using std::toupper;
00048 using std::isfinite;
00054 namespace Sequence
00055 {
00056 Comeron95::Comeron95 (const Sequence::Seq * seqa,
00057 const Sequence::Seq * seqb,
00058 int max,
00059 const Sequence::RedundancyCom95 * genetic_code_redundancy,
00060 GeneticCodes code,
00061 WeightingScheme2 *_weights2,
00062 WeightingScheme3 *_weights3)
00063 throw (Sequence::SeqException) :
00064 __2wasNULL(false),__3wasNULL(false),__red_was_NULL(false)
00083 {
00084 assert (max >= 1 && max <=3);
00085
00086
00087
00088 if(! (seqa->length() == seqb->length()))
00089 {
00090 throw SeqException("Sequence::Comeron95 -- sequences of unequal lengths");
00091 }
00092
00093 if(!(fabs(double(seqa->length()%3)-0.)<=DBL_EPSILON ) ||
00094 !(fabs(double(seqb->length()%3)-0.)<=DBL_EPSILON ) )
00095 {
00096 throw (SeqException("Sequence::Comeron95 -- sequence lengths are not multiples of 3"));
00097 }
00098
00099
00100
00101 if(_weights2==NULL)
00102 {
00103 __2wasNULL=true;
00104 weights2 = new GranthamWeights2(code);
00105 }
00106
00107 if( _weights3 == NULL)
00108 {
00109 __3wasNULL=true;
00110 weights3 = new GranthamWeights3(code);
00111 }
00112
00113 maxhits = max;
00114 if (genetic_code_redundancy == NULL)
00115 {
00116 sitesObj = new RedundancyCom95 (code);
00117 __red_was_NULL = true;
00118 }
00119 else
00120 sitesObj = genetic_code_redundancy;
00121
00122 sites = new Sites (sitesObj, seqa, seqb, maxhits, code);
00123 q0 = 0.0;
00124 q2S = 0.0;
00125 q2V = 0.0;
00126 q4 = 0.0;
00127 p0 = 0.0;
00128 p2S = 0.0;
00129 p2V = 0.0;
00130 p4 = 0.0;
00131
00132 diverge (seqa, seqb,weights2,weights3);
00133 omega (seqa, seqb);
00134 }
00135
00136 Comeron95::~Comeron95 (void)
00140 {
00141 delete sites;
00142
00143
00144
00145 if(__2wasNULL==true)
00146 delete weights2;
00147 if(__3wasNULL==true)
00148 delete weights3;
00149 if (__red_was_NULL==true)
00150 delete sitesObj;
00151 }
00152
00153 void Comeron95::omega (const Sequence::Seq * seqobj1,
00154 const Sequence::Seq * seqobj2)
00164 {
00165 double log1, log2;
00166
00167 Qs = (q2V + q4) / (sites->L2V() + sites->L4());
00168
00169 if (!isfinite (Qs))
00170 Qs = 0.0;
00171
00172 Bs = (-0.5) * log (1.0 - (2.0 * Qs));
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186 Qa = (q0 + q2S) / (sites->L0() + sites->L2S());
00187
00188 if (!isfinite (Qa))
00189 Qa = 0.0;
00190
00191 Ba = (-0.5) * log (1.0 - (2.0 * Qa));
00192 if (!isfinite (Ba))
00193 {
00194
00195
00196
00197
00198 std::auto_ptr<Kimura80> K80( new Kimura80 (seqobj1, seqobj2));
00199 if (K80->K () < 1.0)
00200 Ba = 0.0;
00201 }
00202
00203
00204 double P2S_site = p2S / sites->L2S();
00205 double P2V_site = p2V / sites->L2V();
00206 double P0_site = p0 / sites->L0();
00207 double Q0_site = q0 / sites->L0();
00208 double P4_site = p4 / sites->L4();
00209 double Q4_site = q4 / sites->L4();
00210
00211 log1 = log (1.0 - (2.0 * P2S_site) - Qa);
00212 log2 = log (1.0 - (2.0 * Qa));
00213
00214 if (!isfinite (log1))
00215 log1 = 0.0;
00216 if (!isfinite (log2))
00217 log2 = 0.0;
00218
00219 A2S = (-0.5) * log1 + (0.25) * log2;
00220
00221 log1 = log (1.0 - (2.0 * P4_site) - Q4_site);
00222 log2 = log (1.0 - (2.0 * Q4_site));
00223
00224 if (!isfinite (log1))
00225 log1 = 0.0;
00226 if (!isfinite (log2))
00227 log2 = 0.0;
00228
00229 A4 = (-0.5) * log1 + (0.25) * log2;
00230
00231 As = (sites->L2S() * A2S + sites->L4() * A4) / (sites->L2S() +
00232 sites->L4());
00233
00234 log1 = log (1.0 - (2.0 * P2V_site) - Qs);
00235 log2 = log (1.0 - (2.0 * Qs));
00236
00237 if (!isfinite (log1))
00238 log1 = 0.0;
00239 if (!isfinite (log2))
00240 log2 = 0.0;
00241
00242 A2V = (-0.5) * log1 + (0.25) * log2;
00243
00244 log1 = log (1.0 - (2.0 * P0_site) - Q0_site);
00245 log2 = log (1.0 - (2.0 * Q0_site));
00246 if (!isfinite (log1))
00247 log1 = 0.;
00248 if (!isfinite (log2))
00249 log2 = 0.0;
00250
00251 A0 = (-0.5) * log1 + (0.25) * log2;
00252
00253 Aa = (sites->L2V() * A2V + sites->L0() * A0) / (sites->L2V() +
00254 sites->L0());
00255
00256 if (As <= 0.0)
00257 As = 0.0;
00258 if (Bs <= 0.0)
00259 Bs = 0.0;
00260 if (Aa <= 0.0)
00261 Aa = 0.0;
00262 if (Ba <= 0.0)
00263 Ba = 0.0;
00264
00265 Ks = As + Bs;
00266 Ka = Aa + Ba;
00267
00268 if (!isfinite (Ks))
00269 Ks = 999;
00270 if (!isfinite (Ka))
00271 Ka = 999;
00272 }
00273
00274 void Comeron95::diverge (const Sequence::Seq * seq1,
00275 const Sequence::Seq * seq2,
00276 WeightingScheme2 *weights2,
00277 WeightingScheme3 *weights3)
00283 {
00284 size_t i, j, ndiff;
00285 size_t length = seq1->length ();
00286
00287
00288 std::string codon1, codon2;
00289 codon1.resize (3);
00290 codon2.resize (3);
00291
00292 for (i = 0; i <= (length - 3); i += 3)
00293 {
00294 for (j = 0; j <= 2; ++j)
00295 {
00296
00297 codon1[j] = char(std::toupper((*seq1)[i + j]));
00298 codon2[j] = char(std::toupper((*seq2)[i + j]));
00299 }
00300 if ( std::find_if(codon1.begin(),codon1.end(),ambiguousNucleotide()) == codon1.end() &&
00301 std::find_if(codon2.begin(),codon2.end(),ambiguousNucleotide()) == codon2.end() )
00302 {
00303
00304 if (Different (codon1, codon2))
00305 {
00306
00307 ndiff = NumDiffs (codon1, codon2);
00308 if (ndiff == 1)
00309
00310 {
00311 SingleSub Single;
00312 Single(sitesObj, codon1, codon2);
00313 p0 += Single.P0 ();
00314 p2S += Single.P2S ();
00315 p2V += Single.P2V ();
00316 p4 += Single.P4 ();
00317 q0 += Single.Q0 ();
00318 q2S += Single.Q2S ();
00319 q2V += Single.Q2V ();
00320 q4 += Single.Q4 ();
00321 }
00322
00323 if (maxhits > 1)
00324 {
00325
00326
00327 if (Different
00328 (codon1, codon2))
00329 {
00330 ndiff = NumDiffs
00331 (codon1,
00332 codon2);
00333
00334 if (ndiff == 2
00335 && maxhits >= 2)
00336 {
00337
00338 TwoSubs Double;
00339 Double(sitesObj, codon1, codon2,weights2);
00340 p0 += Double. P0();
00341 p2S += Double.P2S ();
00342 p2V += Double.P2V ();
00343 p4 += Double.P4();
00344 q0 += Double.Q0 ();
00345 q2S += Double.Q2S ();
00346 q2V += Double.Q2V ();
00347 q4 += Double.Q4 ();
00348 }
00349 else if (ndiff == 3 && maxhits > 2)
00350 {
00351 ThreeSubs Triple;
00352 Triple(sitesObj,codon1, codon2,weights3);
00353 p0 += Triple. P0 ();
00354 p2S += Triple.P2S ();
00355 p2V += Triple.P2V ();
00356 p4 += Triple. P4 ();
00357 q0 += Triple.Q0 ();
00358 q2S += Triple.Q2S ();
00359 q2V += Triple.Q2V ();
00360 q4 += Triple.Q4 ();
00361 }
00362 }
00363 }
00364 }
00365 }
00366 }
00367
00368 if (!isfinite (p0))
00369 p0 = 0.0;
00370 if (!isfinite (p2S) )
00371 p2S = 0.0;
00372 if (!isfinite (p2V) )
00373 p2V = 0.0;
00374 if (!isfinite (p4) )
00375 p4 = 0.0;
00376 if (!isfinite (q0) )
00377 q0 = 0.0;
00378 if (!isfinite (q2S) )
00379 q2S = 0.0;
00380 if (!isfinite (q2V) )
00381 q2V = 0.0;
00382 if (!isfinite (q4) )
00383 q4 = 0.0;
00384 }
00385
00386 double Comeron95::L0 (void) const
00390 {
00391 return sites->L0();
00392 }
00393 double Comeron95::L2S (void) const
00397 {
00398 return sites->L2S();
00399 }
00400 double Comeron95::L2V (void) const
00404 {
00405 return sites->L2V();
00406 }
00407 double Comeron95::L4 (void) const
00411 {
00412 return sites->L4();
00413 }
00414
00415 double Comeron95::as (void) const
00419 {
00420 if (!isfinite (As))
00421 return 999.;
00422 return As;
00423 }
00424 double Comeron95::aa (void) const
00428 {
00429 if (!isfinite (Aa))
00430 return 999.;
00431 return Aa;
00432 }
00433 double Comeron95::bs (void) const
00437 {
00438 if (!isfinite (Bs))
00439 return 999.;
00440 return Bs;
00441 }
00442 double Comeron95::ba (void) const
00446 {
00447 if (!isfinite (Ba))
00448 return 999.;
00449 return Ba;
00450 }
00451
00452 double Comeron95::ratio(void) const
00457 {
00458
00459 if (Ka == 999. || Ks == 999. || fabs(Ks-0.) <= DBL_EPSILON)
00460 return 999.;
00461 return Ka / Ks;
00462 }
00463 double Comeron95::ka (void) const
00468 {
00469 return Ka;
00470 }
00471 double Comeron95::ks (void) const
00476 {
00477 return Ks;
00478 }
00479
00480 double Comeron95::P0 (void) const
00484 {
00485 return p0;
00486 }
00487 double Comeron95::P2S (void) const
00491 {
00492 return p2S;
00493 }
00494 double Comeron95::P2V (void) const
00498 {
00499 return p2V;
00500 }
00501 double Comeron95::P4 (void) const
00505 {
00506 return p4;
00507 }
00508 double Comeron95::Q0 (void) const
00512 {
00513 return q0;
00514 }
00515 double Comeron95::Q2S (void) const
00519 {
00520 return q2S;
00521 }
00522 double Comeron95::Q2V (void) const
00526 {
00527 return q2V;
00528 }
00529 double Comeron95::Q4 (void) const
00533 {
00534 return q4;
00535 }
00536 }