00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <cmath>
00025 #include <cfloat>
00026 #include <cassert>
00027 #include <utility>
00028 #include <cstdlib>
00029 #include <cctype>
00030 #include <set>
00031 #include <algorithm>
00032 #include <boost/bind.hpp>
00033 #include <Sequence/PolyTable.hpp>
00034 #include <Sequence/Comparisons.hpp>
00035 #include <Sequence/Recombination.hpp>
00036 #include <Sequence/PolySNP.hpp>
00037 #include <Sequence/stateCounter.hpp>
00038 #include <Sequence/SeqConstants.hpp>
00039 #include <Sequence/PolySNPimpl.hpp>
00040 #include <Sequence/StringComp.hpp>
00041
00047 using std::string;
00048 using namespace Sequence::Recombination;
00049
00050 namespace Sequence
00051 {
00052 struct uniqueSeq : public std::binary_function<std::string,std::string,bool>
00053 {
00054 inline bool operator()(const std::string & l, const std::string & r) const
00055 {
00056
00057
00058 return ( Different(l,r) && std::lexicographical_compare(l.begin(),l.end(),r.begin(),r.end(),lt_nocase()) );
00059 }
00060 };
00061 _PolySNPImpl::_PolySNPImpl (const Sequence::PolyTable * data, bool haveOutgroup ,
00062 unsigned outgroup, bool totMuts):
00063 _data(data),
00064 _nsites(data->numsites()),
00065 _nsam(unsigned(data->size())),
00066 _outgroup(outgroup),
00067 _haveOutgroup(haveOutgroup),
00068 _totMuts(totMuts),
00069 _totsam (unsigned(data->size())),
00070 _DVK(0),
00071 _DVH(1.0),
00072 _counted_singletons(false),
00073 _know_pi(false),
00074 _CalculatedDandV(false),
00075 _pi(0.),
00076 _singletons(0),
00077 _walls_Bprime(0),
00078 _NumPoly(0),
00079 _walls_B(0.),_walls_Q(0.),
00080 _calculated_wall_stats(false),
00081 _counts(std::vector< Sequence::stateCounter >(_nsites,Sequence::stateCounter('-'))),
00082 _derivedCounts(std::vector< std::pair< bool, stateCounter > >
00083 (_nsites,
00084 std::make_pair<bool,stateCounter>(true,stateCounter('-')))),
00085 _preprocessed(false)
00086 {
00087 if (haveOutgroup)
00088 --_totsam;
00089 preprocess();
00090 }
00091
00092 void _PolySNPImpl::preprocess()
00108 {
00109 if (!_preprocessed)
00110 {
00111 for(unsigned site = 0 ; site < _nsites ; ++site)
00112 {
00113 for (unsigned seq = 0 ; seq < _nsam ; ++seq)
00114 {
00115
00116
00117 if (!_haveOutgroup
00118 || (_haveOutgroup && seq != _outgroup))
00119 {
00120 _counts[site]( (*_data)[seq][site] );
00121 }
00122
00123
00124 if (_haveOutgroup == true)
00125 {
00126
00127
00128
00129 if ( std::toupper((*_data)[_outgroup][site]) == 'N' ||
00130 (*_data)[_outgroup][site] == '-')
00131 {
00132 _derivedCounts[site].first = false;
00133 }
00134 else
00135 {
00136
00137 _derivedCounts[site].first = true;
00138 if ( seq != _outgroup && (*_data)[seq][site] != (*_data)[_outgroup][site] )
00139 {
00140 _derivedCounts[site].second((*_data)[seq][site]);
00141 }
00142 }
00143 }
00144 else
00145 {
00146
00147
00148 _derivedCounts[site].first = false;
00149 }
00150 }
00151 if(_counts[site].nStates() > 1 && _counts[site].gap==0) ++_NumPoly;
00152 }
00153 _preprocessed = true;
00154 }
00155 }
00156
00157 PolySNP::PolySNP (const Sequence::PolyTable * data, bool haveOutgroup ,
00158 unsigned outgroup, bool totMuts):
00159 rep(std::auto_ptr<_PolySNPImpl>(new _PolySNPImpl(data,haveOutgroup,outgroup,totMuts)))
00169 {}
00170
00171 PolySNP::~PolySNP (void)
00172 {
00173 }
00174
00175 double
00176 PolySNP::ThetaPi (void)
00203 {
00204 assert ( rep->_preprocessed );
00205 if (rep->_know_pi == false)
00206 {
00207 double Pi = 0.0;
00208 for (unsigned i = 0; i < rep->_nsites; ++i)
00209 {
00210 if ( rep->_counts[i].gap == 0 &&
00211 rep->_counts[i].nStates() > 1 )
00212 {
00213 unsigned samplesize = rep->_totsam;
00214 double SSH = 0.0;
00215 samplesize -= rep->_counts[i].n;
00216 if (samplesize > 1)
00217 {
00218 double denom = (double(samplesize)* (double(samplesize) - 1.0));
00219 SSH += (rep->_counts[i].a > 0) ? double(rep->_counts[i].a) *
00220 double (rep->_counts[i].a-1) /denom : 0. ;
00221 SSH += (rep->_counts[i].g > 0) ? double(rep->_counts[i].g) *
00222 double (rep->_counts[i].g-1) /denom : 0. ;
00223 SSH += (rep->_counts[i].c > 0) ? double(rep->_counts[i].c) *
00224 double (rep->_counts[i].c-1) /denom : 0. ;
00225 SSH += (rep->_counts[i].t > 0) ? double(rep->_counts[i].t) *
00226 double (rep->_counts[i].t-1) /denom : 0. ;
00227 SSH += (rep->_counts[i].zero > 0) ? double(rep->_counts[i].zero) *
00228 double (rep->_counts[i].zero-1) /denom : 0. ;
00229 SSH += (rep->_counts[i].one > 0) ? double(rep->_counts[i].one) *
00230 double (rep->_counts[i].one-1) /denom : 0. ;
00231 Pi += (1.0 - SSH);
00232 }
00233 }
00234 }
00235 rep->_pi = Pi;
00236 rep->_know_pi = true;
00237 return rep->_pi;
00238 }
00239 else
00240 return rep->_pi;
00241 return rep->_pi;
00242 }
00243
00244
00245 double
00246 PolySNP::ThetaW (void)
00264 {
00265 assert ( rep->_preprocessed );
00266 double W = 0.0;
00267 for (unsigned i = 0; i < rep->_nsites; ++i)
00268 {
00269 if ( rep->_counts[i].gap == 0)
00270 {
00271 int nstates = rep->_counts[i].nStates();
00272 unsigned nsam_site = rep->_totsam - rep->_counts[i].n;
00273 double denom=0.0;
00274 if(rep->_totMuts==true&&nstates>=2)
00275 {
00276 for(unsigned i = 1 ; i < nsam_site ; ++i)
00277 denom += 1.0/double(i);
00278 W += double(nstates-1)/denom;
00279 }
00280 else if (rep->_totMuts==false &&nstates>=2)
00281 {
00282 for(unsigned i = 1 ; i < nsam_site ; ++i)
00283 denom += 1.0/double(i);
00284 W += 1.0/denom;
00285 }
00286 }
00287 }
00288 return (W);
00289 }
00290
00291 double
00292 PolySNP::ThetaH (void)
00324 {
00325 assert ( rep->_preprocessed );
00326 if (rep->_NumPoly==0)
00327 return 0.;
00328 if( !rep->_haveOutgroup)
00329 return strtod("NAN",NULL);
00330 double H = 0.0;
00331 bool anc_is_present = 0;
00332
00333 for (unsigned i = 0; i < rep->_nsites; ++i)
00334 {
00335 if ( rep->_derivedCounts[i].second.gap == 0)
00336 {
00337 unsigned samplesize = rep->_totsam;
00338 unsigned sumDerCounts = 0;
00339 sumDerCounts += rep->_derivedCounts[i].second.a;
00340 sumDerCounts += rep->_derivedCounts[i].second.g;
00341 sumDerCounts += rep->_derivedCounts[i].second.c;
00342 sumDerCounts += rep->_derivedCounts[i].second.t;
00343 sumDerCounts += rep->_derivedCounts[i].second.zero;
00344 sumDerCounts += rep->_derivedCounts[i].second.one;
00345 unsigned ancestralCounts = samplesize-sumDerCounts-rep->_derivedCounts[i].second.n;
00346
00347
00348
00349
00350
00351 anc_is_present = (rep->_derivedCounts[i].first == true &&
00352 ancestralCounts > 0 ) ? true : false;
00353 if (anc_is_present)
00354 {
00355
00356 int numDer = rep->_derivedCounts[i].second.nStates();
00357 if (numDer == 1)
00358 {
00359 samplesize -= rep->_derivedCounts[i].second.n;
00360 double denom = (double(samplesize) *
00361 (double(samplesize) - 1.0));
00362 H += (rep->_derivedCounts[i].second.a > 0 ) ?
00363 (2.0 * pow (double (rep->_derivedCounts[i].second.a), 2.0)) / denom : 0. ;
00364 H += (rep->_derivedCounts[i].second.g > 0 ) ?
00365 (2.0 * pow (double (rep->_derivedCounts[i].second.g), 2.0)) / denom : 0. ;
00366 H += (rep->_derivedCounts[i].second.c > 0 ) ?
00367 (2.0 * pow (double (rep->_derivedCounts[i].second.c), 2.0)) / denom : 0. ;
00368 H += (rep->_derivedCounts[i].second.t > 0 ) ?
00369 (2.0 * pow (double (rep->_derivedCounts[i].second.t), 2.0)) / denom : 0. ;
00370 H += (rep->_derivedCounts[i].second.zero > 0) ?
00371 (2.0 * pow (double (rep->_derivedCounts[i].second.zero), 2.0)) / denom : 0. ;
00372 H += (rep->_derivedCounts[i].second.one > 0 ) ?
00373 (2.0 * pow (double (rep->_derivedCounts[i].second.one), 2.0)) / denom : 0. ;
00374 }
00375 else if (numDer == 2 && rep->_haveOutgroup)
00376 {
00377
00378 int config[2];
00379 unsigned k = 0;
00380 if(rep->_derivedCounts[i].second.a > 0)
00381 {
00382 config[k++] = rep->_derivedCounts[i].second.a;
00383 }
00384 if(rep->_derivedCounts[i].second.g > 0)
00385 {
00386 config[k++] = rep->_derivedCounts[i].second.g;
00387 }
00388 if(rep->_derivedCounts[i].second.c > 0)
00389 {
00390 config[k++] = rep->_derivedCounts[i].second.c;
00391 }
00392 if(rep->_derivedCounts[i].second.t > 0)
00393 {
00394 config[k++] = rep->_derivedCounts[i].second.t;
00395 }
00396 if(rep->_derivedCounts[i].second.zero > 0)
00397 {
00398 config[k++] = rep->_derivedCounts[i].second.zero;
00399 }
00400 if(rep->_derivedCounts[i].second.one > 0)
00401 {
00402 config[k++] = rep->_derivedCounts[i].second.one;
00403 }
00404 for (int i = 0; i < 2; ++i)
00405 {
00406 double sample_size_adjust = (i==0) ? double(config[1]) : double(config[0]);
00407 H += (2.0 *
00408 pow (double (config[i]),
00409 2.0)) / ((double(samplesize) -
00410 sample_size_adjust)
00411 * (double(samplesize) -
00412 sample_size_adjust
00413 - 1.0));
00414 }
00415 }
00416 }
00417 }
00418 }
00419 return H;
00420 }
00421
00422 double
00423 PolySNP::ThetaL (void)
00456 {
00457 assert(rep->_haveOutgroup==true);
00458 assert ( rep->_preprocessed );
00459 double thetal = 0.0;
00460 if(rep->_NumPoly==0) return thetal;
00461 bool anc_is_present = 0;
00462
00463 for (unsigned i = 0; i < rep->_nsites; ++i)
00464 {
00465 if (rep->_derivedCounts[i].first == true && rep->_derivedCounts[i].second.gap == 0)
00466 {
00467 unsigned samplesize = rep->_totsam;
00468 unsigned sumDerCounts = 0;
00469 sumDerCounts += rep->_derivedCounts[i].second.a;
00470 sumDerCounts += rep->_derivedCounts[i].second.g;
00471 sumDerCounts += rep->_derivedCounts[i].second.c;
00472 sumDerCounts += rep->_derivedCounts[i].second.t;
00473 sumDerCounts += rep->_derivedCounts[i].second.zero;
00474 sumDerCounts += rep->_derivedCounts[i].second.one;
00475 unsigned ancestralCounts = samplesize-sumDerCounts-rep->_derivedCounts[i].second.n;
00476
00477
00478
00479
00480
00481 anc_is_present = (rep->_derivedCounts[i].first == true &&
00482 ancestralCounts > 0 ) ? true : false;
00483 if (anc_is_present)
00484 {
00485
00486 int numDer = rep->_derivedCounts[i].second.nStates();
00487 if (numDer == 1)
00488 {
00489 samplesize -= rep->_derivedCounts[i].second.n;
00490 double denom = (double(samplesize) - 1.0);
00491 thetal += (rep->_derivedCounts[i].second.a > 0 ) ?
00492 double (rep->_derivedCounts[i].second.a) / denom : 0. ;
00493 thetal += (rep->_derivedCounts[i].second.g > 0 ) ?
00494 double (rep->_derivedCounts[i].second.g) / denom : 0. ;
00495 thetal += (rep->_derivedCounts[i].second.c > 0 ) ?
00496 double (rep->_derivedCounts[i].second.c) / denom : 0. ;
00497 thetal += (rep->_derivedCounts[i].second.t > 0 ) ?
00498 double (rep->_derivedCounts[i].second.t) / denom : 0. ;
00499 thetal += (rep->_derivedCounts[i].second.zero > 0) ?
00500 double (rep->_derivedCounts[i].second.zero) / denom : 0. ;
00501 thetal += (rep->_derivedCounts[i].second.one > 0 ) ?
00502 double (rep->_derivedCounts[i].second.one) / denom : 0. ;
00503 }
00504 else if (numDer == 2 && rep->_haveOutgroup)
00505 {
00506
00507 int config[2];
00508 unsigned k = 0;
00509 if(rep->_derivedCounts[i].second.a > 0)
00510 {
00511 config[k++] = rep->_derivedCounts[i].second.a;
00512 }
00513 if(rep->_derivedCounts[i].second.g > 0)
00514 {
00515 config[k++] = rep->_derivedCounts[i].second.g;
00516 }
00517 if(rep->_derivedCounts[i].second.c > 0)
00518 {
00519 config[k++] = rep->_derivedCounts[i].second.c;
00520 }
00521 if(rep->_derivedCounts[i].second.t > 0)
00522 {
00523 config[k++] = rep->_derivedCounts[i].second.t;
00524 }
00525 if(rep->_derivedCounts[i].second.zero > 0)
00526 {
00527 config[k++] = rep->_derivedCounts[i].second.zero;
00528 }
00529 if(rep->_derivedCounts[i].second.one > 0)
00530 {
00531 config[k++] = rep->_derivedCounts[i].second.one;
00532 }
00533 for (int i = 0; i < 2; ++i)
00534 {
00535 double sample_size_adjust = (i==0) ? double(config[1]) : double(config[0]);
00536 thetal += (double (config[i])) / (double(samplesize) -
00537 sample_size_adjust- 1.0);
00538 }
00539 }
00540 }
00541 }
00542 }
00543 return thetal;
00544 }
00545
00546 unsigned
00547 PolySNP::NumPoly (void)
00551 {
00552 assert ( rep->_preprocessed );
00553 unsigned npoly = 0;
00554 for (unsigned i = 0; i < rep->_nsites; ++i)
00555 {
00556 if (rep->_counts[i].nStates() > 1 &&
00557 rep->_counts[i].gap == 0)
00558 ++npoly;
00559 }
00560 return npoly;
00561 }
00562
00563 unsigned
00564 PolySNP::NumMutations (void)
00569 {
00570 assert( rep->_preprocessed );
00571 unsigned nmut = 0;
00572
00573 for (unsigned i = 0; i < rep->_nsites; ++i)
00574 {
00575 int nstates = (rep->_counts[i].gap==0) ? rep->_counts[i].nStates() : 0;
00576 if (nstates > 1)
00577 nmut += nstates - 1;
00578 }
00579 return nmut;
00580 }
00581
00582 unsigned
00583 PolySNP::NumSingletons (void)
00587 {
00588 assert ( rep->_preprocessed );
00589 unsigned nsing = 0,nstates;
00590 for (unsigned i = 0; i < rep->_nsites; ++i)
00591 {
00592 unsigned curr_nsing=0,nsam=0;
00593 nstates = rep->_counts[i].nStates();
00594 if (rep->_counts[i].gap == 0 && nstates>1)
00595 {
00596 nsam = rep->_totsam - rep->_counts[i].n;
00597 if(nsam==2 && nstates ==2)
00598 curr_nsing=1;
00599 else
00600 {
00601 curr_nsing += (rep->_counts[i].a == 1) ? 1 : 0;
00602 curr_nsing += (rep->_counts[i].g == 1) ? 1 : 0;
00603 curr_nsing += (rep->_counts[i].c == 1) ? 1 : 0;
00604 curr_nsing += (rep->_counts[i].t == 1) ? 1 : 0;
00605 curr_nsing += (rep->_counts[i].zero == 1) ? 1 : 0;
00606 curr_nsing += (rep->_counts[i].one == 1) ? 1 : 0;
00607 }
00608 }
00609 nsing += curr_nsing;
00610 }
00611 return nsing;
00612 }
00613
00614
00615 unsigned
00616 PolySNP::NumExternalMutations (void)
00621 {
00622 if(!rep->_haveOutgroup) return SEQMAXUNSIGNED;
00623 assert ( rep->_preprocessed );
00624 int next = 0;
00625 for (unsigned i = 0; i < rep->_nsites ; ++i)
00626 {
00627 unsigned nsam=rep->_totsam;
00628 unsigned curr_next=0;
00629 if(rep->_derivedCounts[i].first == true &&
00630 rep->_derivedCounts[i].second.gap == 0)
00631 {
00632 nsam -= rep->_derivedCounts[i].second.n;
00633 curr_next += (rep->_derivedCounts[i].second.a == 1) ? 1 : 0;
00634 curr_next += (rep->_derivedCounts[i].second.g == 1) ? 1 : 0;
00635 curr_next += (rep->_derivedCounts[i].second.c == 1) ? 1 : 0;
00636 curr_next += (rep->_derivedCounts[i].second.t == 1) ? 1 : 0;
00637 curr_next += (rep->_derivedCounts[i].second.zero == 1) ? 1 : 0;
00638 curr_next += (rep->_derivedCounts[i].second.one == 1) ? 1 : 0;
00639 }
00640 next += (nsam>1) ? curr_next : 0;
00641 }
00642 return next;
00643 }
00644
00645
00646 double
00647 PolySNP::TajimasD (void)
00655 {
00656 assert ( rep->_preprocessed );
00657 if(rep->_NumPoly==0) return strtod("NAN",NULL);
00658 double D = 0.0;
00659 double Pi = ThetaPi ();
00660 double W = ThetaW ();
00661 if (fabs(Pi-0.) <= DBL_EPSILON && fabs(W-0.) <= DBL_EPSILON)
00662 D = 0.0;
00663 else
00664 D = (Pi - W) / Dnominator ();
00665 return D;
00666 }
00667
00668 double PolySNP::Hprime (bool likeThorntonAndolfatto)
00681 {
00682 assert ( rep->_preprocessed );
00683 if(rep->_NumPoly==0) return strtod("NAN",NULL);
00684 assert(rep->_haveOutgroup==true);
00685 double Hpr = 0.0;
00686 double a = a_sub_n ();
00687 double b = b_sub_n ();
00688 double pi = ThetaPi ();
00689 double theta = ThetaW();
00690
00691 double thetal = ThetaL();
00692 double b_n_plus1 = b_sub_n_plus1();
00693 double S = (rep->_totMuts) ? NumMutations() : NumPoly();
00694 double thetasq = (likeThorntonAndolfatto == false ) ? S * (S-1)/(a*a + b) : theta*theta;
00695
00696 double vThetal =
00697 (rep->_totsam * theta)/(2.0 * (rep->_totsam - 1.0))
00698 + (2.0 * pow(rep->_totsam/(rep->_totsam - 1.0), 2.0) * (b_n_plus1 - 1.0) - 1.0) * thetasq;
00699
00700 double vPi =
00701 (3.0 * rep->_totsam *(rep->_totsam + 1.0) * theta
00702 + 2.0 * ( rep->_totsam * rep->_totsam + rep->_totsam + 3.0) * thetasq )
00703 / (9 * rep->_totsam * (rep->_totsam -1.0));
00704
00705 double cov =
00706 ((rep->_totsam + 1.0) / (3.0 * (rep->_totsam - 1.0))) * theta
00707 +(( 7.0 * rep->_totsam * rep->_totsam + 3.0 * rep->_totsam - 2.0 - 4.0 * rep->_totsam *( rep->_totsam + 1.0) * b_n_plus1)
00708 / (2.0 * pow ((rep->_totsam - 1.0), 2.0)))
00709 * thetasq ;
00710
00711
00712 Hpr = pi - thetal;
00713 Hpr /= pow ( (vThetal + vPi - 2.0 * cov), 0.5);
00714 return (Hpr);
00715 }
00716
00717
00718 double
00719 PolySNP::Dnominator (void)
00724 {
00725 assert ( rep->_preprocessed );
00726 if(rep->_NumPoly==0) return strtod("NAN",NULL);
00727 double S = 0.0;
00728 if (rep->_totMuts)
00729 {
00730 S = double (NumMutations ());
00731 }
00732 else if (!(rep->_totMuts))
00733 {
00734 S = double (NumPoly ());
00735 }
00736 double a1, a2, b1, b2, c1, c2, e1, e2;
00737
00738 a1 = a_sub_n ();
00739 a2 = b_sub_n ();
00740 b1 = (rep->_totsam + 1.0) / (3.0 * (rep->_totsam - 1.0));
00741 b2 = (2.0 * (pow (rep->_totsam, 2.0)
00742 + rep->_totsam + 3.0)) / (9.0 * rep->_totsam *
00743 (rep->_totsam - 1.0));
00744 c1 = b1 - 1.0 / a1;
00745 c2 = b2 - (rep->_totsam + 2.0) / (a1 * rep->_totsam) + a2 / pow (a1, 2.0);
00746 e1 = c1 / a1;
00747 e2 = c2 / (pow (a1, 2.0) + a2);
00748 double denominator = pow ((e1 * S + e2 * S * (S - 1.0)), 0.5);
00749 return (denominator);
00750 }
00751
00752 void
00753 PolySNP::DepaulisVeuilleStatistics (void)
00766 {
00767 assert ( rep->_preprocessed );
00768 if (!(rep->_CalculatedDandV))
00769 {
00770 if(rep->_NumPoly == 0)
00771 {
00772 rep->_DVK = 1;
00773 rep->_DVH = 0.;
00774 return;
00775 }
00776 if (rep->_data->size() > 0)
00777 {
00778
00779
00780 std::set<string,uniqueSeq> unique_haplotypes;
00781 if (rep->_haveOutgroup)
00782 {
00783 unique_haplotypes.insert(rep->_data->begin(),
00784 rep->_data->begin()+rep->_outgroup);
00785 unique_haplotypes.insert(rep->_data->begin()+rep->_outgroup+1,
00786 rep->_data->end());
00787 }
00788 else
00789 {
00790 unique_haplotypes.insert(rep->_data->begin(),
00791 rep->_data->end());
00792 }
00793
00794 std::set<string,uniqueSeq>::const_iterator beg = unique_haplotypes.begin(),
00795 end = unique_haplotypes.end();
00796 rep->_DVK = unsigned(unique_haplotypes.size());
00797 while(beg != end)
00798 {
00799 ptrdiff_t _count = 0;
00800 if (rep->_haveOutgroup)
00801 {
00802 _count += std::count_if(rep->_data->begin(),
00803 rep->_data->begin()+rep->_outgroup,
00804 boost::bind(notDifferent<string>,
00805 _1,*beg));
00806 _count += std::count_if(rep->_data->begin()+rep->_outgroup+1,
00807 rep->_data->end(),
00808 boost::bind(notDifferent<string>,
00809 _1,*beg));
00810 }
00811 else
00812 {
00813 _count += std::count_if(rep->_data->begin(),
00814 rep->_data->end(),
00815 boost::bind(notDifferent<string>,
00816 _1,*beg));
00817 }
00818 rep->_DVH -= pow (double (_count) / rep->_totsam, 2.0);
00819 ++beg;
00820 }
00821 rep->_DVH *= rep->_totsam / (rep->_totsam - 1.0);
00822 rep->_CalculatedDandV = 1;
00823 }
00824 }
00825 }
00826
00827 double PolySNP::WallsB(void)
00833 {
00834 assert ( rep->_preprocessed );
00835 if (rep->_calculated_wall_stats == false)
00836 {
00837 WallStats();
00838 }
00839 return rep->_walls_B;
00840 }
00841
00842 void PolySNP::WallStats(void)
00843 {
00844 assert ( rep->_preprocessed );
00845 unsigned S = 0;
00846
00847
00848 for( std::vector<stateCounter>::const_iterator itr = rep->_counts.begin() ;
00849 itr < rep->_counts.end();
00850 ++itr)
00851 {
00852 if ( itr->nStates() == 2 && itr->gap == 0)
00853 ++S;
00854 }
00855 if (S > 1)
00856 {
00857 ptrdiff_t nhap_curr, nhap_left;
00858
00859 nhap_left = ptrdiff_t(SEQMAXUNSIGNED);
00860
00861 unsigned A = 0;
00862
00863 for (unsigned site1 = 0 ; site1 < rep->_nsites-1 ; ++site1)
00864 {
00865 for(unsigned site2=site1+1 ; site2 < rep->_nsites ; ++site2)
00866 {
00867 if ( rep->_counts[site1].nStates() == 2
00868 && rep->_counts[site2].nStates() == 2 )
00869 {
00870 std::string config;
00871 config.resize(2);
00872 std::set<string,uniqueSeq> unique_haplotypes;
00873 nhap_curr = 0;
00874 for (unsigned i = 0 ; i < rep->_nsam ; ++i)
00875 {
00876 if ( (!rep->_haveOutgroup) || (rep->_haveOutgroup && i != rep->_outgroup) )
00877 {
00878 config[0] = (*rep->_data)[i][site1];
00879 config[1] = (*rep->_data)[i][site2];
00880 unique_haplotypes.insert(config);
00881 }
00882 }
00883 nhap_curr = unique_haplotypes.size();
00884 if(site1==0)
00885 {
00886 if (nhap_curr == 2)
00887 {
00888 ++rep->_walls_Bprime;
00889 ++A;
00890 }
00891 }
00892 else
00893 {
00894 if (nhap_curr == 2)
00895 ++rep->_walls_Bprime;
00896 if (nhap_curr == 2 && nhap_left != 2)
00897 ++A;
00898 }
00899 nhap_left = nhap_curr;
00900 site1=site2;
00901 }
00902 }
00903 }
00904 rep->_walls_B = double(rep->_walls_Bprime)/(double(S-1));
00905 rep->_walls_Q = (double(rep->_walls_Bprime) + double(A))/(double(S));
00906 }
00907 else
00908 {
00909 rep->_walls_B = strtod("NAN",NULL);
00910 rep->_walls_Bprime = 0;
00911 rep->_walls_Q = strtod("NAN",NULL);
00912 }
00913 rep->_calculated_wall_stats=true;
00914 }
00915
00916
00917 unsigned PolySNP::WallsBprime(void)
00923 {
00924 assert ( rep->_preprocessed );
00925 if (rep->_calculated_wall_stats == false)
00926 {
00927 WallStats();
00928 }
00929 return rep->_walls_Bprime;
00930 }
00931
00932 double PolySNP::WallsQ(void)
00938 {
00939 assert ( rep->_preprocessed );
00940 if (rep->_calculated_wall_stats == false)
00941 {
00942 WallStats();
00943 }
00944 return rep->_walls_Q;
00945 }
00946
00947 double
00948 PolySNP::VarPi (void)
00953 {
00954 double Pi = ThetaPi ();
00955 double variance = 3.0 * rep->_totsam * (rep->_totsam + 1.0) * Pi +
00956 2.0 * (pow (rep->_totsam, 2.0) + rep->_totsam + 3.0) * pow (Pi, 2.0);
00957 variance /= (11.0 * pow (rep->_totsam, 2.0) - 7.0 * rep->_totsam + 6.0);
00958 return (variance);
00959 }
00960
00961 double
00962 PolySNP::StochasticVarPi (void)
00967 {
00968 double Pi = ThetaPi ();
00969 double variance = (3.0 * pow (rep->_totsam, 2.0) - 3.0 * rep->_totsam + 2.0) * Pi +
00970 2.0 * rep->_totsam * (rep->_totsam - 1.0) * pow (Pi, 2.0);
00971 variance /= (11.0 * pow (rep->_totsam, 2.0) - 7.0 * rep->_totsam + 6.0);
00972 return (variance);
00973 }
00974
00975 double
00976 PolySNP::SamplingVarPi (void)
00982 {
00983 double Pi = ThetaPi ();
00984 double variance =
00985 2.0 * (3.0 * rep->_totsam - 1.0) * Pi + 2.0 * (2.0 * rep->_totsam +
00986 3.0) * pow (Pi, 2.0);
00987 variance /= (11.0 * pow (rep->_totsam, 2.0) - 7.0 * rep->_totsam + 6.0);
00988 return (variance);
00989 }
00990
00991
00992 double
00993 PolySNP::VarThetaW (void)
00998 {
00999 double a1 = a_sub_n ();
01000 double a2 = b_sub_n ();
01001 double S = (rep->_totMuts) ? NumMutations() : NumPoly();
01002 double variance = pow (a1, 2.0) * S + a2 * pow (S, 2.0);
01003 variance /= pow (a1, 2.0) * (pow (a1, 2.0) + a2);
01004 return (variance);
01005 }
01006
01007
01008 double
01009 PolySNP::FuLiD (void)
01015 {
01016 assert ( rep->_preprocessed );
01017
01018 if(rep->_NumPoly==0 || !rep->_haveOutgroup) return strtod("NAN",NULL);
01019 double D = 0.0;
01020 double ExternalMutations =
01021 double (NumExternalMutations ());
01022 double NumMut = double (NumMutations ());
01023 double a = a_sub_n ();
01024 double b = b_sub_n ();
01025 double c = c_sub_n ();
01026 double vD = 1.0 +
01027 (pow (a, 2.0) / (b + pow (a, 2.0)) *
01028 (c - (rep->_totsam + 1.0) / (rep->_totsam - 1.0)));
01029 double uD = a - 1.0 - vD;
01030 D = NumMut - a * double (ExternalMutations);
01031 D /= pow ((uD * NumMut + vD * pow (NumMut, 2.0)), 0.5);
01032 return (D);
01033 }
01034
01035
01036
01037 double
01038 PolySNP::FuLiF (void)
01044 {
01045 assert ( rep->_preprocessed );
01046 if(rep->_NumPoly==0 || !rep->_haveOutgroup) return strtod("NAN",NULL);
01047 double F = 0.0;
01048 double Pi = ThetaPi ();
01049 double NumMut = double (NumMutations());
01050 double ExternalMutations =
01051 double (NumExternalMutations ());
01052 double a = a_sub_n ();
01053 double a_n_plus1 = a_sub_n_plus1 ();
01054 double b = b_sub_n ();
01055 double c = c_sub_n ();
01056 double vF = c + 2.0 * (pow (rep->_totsam, 2.0) + rep->_totsam +
01057 3.0) / (9.0 * rep->_totsam * (double (rep->_totsam - 1.0)));
01058 vF -= (2.0 / (rep->_totsam - 1.0));
01059 vF /= (pow (a, 2.0) + b);
01060
01061 double uF = 1.0 + (rep->_totsam + 1.0) / (3.0 * (double (rep->_totsam - 1.0)));
01062 uF -= 4.0 * ((rep->_totsam + 1.0) / (pow (rep->_totsam - 1.0, 2.0))) *
01063 (a_n_plus1 - 2.0 * rep->_totsam / (rep->_totsam + 1.0));
01064 uF /= a;
01065 uF -= vF;
01066
01067 F = Pi - ExternalMutations;
01068 F /= pow (uF * NumMut + vF * pow (NumMut, 2.0), 0.5);
01069 return (F);
01070 }
01071
01072
01073 double
01074 PolySNP::FuLiDStar (void)
01079 {
01080 assert ( rep->_preprocessed );
01081 if(rep->_NumPoly==0) return strtod("NAN",NULL);
01082 double DStar = 0.0;
01083 double Singletons =
01084 double (NumSingletons ());
01085 double NumMut = double (NumMutations ());
01086
01087 double a = a_sub_n ();
01088 double b = b_sub_n ();
01089 double d = d_sub_n ();
01090
01091 double vD = pow (rep->_totsam / (rep->_totsam - 1.0), 2.0) * b;
01092 vD += pow (a, 2.0) * d;
01093 vD -= 2.0 * (rep->_totsam * a * (a + 1.0)) /
01094 (pow (double (rep->_totsam - 1.0), 2.0));
01095 vD /= (pow (a, 2.0) + b);
01096
01097 double uD =
01098 (rep->_totsam / (rep->_totsam - 1.0)) * (a -
01099 (rep->_totsam / (rep->_totsam - 1.0))) - vD;
01100
01101 DStar = (rep->_totsam / (rep->_totsam - 1.0)) * NumMut - a * double (Singletons);
01102 DStar /= pow (uD * NumMut + vD * pow (NumMut, 2.0), 0.5);
01103 return (DStar);
01104 }
01105
01106
01107 double
01108 PolySNP::FuLiFStar (void)
01115 {
01116 assert ( rep->_preprocessed );
01117 if(rep->_NumPoly==0) return strtod("NAN",NULL);
01118 double FStar = 0.0;
01119 double Singletons =
01120 double (NumSingletons ());
01121 double Pi = ThetaPi ();
01122 double NumMut = double (NumMutations ());
01123
01124 double a = a_sub_n ();
01125 double a_n_plus1 = a_sub_n_plus1 ();
01126 double b = b_sub_n ();
01127
01128
01129 double vF = 2.0 * pow (rep->_totsam, 3.0) + 110.0 * pow (rep->_totsam,
01130 2.0) -
01131 255.0 * rep->_totsam + 153.0;
01132 vF /= (9.0 * pow (rep->_totsam, 2.0) * (rep->_totsam - 1.0));
01133 vF += (((2.0 * (rep->_totsam - 1.0) * a) / pow (rep->_totsam, 2.0)) -
01134 (8.0 * b / rep->_totsam));
01135 vF /= (pow (a, 2.0) + b);
01136
01137 double uF =
01138 (4.0 * pow (rep->_totsam, 2.0) + 19.0 * rep->_totsam + 3.0 -
01139 12.0 * (rep->_totsam + 1.0) * a_n_plus1);
01140 uF /= (3.0 * rep->_totsam * (rep->_totsam - 1.0));
01141 uF /= a;
01142 uF -= vF;
01143 FStar = Pi - (((rep->_totsam - 1.0) / rep->_totsam)) * double (Singletons);
01144 FStar /= pow ((uF * NumMut + vF * pow (NumMut, 2.0)), 0.5);
01145 return (FStar);
01146 }
01147
01148 double
01149 PolySNP::a_sub_n (void)
01155 {
01156 assert ( rep->_preprocessed );
01157 int i;
01158 double a = 0.0;
01159 for (i = 1; i < int (rep->_totsam); ++i)
01160 a += 1. / double (i);
01161 return a;
01162 }
01163
01164 double
01165 PolySNP::a_sub_n_plus1 (void)
01170 {
01171 assert ( rep->_preprocessed );
01172 int i;
01173 double a = 0.0;
01174 for (i = 1; i < int (rep->_totsam) + 1; ++i)
01175 {
01176 a += 1. / double (i);
01177 }
01178 return (a);
01179 }
01180
01181 double
01182 PolySNP::b_sub_n (void)
01187 {
01188 assert ( rep->_preprocessed );
01189 int i;
01190 double b = 0.0;
01191 for (i = 1; i < int (rep->_totsam); ++i)
01192 b += 1. / (pow (double (i), 2.0));
01193 return b;
01194 }
01195
01196 double
01197 PolySNP::b_sub_n_plus1(void)
01203 {
01204 assert ( rep->_preprocessed );
01205 int i;
01206 double b = 0.0;
01207 for (i = 1; i < int (rep->_totsam) + 1; ++i)
01208 b += 1. / (pow (double (i), 2.0));
01209 return b;
01210 }
01211
01212 double
01213 PolySNP::c_sub_n (void)
01223 {
01224 assert ( rep->_preprocessed );
01225 double c = 0.0, a = a_sub_n ();
01226 if (fabs(rep->_totsam-2.) <= DBL_EPSILON)
01227 {
01228 c = 1.0;
01229 }
01230 else
01231 {
01232 c = 2.0 * (rep->_totsam * a - 2.0 * (rep->_totsam - 1.0));
01233 c /= ((rep->_totsam - 1.0) * (rep->_totsam - 2.0));
01234 }
01235 return c;
01236 }
01237
01238 double
01239 PolySNP::d_sub_n (void)
01244 {
01245 assert ( rep->_preprocessed );
01246 double a_n_plus1, c, d;
01247 a_n_plus1 = a_sub_n_plus1 ();
01248 c = c_sub_n ();
01249 d = c + (rep->_totsam - 2.0) / (pow (rep->_totsam - 1.0, 2.0));
01250 d += (2.0 / (rep->_totsam - 1.0)) *
01251 (1.5 - ((2.0 * a_n_plus1 - 3.0) / (rep->_totsam - 2.0)) - 1.0 / rep->_totsam);
01252 return d;
01253 }
01254
01255 double
01256 PolySNP::DandVH (void)
01265 {
01266 if (!(rep->_CalculatedDandV))
01267 DepaulisVeuilleStatistics ();
01268
01269 return rep->_DVH;
01270 }
01271
01272 unsigned
01273 PolySNP::DandVK (void)
01282 {
01283 if (!(rep->_CalculatedDandV))
01284 DepaulisVeuilleStatistics ();
01285
01286 return rep->_DVK;
01287 }
01288
01289 double
01290 PolySNP::HudsonsC (void)
01298 {
01299 assert ( rep->_preprocessed );
01300 if(rep->_NumPoly==0) return strtod("NAN",NULL);
01301 return(Recombination::HudsonsC (rep->_data, rep->_haveOutgroup, rep->_outgroup));
01302 }
01303
01304
01305 unsigned
01306 PolySNP::Minrec (void)
01313 {
01314 assert ( rep->_preprocessed );
01315 if(rep->_NumPoly<2) return SEQMAXUNSIGNED;
01316 unsigned a,b,e,numgametes,Rmin=0,x=0;
01317 bool flag=false;
01318
01319 char c11,c12,c21,c22;
01320 unsigned states1=0,states2=0;
01321
01322 for (a=x+1 ; a < rep->_nsites ; ++a)
01323 {
01324 c11 = c12 = 'Z';
01325
01326 states1 = rep->_counts[a].nStates();
01327
01328 c11 = (c11 == 'Z' && rep->_counts[a].a > 0 ) ? 'A' : 'Z';
01329 c11 = (c11 == 'Z' && rep->_counts[a].g > 0 ) ? 'G' : c11;
01330 c11 = (c11 == 'Z' && rep->_counts[a].c > 0 ) ? 'C' : c11;
01331 c11 = (c11 == 'Z' && rep->_counts[a].t > 0 ) ? 'T' : c11;
01332 c11 = (c11 == 'Z' && rep->_counts[a].zero > 0 ) ? '0' : c11;
01333 c11 = (c11 == 'Z' && rep->_counts[a].one > 0 ) ? '1' : c11;
01334
01335 c12 = (c12 == 'Z' && c11 != 'A' && rep->_counts[a].a > 0) ? 'A' : 'Z';
01336 c12 = (c12 == 'Z' && c11 != 'G' && rep->_counts[a].g > 0) ? 'G' : c12;
01337 c12 = (c12 == 'Z' && c11 != 'C' && rep->_counts[a].c > 0) ? 'C' : c12;
01338 c12 = (c12 == 'Z' && c11 != 'T' && rep->_counts[a].t > 0) ? 'T' : c12;
01339 c12 = (c12 == 'Z' && c11 != '0' && rep->_counts[a].zero > 0) ? '0' : c12;
01340 c12 = (c12 == 'Z' && c11 != '1' && rep->_counts[a].one > 0) ? '1' : c12;
01341
01342 for (b = (flag == false) ? x : a-1 ; b < a; ++b)
01343 {
01344 flag = false;
01345 numgametes = 0;
01346 c21=c22='Z';
01347 states2 = rep->_counts[b].nStates();
01348
01349 if(states1==2&&states2==2)
01350 {
01351 c21 = (c21 == 'Z' && rep->_counts[b].a > 0 ) ? 'A' : 'Z';
01352 c21 = (c21 == 'Z' && rep->_counts[b].g > 0 ) ? 'G' : c21;
01353 c21 = (c21 == 'Z' && rep->_counts[b].c > 0 ) ? 'C' : c21;
01354 c21 = (c21 == 'Z' && rep->_counts[b].t > 0 ) ? 'T' : c21;
01355 c21 = (c21 == 'Z' && rep->_counts[b].zero > 0 ) ? '0' : c21;
01356 c21 = (c21 == 'Z' && rep->_counts[b].one > 0 ) ? '1' : c21;
01357
01358 c22 = (c22 == 'Z' && c21 != 'A' && rep->_counts[b].a > 0) ? 'A' : 'Z';
01359 c22 = (c22 == 'Z' && c21 != 'G' && rep->_counts[b].g > 0) ? 'G' : c22;
01360 c22 = (c22 == 'Z' && c21 != 'C' && rep->_counts[b].c > 0) ? 'C' : c22;
01361 c22 = (c22 == 'Z' && c21 != 'T' && rep->_counts[b].t > 0) ? 'T' : c22;
01362 c22 = (c22 == 'Z' && c21 != '0' && rep->_counts[b].zero > 0) ? '0' : c22;
01363 c22 = (c22 == 'Z' && c21 != '1' && rep->_counts[b].one > 0) ? '1' : c22;
01364
01365 for (e = 0 ; e < rep->_nsam ; ++e)
01366 {
01367 if (!rep->_haveOutgroup || (rep->_haveOutgroup && e != rep->_outgroup) )
01368 if (toupper( (*rep->_data)[e][a] ) == c11 &&
01369 toupper( (*rep->_data)[e][b] ) == c21 )
01370 {
01371 ++numgametes;
01372 break;
01373 }
01374 }
01375 for (e = 0 ; e < rep->_nsam ; ++e)
01376 {
01377 if (!rep->_haveOutgroup || (rep->_haveOutgroup && e != rep->_outgroup) )
01378 if (toupper( (*rep->_data)[e][a] ) == c11 &&
01379 toupper( (*rep->_data)[e][b] ) == c22 )
01380 {
01381 ++numgametes;
01382 break;
01383 }
01384 }
01385 for (e = 0 ; e < rep->_nsam ; ++e)
01386 {
01387 if (!rep->_haveOutgroup || (rep->_haveOutgroup && e != rep->_outgroup) )
01388 if (toupper( (*rep->_data)[e][a] ) == c12 &&
01389 toupper( (*rep->_data)[e][b] ) == c21 )
01390 {
01391 ++numgametes;
01392 break;
01393 }
01394 }
01395 for (e = 0 ; e < rep->_nsam ; ++e)
01396 {
01397 if (!rep->_haveOutgroup || (rep->_haveOutgroup && e != rep->_outgroup) )
01398 if (toupper( (*rep->_data)[e][a] ) == c12 &&
01399 toupper( (*rep->_data)[e][b] ) == c22 )
01400 {
01401 ++numgametes;
01402 break;
01403 }
01404 }
01405 if (numgametes == 4)
01406 {
01407 ++Rmin;
01408 flag = true;
01409 break;
01410 }
01411 }
01412 }
01413 if (flag == true)
01414 x=a;
01415 }
01416 return Rmin;
01417 }
01418
01419 std::vector < std::vector < double > >
01420 PolySNP::Disequilibrium ( unsigned mincount )
01431 {
01432 assert ( rep->_preprocessed );
01433 if(rep->_NumPoly<2) return std::vector < std::vector < double > >();
01434 return Recombination::Disequilibrium (rep->_data, rep->_haveOutgroup, rep->_outgroup,
01435 mincount);
01436 }
01437 }