00001 #include <cmath>
00002 #include <cfloat>
00003 #include <cassert>
00004 #include <utility>
00005 #include <cstdlib>
00006 #include <cctype>
00007 #include <set>
00008 #include <algorithm>
00009 #include <boost/bind.hpp>
00010 #include <Sequence/PolyTable.hpp>
00011 #include <Sequence/Comparisons.hpp>
00012 #include <Sequence/Recombination.hpp>
00013 #include <Sequence/PolySNP.hpp>
00014 #include <Sequence/stateCounter.hpp>
00015 #include <Sequence/SeqConstants.hpp>
00016 #include <Sequence/PolySNPimpl.hpp>
00017
00023 using std::string;
00024 using namespace Sequence::Recombination;
00025
00026 namespace Sequence
00027 {
00028 struct uniqueSeq : public std::binary_function<std::string,std::string,bool>
00029 {
00030 inline bool operator()(const std::string & l, const std::string & r) const
00031 {
00032
00033
00034 return ( l<r && Different(l,r) );
00035 }
00036 };
00037 _PolySNPImpl::_PolySNPImpl (const Sequence::PolyTable * data, bool haveOutgroup ,
00038 unsigned outgroup, bool totMuts):
00039 _data(data),
00040 _nsites(data->numsites()),
00041 _nsam(data->size()),
00042 _outgroup(outgroup),
00043 _haveOutgroup(haveOutgroup),
00044 _totMuts(totMuts),
00045 _totsam (data->size()),
00046 _DVK(0),
00047 _DVH(1.0),
00048 _counted_singletons(false),
00049 _know_pi(false),
00050 _CalculatedDandV(false),
00051 _pi(0.),
00052 _singletons(0),
00053 _walls_Bprime(0),
00054 _NumPoly(0),
00055 _walls_B(0.),_walls_Q(0.),
00056 _calculated_wall_stats(false),
00057 _counts(std::vector< Sequence::stateCounter >(_nsites,Sequence::stateCounter('-'))),
00058 _derivedCounts(std::vector< std::pair< bool, stateCounter > >
00059 (_nsites,
00060 std::make_pair<bool,stateCounter>(true,stateCounter('-')))),
00061 _preprocessed(false)
00062 {
00063 if (haveOutgroup)
00064 --_totsam;
00065 preprocess();
00066 }
00067
00068 void _PolySNPImpl::preprocess()
00084 {
00085 if (!_preprocessed)
00086 {
00087 for(unsigned site = 0 ; site < _nsites ; ++site)
00088 {
00089 for (unsigned seq = 0 ; seq < _nsam ; ++seq)
00090 {
00091
00092
00093 if (!_haveOutgroup
00094 || (_haveOutgroup && seq != _outgroup))
00095 {
00096 _counts[site]( (*_data)[seq][site] );
00097 }
00098
00099
00100 if (_haveOutgroup == true)
00101 {
00102
00103
00104
00105 if ( std::toupper((*_data)[_outgroup][site]) == 'N' ||
00106 (*_data)[_outgroup][site] == '-')
00107 {
00108 _derivedCounts[site].first = false;
00109 }
00110 else
00111 {
00112
00113 _derivedCounts[site].first = true;
00114 if ( seq != _outgroup && (*_data)[seq][site] != (*_data)[_outgroup][site] )
00115 {
00116 _derivedCounts[site].second((*_data)[seq][site]);
00117 }
00118 }
00119 }
00120 else
00121 {
00122
00123
00124 _derivedCounts[site].first = false;
00125 }
00126 }
00127 if(_counts[site].nStates() > 1 && _counts[site].gap==0) ++_NumPoly;
00128 }
00129 _preprocessed = true;
00130 }
00131 }
00132
00133 PolySNP::PolySNP (const Sequence::PolyTable * data, bool haveOutgroup ,
00134 unsigned outgroup, bool totMuts):
00135 rep(std::auto_ptr<_PolySNPImpl>(new _PolySNPImpl(data,haveOutgroup,outgroup,totMuts)))
00145 {}
00146
00147 PolySNP::~PolySNP (void)
00148 {
00149 }
00150
00151 double
00152 PolySNP::ThetaPi (void)
00179 {
00180 assert ( rep->_preprocessed );
00181 if (rep->_know_pi == false)
00182 {
00183 double Pi = 0.0;
00184 for (unsigned i = 0; i < rep->_nsites; ++i)
00185 {
00186 if ( rep->_counts[i].gap == 0 &&
00187 rep->_counts[i].nStates() > 1 )
00188 {
00189 unsigned samplesize = rep->_totsam;
00190 double SSH = 0.0;
00191 samplesize -= rep->_counts[i].n;
00192 if (samplesize > 1)
00193 {
00194 double denom = (double(samplesize)* (double(samplesize) - 1.0));
00195 SSH += (rep->_counts[i].a > 0) ? double(rep->_counts[i].a) *
00196 double (rep->_counts[i].a-1) /denom : 0. ;
00197 SSH += (rep->_counts[i].g > 0) ? double(rep->_counts[i].g) *
00198 double (rep->_counts[i].g-1) /denom : 0. ;
00199 SSH += (rep->_counts[i].c > 0) ? double(rep->_counts[i].c) *
00200 double (rep->_counts[i].c-1) /denom : 0. ;
00201 SSH += (rep->_counts[i].t > 0) ? double(rep->_counts[i].t) *
00202 double (rep->_counts[i].t-1) /denom : 0. ;
00203 SSH += (rep->_counts[i].zero > 0) ? double(rep->_counts[i].zero) *
00204 double (rep->_counts[i].zero-1) /denom : 0. ;
00205 SSH += (rep->_counts[i].one > 0) ? double(rep->_counts[i].one) *
00206 double (rep->_counts[i].one-1) /denom : 0. ;
00207 Pi += (1.0 - SSH);
00208 }
00209 }
00210 }
00211 rep->_pi = Pi;
00212 rep->_know_pi = true;
00213 return rep->_pi;
00214 }
00215 else
00216 return rep->_pi;
00217 return rep->_pi;
00218 }
00219
00220
00221 double
00222 PolySNP::ThetaW (void)
00240 {
00241 assert ( rep->_preprocessed );
00242 double W = 0.0;
00243 for (unsigned i = 0; i < rep->_nsites; ++i)
00244 {
00245 if ( rep->_counts[i].gap == 0)
00246 {
00247 int nstates = rep->_counts[i].nStates();
00248 unsigned nsam_site = rep->_totsam - rep->_counts[i].n;
00249 double denom=0.0;
00250 if(rep->_totMuts==true&&nstates>=2)
00251 {
00252 for(unsigned i = 1 ; i < nsam_site ; ++i)
00253 denom += 1.0/double(i);
00254 W += double(nstates-1)/denom;
00255 }
00256 else if (rep->_totMuts==false &&nstates>=2)
00257 {
00258 for(unsigned i = 1 ; i < nsam_site ; ++i)
00259 denom += 1.0/double(i);
00260 W += 1.0/denom;
00261 }
00262 }
00263 }
00264 return (W);
00265 }
00266
00267 double
00268 PolySNP::ThetaH (void)
00300 {
00301 assert ( rep->_preprocessed );
00302 if (rep->_NumPoly==0)
00303 return 0.;
00304 if( !rep->_haveOutgroup)
00305 return strtod("NAN",NULL);
00306 double H = 0.0;
00307 bool anc_is_present = 0;
00308
00309 for (unsigned i = 0; i < rep->_nsites; ++i)
00310 {
00311 if ( rep->_derivedCounts[i].second.gap == 0)
00312 {
00313 unsigned samplesize = rep->_totsam;
00314 unsigned sumDerCounts = 0;
00315 sumDerCounts += rep->_derivedCounts[i].second.a;
00316 sumDerCounts += rep->_derivedCounts[i].second.g;
00317 sumDerCounts += rep->_derivedCounts[i].second.c;
00318 sumDerCounts += rep->_derivedCounts[i].second.t;
00319 sumDerCounts += rep->_derivedCounts[i].second.zero;
00320 sumDerCounts += rep->_derivedCounts[i].second.one;
00321 unsigned ancestralCounts = samplesize-sumDerCounts-rep->_derivedCounts[i].second.n;
00322
00323
00324
00325
00326
00327 anc_is_present = (rep->_derivedCounts[i].first == true &&
00328 ancestralCounts > 0 ) ? true : false;
00329 if (anc_is_present)
00330 {
00331
00332 int numDer = rep->_derivedCounts[i].second.nStates();
00333 if (numDer == 1)
00334 {
00335 samplesize -= rep->_derivedCounts[i].second.n;
00336 double denom = (double(samplesize) *
00337 (double(samplesize) - 1.0));
00338 H += (rep->_derivedCounts[i].second.a > 0 ) ?
00339 (2.0 * pow (double (rep->_derivedCounts[i].second.a), 2.0)) / denom : 0. ;
00340 H += (rep->_derivedCounts[i].second.g > 0 ) ?
00341 (2.0 * pow (double (rep->_derivedCounts[i].second.g), 2.0)) / denom : 0. ;
00342 H += (rep->_derivedCounts[i].second.c > 0 ) ?
00343 (2.0 * pow (double (rep->_derivedCounts[i].second.c), 2.0)) / denom : 0. ;
00344 H += (rep->_derivedCounts[i].second.t > 0 ) ?
00345 (2.0 * pow (double (rep->_derivedCounts[i].second.t), 2.0)) / denom : 0. ;
00346 H += (rep->_derivedCounts[i].second.zero > 0) ?
00347 (2.0 * pow (double (rep->_derivedCounts[i].second.zero), 2.0)) / denom : 0. ;
00348 H += (rep->_derivedCounts[i].second.one > 0 ) ?
00349 (2.0 * pow (double (rep->_derivedCounts[i].second.one), 2.0)) / denom : 0. ;
00350 }
00351 else if (numDer == 2 && rep->_haveOutgroup)
00352 {
00353
00354 int config[2];
00355 unsigned k = 0;
00356 if(rep->_derivedCounts[i].second.a > 0)
00357 {
00358 config[k++] = rep->_derivedCounts[i].second.a;
00359 }
00360 if(rep->_derivedCounts[i].second.g > 0)
00361 {
00362 config[k++] = rep->_derivedCounts[i].second.g;
00363 }
00364 if(rep->_derivedCounts[i].second.c > 0)
00365 {
00366 config[k++] = rep->_derivedCounts[i].second.c;
00367 }
00368 if(rep->_derivedCounts[i].second.t > 0)
00369 {
00370 config[k++] = rep->_derivedCounts[i].second.t;
00371 }
00372 if(rep->_derivedCounts[i].second.zero > 0)
00373 {
00374 config[k++] = rep->_derivedCounts[i].second.zero;
00375 }
00376 if(rep->_derivedCounts[i].second.one > 0)
00377 {
00378 config[k++] = rep->_derivedCounts[i].second.one;
00379 }
00380 for (int i = 0; i < 2; ++i)
00381 {
00382 double sample_size_adjust = (i==0) ? double(config[1]) : double(config[0]);
00383 H += (2.0 *
00384 pow (double (config[i]),
00385 2.0)) / ((double(samplesize) -
00386 sample_size_adjust)
00387 * (double(samplesize) -
00388 sample_size_adjust
00389 - 1.0));
00390 }
00391 }
00392 }
00393 }
00394 }
00395 return H;
00396 }
00397
00398 double
00399 PolySNP::ThetaL (void)
00432 {
00433 assert(rep->_haveOutgroup==true);
00434 assert ( rep->_preprocessed );
00435 double thetal = 0.0;
00436 if(rep->_NumPoly==0) return thetal;
00437 bool anc_is_present = 0;
00438
00439 for (unsigned i = 0; i < rep->_nsites; ++i)
00440 {
00441 if (rep->_derivedCounts[i].first == true && rep->_derivedCounts[i].second.gap == 0)
00442 {
00443 unsigned samplesize = rep->_totsam;
00444 unsigned sumDerCounts = 0;
00445 sumDerCounts += rep->_derivedCounts[i].second.a;
00446 sumDerCounts += rep->_derivedCounts[i].second.g;
00447 sumDerCounts += rep->_derivedCounts[i].second.c;
00448 sumDerCounts += rep->_derivedCounts[i].second.t;
00449 sumDerCounts += rep->_derivedCounts[i].second.zero;
00450 sumDerCounts += rep->_derivedCounts[i].second.one;
00451 unsigned ancestralCounts = samplesize-sumDerCounts-rep->_derivedCounts[i].second.n;
00452
00453
00454
00455
00456
00457 anc_is_present = (rep->_derivedCounts[i].first == true &&
00458 ancestralCounts > 0 ) ? true : false;
00459 if (anc_is_present)
00460 {
00461
00462 int numDer = rep->_derivedCounts[i].second.nStates();
00463 if (numDer == 1)
00464 {
00465 samplesize -= rep->_derivedCounts[i].second.n;
00466 double denom = (double(samplesize) - 1.0);
00467 thetal += (rep->_derivedCounts[i].second.a > 0 ) ?
00468 double (rep->_derivedCounts[i].second.a) / denom : 0. ;
00469 thetal += (rep->_derivedCounts[i].second.g > 0 ) ?
00470 double (rep->_derivedCounts[i].second.g) / denom : 0. ;
00471 thetal += (rep->_derivedCounts[i].second.c > 0 ) ?
00472 double (rep->_derivedCounts[i].second.c) / denom : 0. ;
00473 thetal += (rep->_derivedCounts[i].second.t > 0 ) ?
00474 double (rep->_derivedCounts[i].second.t) / denom : 0. ;
00475 thetal += (rep->_derivedCounts[i].second.zero > 0) ?
00476 double (rep->_derivedCounts[i].second.zero) / denom : 0. ;
00477 thetal += (rep->_derivedCounts[i].second.one > 0 ) ?
00478 double (rep->_derivedCounts[i].second.one) / denom : 0. ;
00479 }
00480 else if (numDer == 2 && rep->_haveOutgroup)
00481 {
00482
00483 int config[2];
00484 unsigned k = 0;
00485 if(rep->_derivedCounts[i].second.a > 0)
00486 {
00487 config[k++] = rep->_derivedCounts[i].second.a;
00488 }
00489 if(rep->_derivedCounts[i].second.g > 0)
00490 {
00491 config[k++] = rep->_derivedCounts[i].second.g;
00492 }
00493 if(rep->_derivedCounts[i].second.c > 0)
00494 {
00495 config[k++] = rep->_derivedCounts[i].second.c;
00496 }
00497 if(rep->_derivedCounts[i].second.t > 0)
00498 {
00499 config[k++] = rep->_derivedCounts[i].second.t;
00500 }
00501 if(rep->_derivedCounts[i].second.zero > 0)
00502 {
00503 config[k++] = rep->_derivedCounts[i].second.zero;
00504 }
00505 if(rep->_derivedCounts[i].second.one > 0)
00506 {
00507 config[k++] = rep->_derivedCounts[i].second.one;
00508 }
00509 for (int i = 0; i < 2; ++i)
00510 {
00511 double sample_size_adjust = (i==0) ? double(config[1]) : double(config[0]);
00512 thetal += (double (config[i])) / (double(samplesize) -
00513 sample_size_adjust- 1.0);
00514 }
00515 }
00516 }
00517 }
00518 }
00519 return thetal;
00520 }
00521
00522 unsigned
00523 PolySNP::NumPoly (void)
00527 {
00528 assert ( rep->_preprocessed );
00529 unsigned npoly = 0;
00530 for (unsigned i = 0; i < rep->_nsites; ++i)
00531 {
00532 if (rep->_counts[i].nStates() > 1 &&
00533 rep->_counts[i].gap == 0)
00534 ++npoly;
00535 }
00536 return npoly;
00537 }
00538
00539 unsigned
00540 PolySNP::NumMutations (void)
00545 {
00546 assert( rep->_preprocessed );
00547 unsigned nmut = 0;
00548
00549 for (unsigned i = 0; i < rep->_nsites; ++i)
00550 {
00551 int nstates = (rep->_counts[i].gap==0) ? rep->_counts[i].nStates() : 0;
00552 if (nstates > 1)
00553 nmut += nstates - 1;
00554 }
00555 return nmut;
00556 }
00557
00558 unsigned
00559 PolySNP::NumSingletons (void)
00563 {
00564 assert ( rep->_preprocessed );
00565 unsigned nsing = 0,nstates;
00566 for (unsigned i = 0; i < rep->_nsites; ++i)
00567 {
00568 unsigned curr_nsing=0,nsam=0;
00569 nstates = rep->_counts[i].nStates();
00570 if (rep->_counts[i].gap == 0 && nstates>1)
00571 {
00572 nsam = rep->_totsam - rep->_counts[i].n;
00573 if(nsam==2 && nstates ==2)
00574 curr_nsing=1;
00575 else
00576 {
00577 curr_nsing += (rep->_counts[i].a == 1) ? 1 : 0;
00578 curr_nsing += (rep->_counts[i].g == 1) ? 1 : 0;
00579 curr_nsing += (rep->_counts[i].c == 1) ? 1 : 0;
00580 curr_nsing += (rep->_counts[i].t == 1) ? 1 : 0;
00581 curr_nsing += (rep->_counts[i].zero == 1) ? 1 : 0;
00582 curr_nsing += (rep->_counts[i].one == 1) ? 1 : 0;
00583 }
00584 }
00585 nsing += curr_nsing;
00586 }
00587 return nsing;
00588 }
00589
00590
00591 unsigned
00592 PolySNP::NumExternalMutations (void)
00597 {
00598 if(!rep->_haveOutgroup) return SEQMAXUNSIGNED;
00599 assert ( rep->_preprocessed );
00600 int next = 0;
00601 for (unsigned i = 0; i < rep->_nsites ; ++i)
00602 {
00603 unsigned nsam=rep->_totsam;
00604 unsigned curr_next=0;
00605 if(rep->_derivedCounts[i].first == true &&
00606 rep->_derivedCounts[i].second.gap == 0)
00607 {
00608 nsam -= rep->_derivedCounts[i].second.n;
00609 curr_next += (rep->_derivedCounts[i].second.a == 1) ? 1 : 0;
00610 curr_next += (rep->_derivedCounts[i].second.g == 1) ? 1 : 0;
00611 curr_next += (rep->_derivedCounts[i].second.c == 1) ? 1 : 0;
00612 curr_next += (rep->_derivedCounts[i].second.t == 1) ? 1 : 0;
00613 curr_next += (rep->_derivedCounts[i].second.zero == 1) ? 1 : 0;
00614 curr_next += (rep->_derivedCounts[i].second.one == 1) ? 1 : 0;
00615 }
00616 next += (nsam>1) ? curr_next : 0;
00617 }
00618 return next;
00619 }
00620
00621
00622 double
00623 PolySNP::TajimasD (void)
00631 {
00632 assert ( rep->_preprocessed );
00633 if(rep->_NumPoly==0) return strtod("NAN",NULL);
00634 double D = 0.0;
00635 double Pi = ThetaPi ();
00636 double W = ThetaW ();
00637 if (fabs(Pi-0.) <= DBL_EPSILON && fabs(W-0.) <= DBL_EPSILON)
00638 D = 0.0;
00639 else
00640 D = (Pi - W) / Dnominator ();
00641 return D;
00642 }
00643
00644 double PolySNP::Hprime (bool likeThorntonAndolfatto)
00657 {
00658 assert ( rep->_preprocessed );
00659 if(rep->_NumPoly==0) return strtod("NAN",NULL);
00660 assert(rep->_haveOutgroup==true);
00661 double Hpr = 0.0;
00662 double a = a_sub_n ();
00663 double b = b_sub_n ();
00664 double pi = ThetaPi ();
00665 double theta = ThetaW();
00666
00667 double thetal = ThetaL();
00668 double b_n_plus1 = b_sub_n_plus1();
00669 double S = (rep->_totMuts) ? NumMutations() : NumPoly();
00670 double thetasq = (likeThorntonAndolfatto == false ) ? S * (S-1)/(a*a + b) : theta*theta;
00671
00672 double vThetal =
00673 (rep->_totsam * theta)/(2.0 * (rep->_totsam - 1.0))
00674 + (2.0 * pow(rep->_totsam/(rep->_totsam - 1.0), 2.0) * (b_n_plus1 - 1.0) - 1.0) * thetasq;
00675
00676 double vPi =
00677 (3.0 * rep->_totsam *(rep->_totsam + 1.0) * theta
00678 + 2.0 * ( rep->_totsam * rep->_totsam + rep->_totsam + 3.0) * thetasq )
00679 / (9 * rep->_totsam * (rep->_totsam -1.0));
00680
00681 double cov =
00682 ((rep->_totsam + 1.0) / (3.0 * (rep->_totsam - 1.0))) * theta
00683 +(( 7.0 * rep->_totsam * rep->_totsam + 3.0 * rep->_totsam - 2.0 - 4.0 * rep->_totsam *( rep->_totsam + 1.0) * b_n_plus1)
00684 / (2.0 * pow ((rep->_totsam - 1.0), 2.0)))
00685 * thetasq ;
00686
00687
00688 Hpr = pi - thetal;
00689 Hpr /= pow ( (vThetal + vPi - 2.0 * cov), 0.5);
00690 return (Hpr);
00691 }
00692
00693
00694 double
00695 PolySNP::Dnominator (void)
00700 {
00701 assert ( rep->_preprocessed );
00702 if(rep->_NumPoly==0) return strtod("NAN",NULL);
00703 double S = 0.0;
00704 if (rep->_totMuts)
00705 {
00706 S = double (NumMutations ());
00707 }
00708 else if (!(rep->_totMuts))
00709 {
00710 S = double (NumPoly ());
00711 }
00712 double a1, a2, b1, b2, c1, c2, e1, e2;
00713
00714 a1 = a_sub_n ();
00715 a2 = b_sub_n ();
00716 b1 = (rep->_totsam + 1.0) / (3.0 * (rep->_totsam - 1.0));
00717 b2 = (2.0 * (pow (rep->_totsam, 2.0)
00718 + rep->_totsam + 3.0)) / (9.0 * rep->_totsam *
00719 (rep->_totsam - 1.0));
00720 c1 = b1 - 1.0 / a1;
00721 c2 = b2 - (rep->_totsam + 2.0) / (a1 * rep->_totsam) + a2 / pow (a1, 2.0);
00722 e1 = c1 / a1;
00723 e2 = c2 / (pow (a1, 2.0) + a2);
00724 double denominator = pow ((e1 * S + e2 * S * (S - 1.0)), 0.5);
00725 return (denominator);
00726 }
00727
00728 void
00729 PolySNP::DepaulisVeuilleStatistics (void)
00742 {
00743 assert ( rep->_preprocessed );
00744 if (!(rep->_CalculatedDandV))
00745 {
00746 if(rep->_NumPoly == 0)
00747 {
00748 rep->_DVK = 1;
00749 rep->_DVH = 0.;
00750 return;
00751 }
00752 if (rep->_data->size() > 0)
00753 {
00754
00755
00756 std::set<string,uniqueSeq> unique_haplotypes;
00757 if (rep->_haveOutgroup)
00758 {
00759 unique_haplotypes.insert(rep->_data->begin(),
00760 rep->_data->begin()+rep->_outgroup);
00761 unique_haplotypes.insert(rep->_data->begin()+rep->_outgroup+1,
00762 rep->_data->end());
00763 }
00764 else
00765 {
00766 unique_haplotypes.insert(rep->_data->begin(),
00767 rep->_data->end());
00768 }
00769
00770 std::set<string,uniqueSeq>::const_iterator beg = unique_haplotypes.begin(),
00771 end = unique_haplotypes.end();
00772 rep->_DVK = unique_haplotypes.size();
00773 while(beg != end)
00774 {
00775 unsigned _count = 0;
00776 if (rep->_haveOutgroup)
00777 {
00778 _count += std::count_if(rep->_data->begin(),
00779 rep->_data->begin()+rep->_outgroup,
00780 boost::bind(notDifferent<string>,
00781 _1,*beg));
00782 _count += std::count_if(rep->_data->begin()+rep->_outgroup+1,
00783 rep->_data->end(),
00784 boost::bind(notDifferent<string>,
00785 _1,*beg));
00786 }
00787 else
00788 {
00789 _count += std::count_if(rep->_data->begin(),
00790 rep->_data->end(),
00791 boost::bind(notDifferent<string>,
00792 _1,*beg));
00793 }
00794 rep->_DVH -= pow (double (_count) / rep->_totsam, 2.0);
00795 ++beg;
00796 }
00797 rep->_DVH *= rep->_totsam / (rep->_totsam - 1.0);
00798 rep->_CalculatedDandV = 1;
00799 }
00800 }
00801 }
00802
00803 double PolySNP::WallsB(void)
00809 {
00810 assert ( rep->_preprocessed );
00811 if (rep->_calculated_wall_stats == false)
00812 {
00813 WallStats();
00814 }
00815 return rep->_walls_B;
00816 }
00817
00818 void PolySNP::WallStats(void)
00819 {
00820 assert ( rep->_preprocessed );
00821 unsigned S = 0;
00822
00823
00824 for( std::vector<stateCounter>::const_iterator itr = rep->_counts.begin() ;
00825 itr < rep->_counts.end();
00826 ++itr)
00827 {
00828 if ( itr->nStates() == 2 && itr->gap == 0)
00829 ++S;
00830 }
00831 if (S > 1)
00832 {
00833 unsigned nhap_curr, nhap_left;
00834
00835 nhap_left = SEQMAXUNSIGNED;
00836
00837 unsigned A = 0;
00838
00839 for (unsigned site1 = 0 ; site1 < rep->_nsites-1 ; ++site1)
00840 {
00841 for(unsigned site2=site1+1 ; site2 < rep->_nsites ; ++site2)
00842 {
00843 if ( rep->_counts[site1].nStates() == 2
00844 && rep->_counts[site2].nStates() == 2 )
00845 {
00846 std::string config;
00847 config.resize(2);
00848 std::set<string,uniqueSeq> unique_haplotypes;
00849 nhap_curr = 0;
00850 for (unsigned i = 0 ; i < rep->_nsam ; ++i)
00851 {
00852 if ( (!rep->_haveOutgroup) || (rep->_haveOutgroup && i != rep->_outgroup) )
00853 {
00854 config[0] = (*rep->_data)[i][site1];
00855 config[1] = (*rep->_data)[i][site2];
00856 unique_haplotypes.insert(config);
00857 }
00858 }
00859 nhap_curr = unique_haplotypes.size();
00860 if(site1==0)
00861 {
00862 if (nhap_curr == 2)
00863 {
00864 ++rep->_walls_Bprime;
00865 ++A;
00866 }
00867 }
00868 else
00869 {
00870 if (nhap_curr == 2)
00871 ++rep->_walls_Bprime;
00872 if (nhap_curr == 2 && nhap_left != 2)
00873 ++A;
00874 }
00875 nhap_left = nhap_curr;
00876 site1=site2;
00877 }
00878 }
00879 }
00880 rep->_walls_B = double(rep->_walls_Bprime)/(double(S-1));
00881 rep->_walls_Q = (double(rep->_walls_Bprime) + double(A))/(double(S));
00882 }
00883 else
00884 {
00885 rep->_walls_B = strtod("NAN",NULL);
00886 rep->_walls_Bprime = 0;
00887 rep->_walls_Q = strtod("NAN",NULL);
00888 }
00889 rep->_calculated_wall_stats=true;
00890 }
00891
00892
00893 unsigned PolySNP::WallsBprime(void)
00899 {
00900 assert ( rep->_preprocessed );
00901 if (rep->_calculated_wall_stats == false)
00902 {
00903 WallStats();
00904 }
00905 return rep->_walls_Bprime;
00906 }
00907
00908 double PolySNP::WallsQ(void)
00914 {
00915 assert ( rep->_preprocessed );
00916 if (rep->_calculated_wall_stats == false)
00917 {
00918 WallStats();
00919 }
00920 return rep->_walls_Q;
00921 }
00922
00923 double
00924 PolySNP::VarPi (void)
00929 {
00930 double Pi = ThetaPi ();
00931 double variance = 3.0 * rep->_totsam * (rep->_totsam + 1.0) * Pi +
00932 2.0 * (pow (rep->_totsam, 2.0) + rep->_totsam + 3.0) * pow (Pi, 2.0);
00933 variance /= (11.0 * pow (rep->_totsam, 2.0) - 7.0 * rep->_totsam + 6.0);
00934 return (variance);
00935 }
00936
00937 double
00938 PolySNP::StochasticVarPi (void)
00943 {
00944 double Pi = ThetaPi ();
00945 double variance = (3.0 * pow (rep->_totsam, 2.0) - 3.0 * rep->_totsam + 2.0) * Pi +
00946 2.0 * rep->_totsam * (rep->_totsam - 1.0) * pow (Pi, 2.0);
00947 variance /= (11.0 * pow (rep->_totsam, 2.0) - 7.0 * rep->_totsam + 6.0);
00948 return (variance);
00949 }
00950
00951 double
00952 PolySNP::SamplingVarPi (void)
00958 {
00959 double Pi = ThetaPi ();
00960 double variance =
00961 2.0 * (3.0 * rep->_totsam - 1.0) * Pi + 2.0 * (2.0 * rep->_totsam +
00962 3.0) * pow (Pi, 2.0);
00963 variance /= (11.0 * pow (rep->_totsam, 2.0) - 7.0 * rep->_totsam + 6.0);
00964 return (variance);
00965 }
00966
00967
00968 double
00969 PolySNP::VarThetaW (void)
00974 {
00975 double a1 = a_sub_n ();
00976 double a2 = b_sub_n ();
00977 double S = (rep->_totMuts) ? NumMutations() : NumPoly();
00978 double variance = pow (a1, 2.0) * S + a2 * pow (S, 2.0);
00979 variance /= pow (a1, 2.0) * (pow (a1, 2.0) + a2);
00980 return (variance);
00981 }
00982
00983
00984 double
00985 PolySNP::FuLiD (void)
00991 {
00992 assert ( rep->_preprocessed );
00993
00994 if(rep->_NumPoly==0 || !rep->_haveOutgroup) return strtod("NAN",NULL);
00995 double D = 0.0;
00996 double ExternalMutations =
00997 double (NumExternalMutations ());
00998 double NumMut = double (NumMutations ());
00999 double a = a_sub_n ();
01000 double b = b_sub_n ();
01001 double c = c_sub_n ();
01002 double vD = 1.0 +
01003 (pow (a, 2.0) / (b + pow (a, 2.0)) *
01004 (c - (rep->_totsam + 1.0) / (rep->_totsam - 1.0)));
01005 double uD = a - 1.0 - vD;
01006 D = NumMut - a * double (ExternalMutations);
01007 D /= pow ((uD * NumMut + vD * pow (NumMut, 2.0)), 0.5);
01008 return (D);
01009 }
01010
01011
01012
01013 double
01014 PolySNP::FuLiF (void)
01020 {
01021 assert ( rep->_preprocessed );
01022 if(rep->_NumPoly==0 || !rep->_haveOutgroup) return strtod("NAN",NULL);
01023 double F = 0.0;
01024 double Pi = ThetaPi ();
01025 double NumMut = double (NumMutations());
01026 double ExternalMutations =
01027 double (NumExternalMutations ());
01028 double a = a_sub_n ();
01029 double a_n_plus1 = a_sub_n_plus1 ();
01030 double b = b_sub_n ();
01031 double c = c_sub_n ();
01032 double vF = c + 2.0 * (pow (rep->_totsam, 2.0) + rep->_totsam +
01033 3.0) / (9.0 * rep->_totsam * (double (rep->_totsam - 1.0)));
01034 vF -= (2.0 / (rep->_totsam - 1.0));
01035 vF /= (pow (a, 2.0) + b);
01036
01037 double uF = 1.0 + (rep->_totsam + 1.0) / (3.0 * (double (rep->_totsam - 1.0)));
01038 uF -= 4.0 * ((rep->_totsam + 1.0) / (pow (rep->_totsam - 1.0, 2.0))) *
01039 (a_n_plus1 - 2.0 * rep->_totsam / (rep->_totsam + 1.0));
01040 uF /= a;
01041 uF -= vF;
01042
01043 F = Pi - ExternalMutations;
01044 F /= pow (uF * NumMut + vF * pow (NumMut, 2.0), 0.5);
01045 return (F);
01046 }
01047
01048
01049 double
01050 PolySNP::FuLiDStar (void)
01055 {
01056 assert ( rep->_preprocessed );
01057 if(rep->_NumPoly==0) return strtod("NAN",NULL);
01058 double DStar = 0.0;
01059 double Singletons =
01060 double (NumSingletons ());
01061 double NumMut = double (NumMutations ());
01062
01063 double a = a_sub_n ();
01064 double b = b_sub_n ();
01065 double d = d_sub_n ();
01066
01067 double vD = pow (rep->_totsam / (rep->_totsam - 1.0), 2.0) * b;
01068 vD += pow (a, 2.0) * d;
01069 vD -= 2.0 * (rep->_totsam * a * (a + 1.0)) /
01070 (pow (double (rep->_totsam - 1.0), 2.0));
01071 vD /= (pow (a, 2.0) + b);
01072
01073 double uD =
01074 (rep->_totsam / (rep->_totsam - 1.0)) * (a -
01075 (rep->_totsam / (rep->_totsam - 1.0))) - vD;
01076
01077 DStar = (rep->_totsam / (rep->_totsam - 1.0)) * NumMut - a * double (Singletons);
01078 DStar /= pow (uD * NumMut + vD * pow (NumMut, 2.0), 0.5);
01079 return (DStar);
01080 }
01081
01082
01083 double
01084 PolySNP::FuLiFStar (void)
01091 {
01092 assert ( rep->_preprocessed );
01093 if(rep->_NumPoly==0) return strtod("NAN",NULL);
01094 double FStar = 0.0;
01095 double Singletons =
01096 double (NumSingletons ());
01097 double Pi = ThetaPi ();
01098 double NumMut = double (NumMutations ());
01099
01100 double a = a_sub_n ();
01101 double a_n_plus1 = a_sub_n_plus1 ();
01102 double b = b_sub_n ();
01103
01104
01105 double vF = 2.0 * pow (rep->_totsam, 3.0) + 110.0 * pow (rep->_totsam,
01106 2.0) -
01107 255.0 * rep->_totsam + 153.0;
01108 vF /= (9.0 * pow (rep->_totsam, 2.0) * (rep->_totsam - 1.0));
01109 vF += (((2.0 * (rep->_totsam - 1.0) * a) / pow (rep->_totsam, 2.0)) -
01110 (8.0 * b / rep->_totsam));
01111 vF /= (pow (a, 2.0) + b);
01112
01113 double uF =
01114 (4.0 * pow (rep->_totsam, 2.0) + 19.0 * rep->_totsam + 3.0 -
01115 12.0 * (rep->_totsam + 1.0) * a_n_plus1);
01116 uF /= (3.0 * rep->_totsam * (rep->_totsam - 1.0));
01117 uF /= a;
01118 uF -= vF;
01119 FStar = Pi - (((rep->_totsam - 1.0) / rep->_totsam)) * double (Singletons);
01120 FStar /= pow ((uF * NumMut + vF * pow (NumMut, 2.0)), 0.5);
01121 return (FStar);
01122 }
01123
01124 double
01125 PolySNP::a_sub_n (void)
01131 {
01132 assert ( rep->_preprocessed );
01133 int i;
01134 double a = 0.0;
01135 for (i = 1; i < int (rep->_totsam); ++i)
01136 a += 1. / double (i);
01137 return a;
01138 }
01139
01140 double
01141 PolySNP::a_sub_n_plus1 (void)
01146 {
01147 assert ( rep->_preprocessed );
01148 int i;
01149 double a = 0.0;
01150 for (i = 1; i < int (rep->_totsam) + 1; ++i)
01151 {
01152 a += 1. / double (i);
01153 }
01154 return (a);
01155 }
01156
01157 double
01158 PolySNP::b_sub_n (void)
01163 {
01164 assert ( rep->_preprocessed );
01165 int i;
01166 double b = 0.0;
01167 for (i = 1; i < int (rep->_totsam); ++i)
01168 b += 1. / (pow (double (i), 2.0));
01169 return b;
01170 }
01171
01172 double
01173 PolySNP::b_sub_n_plus1(void)
01179 {
01180 assert ( rep->_preprocessed );
01181 int i;
01182 double b = 0.0;
01183 for (i = 1; i < int (rep->_totsam) + 1; ++i)
01184 b += 1. / (pow (double (i), 2.0));
01185 return b;
01186 }
01187
01188 double
01189 PolySNP::c_sub_n (void)
01199 {
01200 assert ( rep->_preprocessed );
01201 double c = 0.0, a = a_sub_n ();
01202 if (fabs(rep->_totsam-2.) <= DBL_EPSILON)
01203 {
01204 c = 1.0;
01205 }
01206 else
01207 {
01208 c = 2.0 * (rep->_totsam * a - 2.0 * (rep->_totsam - 1.0));
01209 c /= ((rep->_totsam - 1.0) * (rep->_totsam - 2.0));
01210 }
01211 return c;
01212 }
01213
01214 double
01215 PolySNP::d_sub_n (void)
01220 {
01221 assert ( rep->_preprocessed );
01222 double a_n_plus1, c, d;
01223 a_n_plus1 = a_sub_n_plus1 ();
01224 c = c_sub_n ();
01225 d = c + (rep->_totsam - 2.0) / (pow (rep->_totsam - 1.0, 2.0));
01226 d += (2.0 / (rep->_totsam - 1.0)) *
01227 (1.5 - ((2.0 * a_n_plus1 - 3.0) / (rep->_totsam - 2.0)) - 1.0 / rep->_totsam);
01228 return d;
01229 }
01230
01231 double
01232 PolySNP::DandVH (void)
01241 {
01242 if (!(rep->_CalculatedDandV))
01243 DepaulisVeuilleStatistics ();
01244
01245 return rep->_DVH;
01246 }
01247
01248 unsigned
01249 PolySNP::DandVK (void)
01258 {
01259 if (!(rep->_CalculatedDandV))
01260 DepaulisVeuilleStatistics ();
01261
01262 return rep->_DVK;
01263 }
01264
01265 double
01266 PolySNP::HudsonsC (void)
01274 {
01275 assert ( rep->_preprocessed );
01276 if(rep->_NumPoly==0) return strtod("NAN",NULL);
01277 return(Recombination::HudsonsC (rep->_data, rep->_haveOutgroup, rep->_outgroup));
01278 }
01279
01280
01281 unsigned
01282 PolySNP::Minrec (void)
01289 {
01290 assert ( rep->_preprocessed );
01291 if(rep->_NumPoly<2) return SEQMAXUNSIGNED;
01292 unsigned a,b,e,numgametes,Rmin=0,x=0;
01293 bool flag=false;
01294
01295 char c11,c12,c21,c22;
01296 unsigned states1=0,states2=0;
01297
01298 for (a=x+1 ; a < rep->_nsites ; ++a)
01299 {
01300 c11 = c12 = 'Z';
01301
01302 states1 = rep->_counts[a].nStates();
01303
01304 c11 = (c11 == 'Z' && rep->_counts[a].a > 0 ) ? 'A' : 'Z';
01305 c11 = (c11 == 'Z' && rep->_counts[a].g > 0 ) ? 'G' : c11;
01306