############################################### # This program takes in details of mating # # strategy, values of frequency of genotype # # or phenotype and associated archaeological # # date. The associated inbreeding coefficient # # # is then calculated, and then values of k # # are iteratively explored until they explain # # It then simulates from origin to fixation to# # # give an answer of when the origin # # and fixation dates are likely to have been # # for the given selection coefficient. # # Robin Allaby UoW 2013 # ############################################### $file = $ARGV[0]; $arguments = scalar @ARGV; $matstrat = 0; if ($arguments > 1) { $ms = $ARGV[1]; $matstrat = 1; } if ($arguments < 4) { print "Insufficient number of command line arguments: there must be 5\n"; exit; } $gp = $ARGV[2]; # 1 for phenotype, 0 for genotype data $dr = $ARGV[3]; # 1 for dominant, 0 for recessive locus type $fixedK = $ARGV[4]; # option, when nonzero, k is fixed in order to be able to explore origin dates for 'known' k if ($fixedK > 0) {$fixedK = $fixedK * -1;} $fixed_magnitude = $fixedK * -1; open (IN, "$file") or die "Cannot find input file\n"; @contents = (); @contents = ; close IN; open (TEST, ">testfile") or die "CAnnot write testfile\n"; print TEST "\t\t\t\t\t\tYears Before Present at given frequencies\t\t\t\t\t\t\t\t\tFrequencies at PreIndustrial (PI),Bronze Age (BA) and Neolithic (Ne)\nT1\tT2\tdfreq\t\tk\t\t0.00001\t0.0001\t0.01\t0.03\t0.05\t0.1\t0.15\t0.8\t0.99\tPI\tBA\tNe\n"; # if including mating strategy, calculate suitable inbreeding coefficient from strategy if ($matstrat == 1) { $f = estimate_inbreeding_coefficient ($ms); } @frequencies = (); @dates = (); foreach $line (@contents) { @line = (); chomp $line; @line = split (/\t/, $line); $freq = $line[1]; $date = $line[0]; push (@frequencies, $freq); push (@dates, $date); } $u0 = 0; $un = 0; $cc = 1; $t = 0; $samplesize = scalar @frequencies; foreach $fone (@frequencies) { $c = $cc; while ($c < $samplesize) { #unless ($t == $c) $ftwo = $frequencies[$c]; $done = $dates[$t]; $dtwo = $dates[$c]; unless ($done == $dtwo) { if ($done < $dtwo) {$firstd = $dtwo; $firstf = $ftwo; $secondd = $done; $secondf = $fone; } else { $firstd = $done; $firstf = $fone; $secondd = $dtwo; $secondf = $ftwo; } unless ($firstf > $secondf) { if ($gp == 0) { # genotype data has been entered $qone = $firstf; $qtwo = $secondf; $u0 = (1-$qone)/($qone); $un = (1-$qtwo)/($qtwo); $pone = 1 - $qone; if ($matstrat == 0) { $f = 0; } $AA0 = $pone**2 + $pone*$qone*$f; $Aa0 = 2*$pone*$qone*(1-$f); $aa0 = $qone**2 + $pone*$qone*$f; } else { if ($matstrat == 0) { if ($dr == 0) { $qone = sqrt ($firstf); $pone = 1- $qone; $qtwo = sqrt ($secondf); } else { $phomone = 1 - $firstf; $pone = sqrt ($phomone); $qone = 1 - $pone; $phomtwo = 1 - $secondf; $ptwo = sqrt ($phomtwo); $qtwo = 1 - $ptwo; } $u0 = (1-$qone)/($qone); $un = (1-$qtwo)/($qtwo); $AA0 = $pone**2; $Aa0 = 2*$pone*$qone; $aa0 = $qone**2; } if ($matstrat == 1) { $qone = estimate_q ($firstf, $f, $dr); $qtwo = estimate_q ($secondf, $f, $dr); $pone = 1 - $qone; $ptwo = 1 - $qtwo; $u0 = $pone/$qone; #$udash0 = (($pone**2) + ($pone*$qone*$f))/(($qone**2) + ($pone*$qone*$f)); $un = $ptwo/$qtwo; #$udashn = ($ptwo**2 + $ptwo*$qtwo*$f)/($qtwo**2 + $ptwo*$qtwo*$f); #$h0 = (2*$pone*$qone*(1-$f))/(($qone**2) + ($pone*$qone*$f)); # initial phenotype frequencies: #$f = 0.95; $AA0 = $pone**2 + $pone*$qone*$f; $Aa0 = 2*$pone*$qone*(1-$f); $aa0 = $qone**2 + $pone*$qone*$f; } } # $ponesquared = $pone**2; # print "udash is $udash0\n"; # $numerator = (($pone**2) + ($pone*$qone*$f)); # $secondhalf = ($pone*$qone*$f); # print "numerator: $numerator\n second half: $secondhalf\nmade up of $pone, $qone and $f"; $generations = $firstd - $secondd; print "start frequency $qone\nend frequency $qtwo\ngenerations $generations\n"; print "phenotypes AA: $AA0, Aa: $Aa0, aa: $aa0\n"; print "u0: $u0\nun: $un\n"; #print "h0: $h0\n"; #exit; $next = 0; $k = -0.000001; $oldk = -0.999999; $lower_limit = $k; $upper_limit = $oldk; $dk = 1; #$olddk = $dk; $ulast = $u0; # if ($matstrat == 1) { $hlast = $hdash0; $ulast = $udash0;} $counter = 0; do { $g = 0; $ulast = $u0; $AA = $AA0; $Aa = $Aa0; $aa = $aa0; print "k: $k\n"; $stop = 0; while ($g < $generations) { # if ($matstrat == 0) { # $unew = ($ulast*(1+$ulast))/(1+$ulast-$k); # equation 2.1 in Haldane 1924 Proc Cam Phil Soc 1:158-163 # $ulast = $unew; # } # if ($matstrat == 1) { ## first selfing # allow for no mating strategy if ($matstrat == 0) { $ms = 1;} $Ps = $ms*($AA + 0.25*$Aa); $Rs = $ms*(1-$k)*($aa + 0.25*$Aa); if ($dr == 0) { $Qs = $ms*(0.5*$Aa); } else { $Qs = $ms*(1-$k)*(0.5*$Aa); } # again, neutralize for no mating strategy if ($matstrat == 0) {$ms = 0;} $Pc = (1-$ms)*($AA**2 + 2*0.5*$AA*$Aa + 0.25*$Aa*$Aa); $Rc = (1-$ms)*(1-$k)*($aa**2 + 2*0.5*$aa*$Aa + 0.25*$Aa*$Aa); if ($dr == 0) { $Qc = (1-$ms)*(2*$AA*$aa + 0.5*$Aa*$Aa + 2*0.5*$AA*$Aa + 2*0.5*$aa*$Aa); } else { $Qc = (1-$k)*(1-$ms)*(2*$AA*$aa + 0.5*$Aa*$Aa + 2*0.5*$AA*$Aa + 2*0.5*$aa*$Aa); } $P = $Pc + $Ps; $R = $Rc + $Rs; $Q = $Qc + $Qs; # new proportions $newP = ($P)/($P + $Q + $R); $newQ = $Q/($P + $Q + $R); $newR = $R/($P + $Q + $R); #print "AA: $newP, Aa: $newQ, aa: $newR\n"; #exit; $newq = ($Q/2 + $R)/($P + $Q + $R); #print "new q: $newq\n"; #exit; $newp = 1 - $newq; # we need to catch the situation of one or other allele reaching fixation if ($newp == 1) { $unew = 99999999999; $g = $generations; $stop = 1;} if ($newq == 1) { $unew = 0; $g = $generations; $stop =1;} #calculate F on the fly if ($stop == 0) {$F = ($newR - $newq**2)/($newq*$newp);} else { $F = 1;} # reset for next gen $AA = $newP; $Aa = $newQ; $aa = $newR; unless ($stop == 1) {$unew = $newp/$newq}; # } $g++; } if ($k > $oldk) { $dk = $k-$oldk;} else {$dk = $oldk - $k;} print "dk: $dk\n"; if ($dk < 0.000001) { $next = 1;} if ($unew < $un) { #k is too high in magnitude print "upper limit changed from $upper_limit"; $upper_limit = $k; print "to $upper_limit\n"; } else { # k is too low in magnitude print "lower limit changed from $lower_limit"; $lower_limit = $k; print "to $lower_limit\n"; } # determine new k by always going half way between upper and lower limits $newk = ($upper_limit + $lower_limit)/2; $oldk = $k; $k = $newk; print "k: $k\n"; if ($matstrat == 0) {print "un reached $unew\n";} else { print "un reached $unew\n";} #exit; $counter++; #if ($counter == 2) { exit;} } until ($next == 1); #exit; # Now run from origin to fixation # assume a starting frequency of 0.00001 (equates to an Ne of 50000) print "evolving.....\n"; if ($fixed_magnitude >0) {$k = $fixedK;} if ($matstrat == 0) {$q0 = 0.03; $u0t = (1-$q0)/$q0; $ulast = $u0t;} else { $q0 = 0.00001; # starting with the original idea of an Ne of 50000 (change this to 0.0001 for Ne 5000 - especially for outbreeders) $p0 = 1- $q0; if ($matstrat == 0) { $f = 0;} $AA = $p0**2 + $p0*$q0*$f; $Aa = 2*$p0*$q0*(1-$f); $aa = $q0**2 + $p0*$q0*$f; } $next = 0; $g = 0; @life = (); do { # if ($matstrat == 0) { # $unew = ($ulast*(1+$ulast))/(1+$ulast-$k); # $ulast = $unew; $g++; # $rec = 1/((1+$unew)**2); # $qnew = sqrt ($rec); # push (@life, $qnew); # #print TEST "$qnew\n"; # if ($qnew > 0.9999) {$next = 1;} # } else { if ($matstrat == 0) { $ms = 1;} $Ps = $ms*($AA + 0.25*$Aa); $Rs = $ms*(1-$k)*($aa + 0.25*$Aa); if ($dr == 0) { $Qs = $ms*(0.5*$Aa); } else { $Qs = $ms*(1-$k)*(0.5*$Aa); } if ($matstrat == 0) { $ms = 0;} $Pc = (1-$ms)*($AA**2 + 2*0.5*$AA*$Aa + 0.25*$Aa*$Aa); $Rc = (1-$ms)*(1-$k)*($aa**2 + 2*0.5*$aa*$Aa + 0.25*$Aa*$Aa); if ($dr == 0) { $Qc = (1-$ms)*(2*$AA*$aa + 0.5*$Aa*$Aa + 2*0.5*$AA*$Aa + 2*0.5*$aa*$Aa); } else { $Qc = (1-$k)*(1-$ms)*(2*$AA*$aa + 0.5*$Aa*$Aa + 2*0.5*$AA*$Aa + 2*0.5*$aa*$Aa); } $P = $Pc + $Ps; $R = $Rc + $Rs; $Q = $Qc + $Qs; # new proportions $newP = ($P)/($P + $Q + $R); $newQ = $Q/($P + $Q + $R); $newR = $R/($P + $Q + $R); $newq = ($Q/2 + $R)/($P + $Q + $R); $newp = 1 - $newq; push (@life, $newq); print "$newq\n"; if ($newq > 0.9999) {$next = 1;} $AA = $newP; $Aa = $newQ; $aa = $newR; # } } until ($next == 1); # Now determine how many generations it would take to get from 3%, 5%, 10% and 15% standing frequency # to the start proportion, to give time of origins for 1 in 1000 plants showing phenotype, 1 in 400, and 1 in 100 plants $pind = $dtwo - 250; $pbronze = $dtwo - 3000; $pUKNeo = $dtwo - 5000; $g = 0; $found5 = 0; $found10 = 0; $found99 = 0; $foundqstart = 0; $found15 = 0; $foundqstop = 0; $found99 = 0; $found80 = 0; $cp = 0; $gpUKNeo = 0; $gpbronze = 0; $gpind = 0; $found1 = 0; $found3 = 0; $found01 = 0; foreach $gen (@life) { if ($found01 == 0) { if ($gen > 0.0001) {$g01 = $g; $found01 = 1;} } if ($found1 == 0) { if ($gen > 0.01) {$g1 = $g; $found1 = 1;} } if ($found3 == 0) { if ($gen > 0.03) {$g3 = $g; $found3 = 1;} } if ($found5 == 0) { if ($gen > 0.05) { $g5 = $g; $found5 = 1;} } if ($found10 == 0) { if ($gen > 0.1) { $g10 = $g; $found10 = 1;} } if ($found15 == 0) { if ($gen > 0.15) { $g15 = $g; $found15 = 1;} } if ($foundqstart == 0) { if ($gen > $qone) { $gqone = $g; $foundqstart = 1;} } if ($foundqstop == 0) { if ($gen > $qtwo) { $gqtwo = $g; $foundqstop = 1;} } else { # it's been found and we can look to see where frequencies are likely to have been by preindustrial levels $cp++; if ($cp == $pind) { $gpind = $gen; } if ($cp == $pbronze) { $gpbronze = $gen; } if ($cp == $pUKNeo) { $gpUKNeo = $gen; } } if ($found80 == 0) { if ($gen > 0.8) { $g80 = $g; $found80 = 1;} } if ($found99 == 0) { if ($gen > 0.99) { $gfix = $g; $found99 = 1;} $g++; } } # calculate various time or origins $too = $gqone + $firstd; $too1 = ($gqone - $g1) + $firstd; $too3 = ($gqone - $g3) + $firstd; $too5 = ($gqone - $g5) + $firstd; $too10 = ($gqone - $g10) + $firstd; $too15 = ($gqone - $g15) + $firstd; $tof80 = $secondd - ($g80 - $gqtwo); $tof99 = $secondd - ($gfix - $gqtwo); $too01 = ($gqone - $g01) + $firstd; if ($gpind == 0) { $gpind = 1; } # because q was fixed before it got to this age if ($gpbronze == 0) { $gpbronze = 1;} # see above if ($gpUKNeo == 0) {$gpUKNeo = 1;} # and above again print TEST "$firstd\t$secondd\t$firstf-$secondf\t\t$k\t\t$too\t$too01\t$too1\t$too3\t$too5\t$too10\t$too15\t$tof80\t$tof99\t$gpind\t$gpbronze\t$gpUKNeo\n"; # exit; } } $c++; } $cc++; $t++; } close TEST; ###################### # SUBROUTINES # ###################### sub estimate_inbreeding_coefficient { my $ms = $_[0]; my $p = 0.5; my $q = 0.5; #start off with Hardy-Weinberg proportions, and let the frequencies settle out my $AA = $p**2; my $Aa = 2*$p*$q; my $aa = $q**2; my $next = 0; my $oldF = 0; my $Ps = (); my $Rs = (); my $Qs = (); my $Pc = (); my $Rc = (); my $Qc = (); my $P = (); my $R = (); my $Q = (); my $newP = (); my $newR = (); my $newQ = (); my $F = (); my $df = (); print "Estimating F:\n"; do { $Ps = $ms*($AA + 0.25*$Aa); $Rs = $ms*($aa + 0.25*$Aa); $Qs = $ms*(0.5*$Aa); $Pc = (1-$ms)*($AA**2 + 2*0.5*$AA*$Aa + 0.25*$Aa*$Aa); $Rc = (1-$ms)*($aa**2 + 2*0.5*$aa*$Aa + 0.25*$Aa*$Aa); $Qc = (1-$ms)*(2*$AA*$aa + 0.5*$Aa*$Aa + 2*0.5*$AA*$Aa + 2*0.5*$aa*$Aa); $P = $Pc + $Ps; $R = $Rc + $Rs; $Q = $Qc + $Qs; # new proportions $newP = ($P)/($P + $Q + $R); $newQ = $Q/($P + $Q + $R); $newR = $R/($P + $Q + $R); $newq = ($Q/2 + $R)/($P + $Q + $R); $newp = 1 - $newq; #calculate F on the fly $F = ($newR - $newq**2)/($newq*$newp); print "$F\n"; if ($F > $oldF) {$dF = $F- $oldF;} else {$dF = $oldF - $F;} if ($dF < 0.0000000001) { $next = 1;} if ($dF == 0) {$next = 1;} $oldF = $F; # reset for next gen $AA = $newP; $Aa = $newQ; $aa = $newR; } until ($next == 1); return $F; } sub estimate_q { my $freq = $_[0]; my $f = $_[1]; my $dr = $_[2]; my $q = 0.00001; $lowerlimit = $q; $upperlimit = 0.99999; my $next = 0; my $dq = $upperlimit - $lowerlimit; my $newq = $q; my $p = 1- $q; my $newp = $p; my $newfreq = 0; do { if ($dr == 0) { $newfreq = $q**2 + ($p*$q*$f); #print "newfreq: $newfreq\n"; } else { $newfreq = $q**2 + ($p*$q*$f) + 2*$p*$q - 2*$p*$q*$f; } if ($newfreq > $freq) { # q is too high $upperlimit = $q; } if ($newfreq < $freq) { # q is too low $lowerlimit = $q; } $newq = $lowerlimit + (($upperlimit-$lowerlimit)/2); #print "newq: $newq\n"; if ($newq > $q) { $dq = $newq-$q;} else { $dq = $q - $newq;} if ($dq < 0.000001) { $next = 1}; #print "q: $q\n"; $q = $newq; $p = 1 - $q; } until ($next == 1); return $q; }