############################
# simulate serial founders #
# and track diversity      #
# Robin Allaby UoW 2018    #
############################

###################################
# v2 update take into account     #
# mating strategy and incremental #
# change over time                #
###################################

#####################################
# v4 update select individuals from #
# the population, generate gametes  #
# to make next generation.          #
#####################################

######################################
# v5 update introduce a mutation rate#
# so allele frequency array will     #
# expand during the course of the    #
# simulation                         #
######################################

######################################
# v6 update introduce a user defined #
# starting heterozygosity to reflect #
# reality better                     #
######################################

$N = $ARGV[0]; #population size
$Nb = $ARGV[1]; # founders
$a = $ARGV[2]; # no. alleles
$f = $ARGV[3]; #no. founder events
$m = $ARGV[4]; # model of gene frequency generation 0, uniform, x, average frequency of most common allele
$ms = $ARGV[5]; # mating strategy
$deltams = $ARGV[6]; # rate of change of mating strategy
$mu = $ARGV[7];
$fast = $ARGV[8]; #fast option (1) continually cleans out allele array losing lost lineages, and losing tracking allele histories
$starthet = $ARGV[9];
$startms = $ARGV[10]; # which mating system to use to calculate the starting frequencies

open (OUT, ">founder.out") or die "Dead\n";
open (TEST, ">testfile") or die "No testfile\n";

#initialize by attributing allele frequencies - drawn from a uniform distribution
@allele_freqs = ();
@raw_freqs = ();
$x = 0;

while ($x < $a) {
	$allele = int rand(1000);
	$allele = ($allele+1)/1000;
	push (@raw_freqs, $allele);


$x++;
}
#print "m: $m\n";
#print "raw freqs before: @raw_freqs\n";
#if ($m > 0) {
#	#amplify the first frequency to reflect the input frequency average
#	$value = $raw_freqs[0];
#	$factor = $m/(1-$m);
#	$value = $m*$factor; # assumes equable frequencies (uniform), amplified by a factor that would boost
#				# the equable frequency up to the $m frequency
#	splice (@raw_freqs, 0, 1, $value);
#}
#print "after: @raw_freqs\n"; exit;
if ($m == 1) {


}

$total = 0;
foreach $raw (@raw_freqs) { $total = $total + $raw;}
foreach $raw (@raw_freqs) {
	$freq = $raw/$total;
	push (@allele_freqs, $freq);
}

$F = estimate_inbreeding_coefficient ($ms);
print TEST "F: $F\t";

print "allele frequencies: @allele_freqs\n";
# work out diversity

$allele_freqs = join ("\t", @allele_freqs);

if ($m == 1) {
	print "searching for allele frequencies....\n";
	$startF = estimate_inbreeding_coefficient($startms);
	$next = 0;
		$het = calculate_heterozygosity ($allele_freqs, $startF);
		if ($het > $starthet) { $gounder = 1;} else {$gounder = 0;}
		@old_allele_freqs = (); @old_allele_freqs = @allele_freqs;
	do {	
		# randomly take 0.01 off one frequency and add to another

		$loser = rand ($a);
		$winner = rand ($a);
		$losingvalue = $allele_freqs[$loser]; $losingvalue = $losingvalue - 0.001; 
		if ($losingvalue < 0) {$losingvalue = 0;}
		$winningvalue = $allele_freqs[$winner]; $winningvalue = $winningvalue + 0.001;
		splice (@allele_freqs, $loser,1,$losingvalue);
		splice (@allele_freqs, $winner,1,$winningvalue);
		$raw_freqs = join ("\t", @allele_freqs);
		$allele_freqs = convert_raw_to_pdf ($raw_freqs); # possible maniuplations above can result in frequencies no longer summing to 1	
		@allele_freqs = (); @allele_freqs = split (/\t/, $allele_freqs);
		$newhet = calculate_heterozygosity ($allele_freqs, $startF);
		$accept = 0;
		if ($newhet < $het) { if ($gounder == 1) {$accept = 1;} else {$accept = 0;} } else { if ($gounder == 1) {$accept = 0;} else {$accept = 1;}}
		if ($accept == 1) { @old_allele_freqs = (); @old_allele_freqs = @allele_freqs; $het = $newhet;} else {@allele_freqs = (); @allele_freqs = @old_allele_freqs;}
		if ($gounder == 1) { if ($het < $starthet) {$next = 1;}} else {if ($het>$starthet) {$next = 1;}}
		print "."; 

	} until ($next == 1);

	# take out your dead....
	$allele_freqs = clean_out_the_dead ($allele_freqs);
	@allele_freqs = (); @allele_freqs = split (/\t/, $allele_freqs);
	$a = scalar (@allele_freqs);	
}
print "allele frequncies: @allele_freqs\n";
print "het: $het\n";
#exit;

#$het = calculate_heterozygosity ($allele_freqs);
#print "heterozygosity: $het\n"; exit;
$g = 0;

		#draw out N individuals from infinite population derive new frequencies
		$n = 0; @new_raw = (); #@het_array = ();
		foreach $al (@allele_freqs) {push (@new_raw, 0);}
		$allele_freqs = join ("\t", @allele_freqs);
		print TEST "allele freqs: @allele_freqs\n";

		# to check my probabilities here
		$flop = 0;
		foreach $e (@allele_freqs) {
			$hetslist = one_times_all ($allele_freqs, $flop);
			$hetstot = add_up_array($hetslist);
			$hetsprop = ($hetstot-($F*$hetstot));
			$phom = ($e*$e) + ($F*$hetstot);
			$conditionalp = $phom/($phom+$hetsprop);
			print TEST "allele $e phom: $phom\t conditional: $conditional\thetstotal: $hetstot\thetsprop: $hetsprop\n";
			$flop++;
		}

			$genotype_matrix4 = make_genotype_matrix ($allele_freqs, $F);
			$alleleno = scalar @allele_freqs;
			$allelehi = $alleleno - 1;
			print TEST "genotype matrix: $genotype_matrix4\n";
		while ($n < $N) {

			$ind1 = draw_individual2 ($genotype_matrix4, $a);
			#print TEST "ind1:\t$ind1\n";
			$seed = generate_seed (1000000000000);
			if ($seed > $ms) { $ind2 = draw_individual2 ($genotype_matrix4, $a);
			#print TEST "$ind1\n";
			} else { $ind2 = $ind1;}
			$prog = mate_individuals ($ind1, $ind2);

			@prog = (); @prog = split (/\t/, $prog);
			$ad = $prog[0]; $sad = $prog[1];

			# opportunity for mutation
			@newprog = ();
			foreach $allele (@prog) {
				$check = check_mutation($mu);
				if ($check ==1) {
					# we add another allele to the list
					$allelehi++;
					#$a++;
					push (@newprog, $allelehi);
					push (@new_raw, 0);
				} else {
					push (@newprog, $allele);
				}
			}			
			@prog = (); @prog = @newprog;
			$ad = $prog[0]; $sad = $prog[1];

		#	print TEST "$ad\t$sad\n";

			#@ind = ();
			#@ind = split (/\t/, $ind1);
			#$ad = $ind[0]; $sad = $ind[1];

			$curr = $new_raw[$ad]; $curr++; splice (@new_raw, $ad, 1, $curr);
			$curr = $new_raw[$sad]; $curr++; splice (@new_raw, $sad, 1, $curr);
	$n++;
		}

		$new_raw = join ("\t", @new_raw);
		if ($fast == 1) {
		$new_raw = clean_out_the_dead($new_raw);} # this will make it run faster by losing dead lineages, but lose allele tracking
		$new_freqs = convert_raw_to_pdf ($new_raw);
		#@new_freqs = split (\t\, $new_freqs);
		print TEST "new freqs: $new_freqs\n";
		$het = calculate_heterozygosity ($new_freqs, $F);
	
		print OUT "$het\n";

while ($g < $f) {		
		# begin with founder
		
		if ($g == 0) {print TEST "$g\t$new_freqs\n";}
		#print TEST "go in :\t$new_freqs\n";
		$new_raw = sample_N_from_pdf ($Nb, $new_freqs, $F, $ms, $mu);
                if ($fast == 1) {
                $new_raw = clean_out_the_dead($new_raw);} # this will make it run faster by losing dead lineages, but lose allele tracking

		$new_freqs = convert_raw_to_pdf ($new_raw);
		print TEST "$g\tnew freqs\t$new_freqs\tnew raw:\t$new_raw\n";
		#expand to N
	#exit;
		#print TEST "go in :\t$new_freqs\n";
		$new_raw = sample_N_from_pdf ($N, $new_freqs, $F, $ms, $mu);
                if ($fast == 1) {
                $new_raw = clean_out_the_dead($new_raw);} # this will make it run faster by losing dead lineages, but lose allele tracking
		$new_freqs = convert_raw_to_pdf ($new_raw);
		#print TEST "$g\t$new_freqs\n";
		print TEST "\tnew freqs:\t$new_freqs\tnew raw:\t$new_raw\n";
		$het = calculate_heterozygosity ($new_freqs, $F);
                print OUT "$het\n";
	
	$g++; 
	print "$g\n";
	$ms = $ms + $deltams;
	unless ($deltams == 0) {$F = estimate_inbreeding_coefficient($ms);}

}


close OUT;
close TEST;

#########################
# SUBROUTINES           #
#########################
sub clean_out_the_dead {
			$allele_freqs5 = $_[0];
			# remove all zero values from the array to stop inordinate expansion
			my @allele_freqs5 = (); my $frequency1 = (); my @allele_freqs6 = ();
			my $allele_freqs6 = ();

			@allele_freqs5 = split (/\t/, $allele_freqs5);
			foreach $frequency1 (@allele_freqs5) { 
				unless ($frequency1 == 0) { push (@allele_freqs6, $frequency1);}
			}
			$allele_freqs6 = join ("\t", @allele_freqs6);
		return $allele_freqs6;
}

sub check_mutation {
			my $murate = $_[0];
			my $seed7 = (); my $mutate = 0;
			my $seed7 = generate_seed (1000000000000000000000);
			if ($seed7 < $murate) {$mutate = 1;}
		return $mutate;
}
sub mate_individuals {
			my $ind11 = $_[0];
			my $ind12 = $_[1];
			my @prog1 = (); my @ind11 = (); my @ind12 = ();
			my $seed2 = (); my $gam1 = (); my $gam2 = ();
			my $prog1 = ();

			@ind11 = split (/\t/, $ind11); @ind12 = split (/\t/, $ind12);
			#print TEST "ind1:$ind11[0]\t$ind11[1]\tind2:$ind12[0]\t$ind12[1]\n";			

			$seed2 = generate_seed (10000000000000);
			if ($seed2 > 0.5) { $gam1 = $ind11[1];} else {$gam1 = $ind11[0];}
			$seed2 = generate_seed (10000000000000);
                        if ($seed2 > 0.5) { $gam2 = $ind12[1];} else {$gam2 = $ind12[0];}	
			push (@prog1, $gam1, $gam2);
		$prog1 = join ("\t", @prog1);
	return $prog1;
}


sub make_genotype_matrix {
			my $allele_freqs4 = $_[0];
			my $F = $_[1];
			# new method - make matrix of genotypes expected and draw from that (no biased gametes!)

			my @genotype_matrix = (); my $x2= (); my $allele3 = (); my $hetstot2 = (); my $adjusted_hettot = ();
			my $homo = (); my $x3 = (); my $allele4 = (); my $het2 = (); my $genotype_matrix = ();
			my @allele_freqs4 = (); my $hetstot2list = ();

			@allele_freqs4 = split (/\t/, $allele_freqs4);
			#print TEST "frequencies going in: $allele_freqs4\n";
			$x2 = 0;
			
			foreach $allele3 (@allele_freqs4) {
					$hetstot2list = one_times_all ($allele_freqs4, $x2);
					$hetstot2 = add_up_array ($hetstot2list);
					$adjusted_hettot = $hetstot2*$F;
					#print TEST "hetstot2:\t$hetstot2\tadjusted:\t$adjusted_hettot\n";
					$homo = ($allele3*$allele3) + $adjusted_hettot;
					#print TEST "homo:\t$homo\n";
					$x3 = 0;
					foreach $allele4 (@allele_freqs4) {
							if ($x3 == $x2) {
								push (@genotype_matrix, $homo);
							} else {
								$het2 = ($allele4*$allele3) - ($F*($allele4*$allele3));
								push (@genotype_matrix, $het2);
							}
					$x3++;
					}

					$x2++; 	
			}
		$genotype_matrix = join ("\t", @genotype_matrix);
		#print TEST "genotype_matrix: $genotype_matrix\n";
		return $genotype_matrix;

}

sub draw_individual2 {
			my $genotype_matrix2 = $_[0];
			my $a3 = $_[1];

			$seed6 = generate_seed (10000000000000); 
			$ad6 = sample_from_distribution ($genotype_matrix2, $seed6);
			#print TEST "ad6: $ad6\n";
		
			$ad6++; # have to switch from 0 counting to 1 counting
			$genotype2 = retrieve_genotype ($ad6, $a3); 			
		return $genotype2;

}

sub retrieve_genotype {
                        my $ad7 = $_[0];
                        my $a4 = $_[1];

                        my @genotype_matrix = (); my $first = (); my $second = (); my @genotype3 = (); my $genotype3 = ();
                        my $secondint = (); my $secondfrac = (); my $firstfrac = (); my $whole = ();
			my $number1 = (); my @number1 =(); my $int = (); my $frac = (); 


                        @genotype_matrix = split (/\t/, $genotype_matrix);


                        $whole = $ad7/$a4;
                        #print "whole: $whole\n";
                        $number1 = break_up_number ($whole);
                        @number1 = split (/\t/, $number1);
                        $int = $number1[0];
                        $frac = $number1[1];

                        #if ($whole>$int) { $int++;}
                        #$seconda = $secondint-1;
                        $firstfrac = ($frac*$a4);
                        if ($firstfrac == 0) {$first = ($a4-1);} else {$first = $firstfrac - 1;}
                        if ($frac == 0) {$int--;}
                        #print "first:\t$first\nsecond:\t$int\n";
                        #if ($first < 1) {$first = 0;
                        #print TEST "first corrected to $first";
                        #}
                        #print TEST "\n";
                push (@genotype3, $first, $int);
                $genotype3 = join ("\t", @genotype3);
        return $genotype3;

}

sub break_up_number {
        #my alternative to the int function which causes black magic weirdness
                        my $number = $_[0];
                        my @number = (); my $next = 0;
                        my $reduced = $number;
                        my $x7 = 0; my $integer = (); my $fraction = (); my $broken_number = ();
                        do {
                                $reduced = $reduced - 1;
                                if ($reduced < 0) {$next = 1;}
                                $x7++;

                        } until ($next==1);
                        #unless ($reduced == 0) {$reduced = $reduced +1;}
                        $integer = $x7-1;
                        $fraction = $number-$integer;
                        push (@number, $integer, $fraction);
                        $broken_number = join ("\t", @number);
                        #print "broken number: integer($integer), fraction($fraction)\n";
        return $broken_number;

}


sub draw_individual {
			my $allele_freqs1 = $_[0];
			my $F = $_[1];
			my $seed1 = (); my $ad1 = (); my $hetslist1 = (); my $hetstot1 = (); my $allele1 = ();
			my $hetsprop1 = (); my $homprop1 = (); my $phom1 = (); my $sad1 = (); my $next1 = ();
			my $tries1 = (); my @genotype1 = (); my $genotype1 = ();

			print TEST "drawing individual from:\t$allele_freqs1\t";
			$seed1 = generate_seed (10000000000000); 
			$ad1 = sample_from_distribution ($allele_freqs1, $seed1);
			print TEST "ad: $ad1\t";
                                $hetslist1 = one_times_all ($allele_freqs1, $ad1);
                                $hetstot1 = add_up_array ($hetslist1);
				$allele1 = $allele_freqs1[$ad1];
				$hetsprop1 = ($hetstot1-($F*$hetstot1)); #conditional hetsprop - only half of HWE because we have first allele
				#remaining frequency must be made up of homs
                                $homprop1 = ($allele1*$allele1) + ($F*$hetstot1); # this is the probability if drawing from start - we already know
				if ($homprop1 == 0) { $phom1 = 0;
				#print TEST "homprop failure\n";
				} else {
				$phom1 = $homprop1/($homprop1+$hetsprop1); # prob of homology given the first allele....
				}


			$seed1 = generate_seed(1000000);
			if ($seed1 < $phom1) {
				$sad1 = $ad1; } else {
				$next1 = 0; $tries1 = 0;
				do {
				$seed1 =  generate_seed (1000000);
				$sad1 = sample_from_distribution ($allele_freqs1, $seed1);
				unless ($sad1 == $ad1) {$next1 = 1;}
				$tries1++; if ($tries1 == 1000) { print TEST "hit 1000\t"; $next1 = 1;}
				} until ($next1 == 1);
				}
				push (@genotype1, $ad1, $sad1);
				$genotype1 = join ("\t", @genotype1);
		print TEST "genotype: $genotype1\n";
		return $genotype1;

}
sub estimate_inbreeding_coefficient {
				my $ms = $_[0];
				my $p = 0.5; my $q = 0.5;
			
				#start off with Hardy-Weinberg proportions, and let the frequencies settle out

				my $AA = $p**2; my $Aa = 2*$p*$q; my $aa = $q**2; my $next = 0; my $oldF = 0;
				my $Ps = (); my $Rs = (); my $Qs = (); my $Pc = (); my $Rc = (); my $Qc = ();
				my $P = (); my $R = (); my $Q = (); my $newP = (); my $newR = (); my $newQ = ();
				my $F = (); my $df = ();


				print "Estimating F:\n";

				do {
					$Ps = $ms*($AA + 0.25*$Aa);
					$Rs = $ms*($aa + 0.25*$Aa);
					$Qs = $ms*(0.5*$Aa);

					$Pc = (1-$ms)*($AA**2 + 2*0.5*$AA*$Aa + 0.25*$Aa*$Aa);
					$Rc = (1-$ms)*($aa**2 + 2*0.5*$aa*$Aa + 0.25*$Aa*$Aa);
					$Qc = (1-$ms)*(2*$AA*$aa + 0.5*$Aa*$Aa + 2*0.5*$AA*$Aa + 2*0.5*$aa*$Aa);
					
					$P = $Pc + $Ps;
					$R = $Rc + $Rs;
					$Q = $Qc + $Qs;

					# new proportions
					$newP = ($P)/($P + $Q + $R);
					$newQ = $Q/($P + $Q + $R);
					$newR = $R/($P + $Q + $R);

					$newq = ($Q/2 + $R)/($P + $Q + $R);
					$newp = 1 - $newq;


					#calculate F on the fly
					$F = ($newR - $newq**2)/($newq*$newp);
					#print "$F\n";
					if ($F > $oldF) {$dF = $F- $oldF;} else {$dF = $oldF - $F;}
					if ($dF < 0.0000000001) { $next = 1;}
					if ($dF == 0) {$next = 1;}
					$oldF = $F;

					# reset for next gen
					$AA = $newP;
					$Aa = $newQ;
					$aa = $newR;					

				} until ($next == 1);
		return $F;
}


sub sample_N_from_pdf {
			my $N = $_[0];
			my $pdf1 = $_[1];
			my $F = $_[2];
			my $ms = $_[3];
			my $mu = $_[4];
		
			my @pdf1 = (); my $allele2 = ();
			@pdf1 = split (/\t/, $pdf1);
			my @new_raw1 = (); my $a2 = (); my $n2 = 0; my $seed3 = (); my $curr1 = (); my $ad2 = ();
		 	my $sad2 = (); my $ind21 = (); my $ind22 = (); my $prog2 = (); my @prog2 = ();
			my $scurr1 = (); my $new_raw1 = (); #my $cr1 = ();
			my $a6 = (); my @newprog2 = (); my $check1 = ();
			$a6 = scalar @pdf1;
			foreach $a2 (@pdf1) {push (@new_raw1, 0);}
	#		print TEST "pdf is :\t$pdf1\n";
			
			$genotype_matrix3 = make_genotype_matrix ($pdf1, $F);
			#print "genotype matrix: $genotype_matrix3, a6: $a6\n";
			$totsall = scalar @pdf1;
			$allelehi2 = $totsall - 1;
			while ($n2 < $N) {
				

			$ind21 = draw_individual2 ($genotype_matrix3, $a6);
			$seed3 = generate_seed (1000000000000);
			if ($seed3 > $ms) { $ind22 = draw_individual2 ($genotype_matrix3, $a6);
		#	print TEST "0\t";
			} else { 
		#	print TEST "1\t";
			$ind22 = $ind21;}
			$prog2 = mate_individuals ($ind21, $ind22);

			@prog2 = (); @prog2 = split (/\t/, $prog2);
			$ad2 = $prog2[0]; $sad2 = $prog2[1];
			# opportunity for mutation
			@newprog2 = ();
			foreach $allele2 (@prog2) {
				$check1 = check_mutation($mu);
				if ($check1 ==1) {
					# we add another allele to the list
					$allelehi2++;
					#$a++;
					push (@newprog2, $allelehi2);
					push (@new_raw1, 0);
				} else {
					push (@newprog2, $allele2);
				}
			}		
			$check1 = 0;	
			@prog2 = (); @prog2 = @newprog2;
			$ad2 = $prog2[0]; $sad2 = $prog2[1];

			#print TEST "$ad2\t$sad2\n";

                       #my @ind = ();
                       #@ind = split (/\t/, $ind1);
                       #$ad = $ind[0]; $sad = $ind[1];


			$curr1 = $new_raw1[$ad2]; $curr1++; splice (@new_raw1, $ad2, 1, $curr1);
			$scurr1 = $new_raw1[$sad2]; $scurr1++; splice (@new_raw1, $sad2, 1, $scurr1);

                	$n2++;
                	}
                $new_raw1 = join ("\t", @new_raw1);
		#print TEST "new raw is:$new_raw1\n";
	#	exit;
		return $new_raw1;
}


sub convert_raw_to_pdf {
			my $list1 = $_[0];
			my @list1 = ();
			my @list1 = split (/\t/, $list1);
			my $total1 = add_up_array ($list1);
			my $bit1 = (); my $val1 = (); my @pdf2 = (); my $pdf2 = ();
	

			foreach $bit1 (@list1) {
				$val1 = $bit1/$total1;
				push (@pdf2, $val1);
			}
		$pdf2 = join ("\t", @pdf2);
		return $pdf2;

}

sub sample_from_distribution {
			my $pdf3 = $_[0];
		#	print TEST "pdf3:\t$pdf3\n";

			my $seed4 = $_[1];
		#	print TEST "seed4:\t$seed4\t";
			my @pdf3 = ();
				@pdf3 = split (/\t/, $pdf3);
			my $c1 = 0;
			my $val = (); 
			my $cum = 0;
			my $found = 0; my $ad4 = ();
			foreach $val (@pdf3) {
				$cum = $cum + $val;
				if ($seed4 < $cum) {
						if ($found == 0) {
							$ad4 = $c1; $found = 1;
						}
				}
			$c1++;
			}
		if ($pdf3[$ad4] == 0) {print TEST "fault:\npdf:$pdf3\nseed:$seed4\nad:$ad4\n";exit;}
		#print TEST "chose $ad4\n";
		return $ad4;
		}

sub generate_seed {
		my $n = $_[0];
		my $seed5 = int rand ($n);
		$seed5 = (($seed5+0.00000000000001)/($n+0.00000000000001));
		return $seed5;
}

sub add_up_array {
		my $list = $_[0];
		my @list = split (/\t/, $list);
		my $total2 = 0;
		my $bit = (); 
		foreach $bit (@list) {$total2 = $total2 + $bit;}
	return $total2;

}

sub one_times_all {
			my $allele_freqs2 = $_[0];
			my $ad5 = $_[1];

			my @allele_freqs2 = (); my @binaries = ();
			@allele_freqs2 = split (/\t/, $allele_freqs2);
			
			my $x1 = 0; my $all = (); my $bin = ();
			foreach $all (@allele_freqs2) {
				unless ($x1 == $ad5) {
					$bin = $all*$allele_freqs2[$ad5];
					push (@binaries, $bin);
				}
			$x1++;
			}
			$binaries = join ("\t", @binaries);
	return $binaries;

}

sub calculate_heterozygosity {
			my $allele_freqs3 = $_[0];
			my $F = $_[1];
			my @allele_freqs3 = ();
			my $phom2 = ();
			my @homozygosities = ();	
			my $total_hom = ();
			my $het1 = ();
			my $homozygosities = ();
			@allele_freqs3 = split (/\t/, $allele_freqs3);

			#calculate all homozygosity probabilities
			my $c = 0;
			foreach $allele (@allele_freqs3) {
			#	print "$allele\n";
				$hetslist = one_times_all ($allele_freqs3, $c);
				$hetstot = add_up_array ($hetslist);
				$phom2 = ($allele*$allele) + ($F*$hetstot);
				push (@homozygosities, $phom2);
				$c++;

			}
			$homozygosities = join ("\t", @homozygosities);
			$total_hom = add_up_array ($homozygosities);
			#print "homozygosities: $homozygosities\n";
			#print "total hom: $total_hom\n";
		$het1 = 1 - $total_hom;
		return $het1;

}