use strict; use warnings; our $family = { q{ALB} => q{IE}, q{ANG} => q{IE}, q{CHU} => q{IE}, q{GOH} => q{IE}, q{GOT} => q{IE}, q{GRC} => q{IE}, q{LAT} => q{IE}, q{LIT} => q{IE}, q{NON} => q{IE}, q{SAN} => q{IE}, q{SGA} => q{IE}, q{HUN} => q{Ural}, q{FIN} => q{Ural}, q{CHM} => q{Ural}, q{YRK} => q{Ural}, }; our $critical = .05; sub Family($) { my $str = shift(@_); my $parts = [split(m{-}, $str)]; my $fam = $family->{shift(@$parts)}; while (defined(my $part = shift(@$parts))) { my $part_fam = $family->{$part}; next if $part_fam eq $fam; return q{mixed}; } return $fam; } sub main() { my $head = 1; my $base; my $lang1; my $lang2; my $match; my $tally = {IE => 0, Ural => 0, mixed=> 0, mismatch => 0}; my $N = {}; my $percent_impr; my $biggest_improvement = 0; my $biggest_improved_pair; while (defined(my $line = <>)) { if ($line =~ m{language1 = q{(.+?)}}) { $lang1 = $1; print(qq{\n$lang1 ~}); } elsif ($line =~ m{language2 = q{(.+?)}}) { $lang2 = $1; my $fam1 = Family($lang1); my $fam2 = Family($lang2); if ($fam1 eq $fam2) { $match = $fam1; $N->{$match}++; } else { $N->{mismatch}++; $match = undef; } print(qq{ $lang2:\n}); } elsif ($line =~ m{^([\d\.]+) base distance$}) { $base = $1; print $line; } elsif ($line =~ m{^([\d\.]+) average rearranged distance$}) { my $rearr = $1; print $line; my $diff = $rearr - $base; $percent_impr = sprintf(q{%.0f}, 100 * $diff / $base); print qq{Percent improvement: $percent_impr\n}; } elsif ($line =~ m{^([\d\.]+) p$}) { my $p = $1; print $line; if ($p <= $critical) { if (defined($match)) { $tally->{$match}++; print(qq{--Successful $match match\n}); } else { $tally->{mismatch}++; print(qq{--Unexpected match across families\n}); } if ($percent_impr > $biggest_improvement) { $biggest_improvement = $percent_impr; $biggest_improved_pair = qq{$lang1~$lang2}; } } else { if (defined($match)) { print(qq{--Failed to find intra-$match relationship.\n}); } else { print(qq{--No relationship detected across families.\n}); } } } elsif ($line =~ m{^[A-Z]{3,3}}) { $head = 0; } elsif ($head) { print($line); } } print(qq{\n\n}); my $goods = 0; my $all = 0; foreach my $fam (qw{IE Ural}) { my $this_N = $N->{$fam} || 0; print(qq{Of $this_N $fam pairs, detected $tally->{$fam}\n}); $goods += $tally->{$fam}; $all += $N->{$fam}; } if ($all) { my $percent = sprintf(q{%.0f}, 100 * $goods / $all); print($percent . q{% of intrafamily connections found.}, qq{\n}); } if ($N->{mismatch}) { $percent = sprintf(q{%.0f}, 100 * $tally->{mismatch} / $N->{mismatch}); print($percent . q{% of interfamily connections posited.}, qq{\n}); } if (defined($biggest_improved_pair)) { print(qq{Closest pair: $biggest_improved_pair at $biggest_improvement\%.}); } else { print(qq{No pairs are significantly connected.\n}); } } main();