use strict; use warnings; use bytes; package Signif::Pron; use fields qw{rating pron extract}; #rating starts off as deriv= value, then gets adjusted according to the # sem's retention rate and also the global weight_derivs setting use Signif::PlaceVoiceBundle; use Signif::PlaceBundle; use Signif::CoronalityBundle; sub Get_Real_Number($) { my $in = shift(@_); return 0 if !defined($in) or $in eq q{}; die($in) unless $in =~ m{^[\d.]+$} and $in <= 1; return $in; } =for comment We don't technically need to suppress a redundant thing if the thing it's redundant to is discarded for some other reason. But that reason tends to be ingerited by the redundant thing anyway, so let's just blindly follow the linguist's designation: suppress redundant items regardless of what they're redundant with. =cut sub new { my $type = shift(@_); my $suppress_loans_at = shift(@_); my $suppress_redundants = shift(@_); my $suppress_motivateds_at = shift(@_); my %attrs = @_; my Signif::Pron $p = fields::new($type); my $r = $attrs{r}; #print(qq{ Pron r=$r\n}); die($r) if (defined($r) and $r !~ m{^\d+$}); die($r) if exists($attrs{ERR}); if (Get_Real_Number($attrs{loan}) >= $suppress_loans_at) { print(qq{ Suppressing $r because loan = $attrs{loan}.\n}); return undef; } if (Get_Real_Number($attrs{motiv}) >= $suppress_motivateds_at) { print(qq{ Suppressing $r because motiv = $attrs{motiv}.\n}); return undef; } $p->{rating} = Get_Real_Number($attrs{deriv}); my $redund_str = $attrs{redund} || q{}; my $redund_arr = [split(m{\s+}, $redund_str)]; if ($suppress_redundants and $redund_str ne q{}) { print(qq{ Suppressing $r because redundant to $redund_str\n}); return undef; } #Just some format checking: foreach my $red (@$redund_arr) { if ($red =~ m{^([A-Z]+)(\d+)$}) { #push(@{$p->{redund}}, [$1, $2]); } else {die($red);} } $p->{pron} = q{}; return $p; } sub add_char { my Signif::Pron $p = shift(@_); $p->{pron} .= shift(@_); return $p; } our $ipa_pre_modifier = qr{ˈ|ˌ|↓}; our $ipa_base_consonant = qr{β|ʙ|ç|ð|ɖ|ɱ|ɡ|ɣ|ɥ|ʜ|ɦ|ɲ|ʝ|ɬ|ɮ|ɫ|ʎ|ɺ|ʟ|ɭ|ɰ|ŋ|ɴ|ɳ|ʘ|ɸ|ʁ|ɽ|ɹ|ɻ|ʀ|ʃ|ʂ|ɕ|θ|ʈ|ṽ|ʋ|ʍ|χ|ɧ|ħ|ʒ|ʑ|ʐ|ɾ|ɫ|ʔ|ʕ|ǃ|ǀ|ǁ|ǂ|ʢ|ʡ|[bcdfhjklmnpqrstvwxz]}; our $ipa_base_vowel = qr{ȁ|â|á|à|ā|ǎ|ă|ắ|ằ|ḁ|ä|ǟ|ã|ɑ|ɘ|ə|ɚ|ȅ|ê|é|ḛ|è|ē|ě|ë|ẽ|ɛ|ɶ|ȉ|î|í|ḭ|ì|ī|ǐ|ĭ|ï|ḯ|ĩ|ɪ|ȍ|ô|ó|ò|ō|ǒ|ő|ŏ|ö|ȫ|õ|ṍ|ȭ|õ|ṍ|ȭ|ɔ|ɒ|ȕ|û|ú|ṵ|ù|ū|ǔ|ṳ|ű|ŭ|ü|ǘ|ǜ|ǖ|ǚ|ũ|ṹ|ũ|ṹ|ʊ|ʌ|ỹ|ʏ|ŷ|ý|ỳ|ȳ|ẙ|ÿ|ɨ|ø|ɜ|ɝ|ɞ|ɐ|ɤ|ɵ|œ|æ|ǽ|ǣ|ʉ|↑|[aeiouy]}; our $ipa_base_letter = qr{$ipa_base_consonant|$ipa_base_vowel}; our $ipa_post_modifier = qr{⁀|‿|̺|̘|̏|̜|̪|̴|̂|ˠ|ʰ|́|ʲ|̰|ˡ|̀|̻|̄|ⁿ|̼|̞|̹|̙|̝|̌|̤|̋|̬|ʷ|̆|̥|ˤ|̟|̠|̩|̈|̯|̃|̚|ː|ˑ|ʹ}; our $ipa_modified_letter = qr{$ipa_pre_modifier*$ipa_base_letter$ipa_post_modifier*}; our $ipa_re = qr{$ipa_modified_letter+}; sub end { my Signif::Pron $p = shift(@_); my $pron = $p->{pron}; return $p if $pron eq q{}; die(qq{{$pron} starts with space}) if $pron =~ m{^\s}; die(qq{{$pron} ends with space}) if $pron =~ m{\s$}; if ($pron =~ $ipa_re) {return;} die(qq{{$pron} is not legal IPA}); } sub pron {my Signif::Pron $p = shift(@_); return $p->{pron};} sub adjust_rating { my Signif::Pron $p = shift(@_); my $weight_derivs = shift(@_); # 0..100 my $sem_retention_rate = shift(@_); # 0..100 my $deriv_weight = $p->{rating} * $weight_derivs / 100; # 0..1 my $rating = sprintf(q{%.0f}, $sem_retention_rate - ($sem_retention_rate * $deriv_weight)); print(qq{ /$p->{pron}/, rating=$rating\n}); return $rating; } our $consonant_re = qr{$ipa_base_consonant$ipa_post_modifier*}; our $vowel_re = qr{$ipa_base_vowel$ipa_post_modifier*}; =for comment Extract the part that will be used in the tests. =cut sub extract_part { my Signif::Pron $p = shift(@_); my $compare_part = shift(@_); #e.g., 'C1' my $pron = $p->{pron}; if ($compare_part eq q{C1}) { #Possibly better to get sonority trough of first consonant cluster, # but this is simpler to explain: if ($pron =~ m{($consonant_re)}) { my $c1 = $1; return $c1; } elsif ($pron =~ m{($vowel_re)}) { #Can't find a consonant, so use first vowel return $1; } else { #Can't find a consonant or vowel, so return anything return $pron; } } elsif ($compare_part eq q{P1}) { if ($pron =~ m{($ipa_modified_letter)}) { return $1; } else { #Can't find a segment, so return anything return $pron; } } else { die(qq{Don't know part designation $compare_part.\n}); } return $pron; } =for comment Extract and store the part that will be used in the tests, simplifying to the essential features where possible. =cut sub select { my Signif::Pron $p = shift(@_); my $compare_part = shift(@_); #e.g., 'C1', 'P1' my $compare_metric = shift(@_); #e.g., 'place', 'place+voice' my $part = $p->extract_part($compare_part); #print(qq{ $p->{pron} => \n}); #print(qq{ $part\t}); #To do: convert into features needed for metric if ($compare_part eq q{C1} and $compare_metric eq q{place+voice}) { $p->{extract} = Signif::PlaceVoiceBundle->new($part); } elsif ($compare_metric eq q{place}) { $p->{extract} = Signif::PlaceBundle->new($part); } elsif ($compare_metric eq q{coronality}) { $p->{extract} = Signif::CoronalityBundle->new($part); } else { die(qq{Unrecognized comparison: $compare_part $compare_metric}); } return $p; } sub rating { my Signif::Pron $p1 = shift(@_); return $p1->{rating}; } sub score_against { my Signif::Pron $p1 = shift(@_); my Signif::Pron $p2 = shift(@_); return $p1->{extract}->score_against($p2->{extract}); } 1;