use strict; use warnings; package Signif::Language; use fields qw{file_name id sem_to_words}; use Signif::Word; sub Load { my $type = shift(@_); my $libraries = shift(@_); my $file_name = shift(@_); my $retention_rates = shift(@_); my $suppress_loans_at = shift(@_); my $suppress_redundants = shift(@_); my $suppress_motivateds_at = shift(@_); my $weight_derivs = shift(@_); my $max_prons_per_sem = shift(@_); my Signif::Language $lang = fields::new($type); my $file; my $full_name; foreach my $dir (@$libraries) { my $full_file_name = qq{$dir/$file_name.xml}; if (defined(open($file, q{<}, $full_file_name))) { $full_name = $full_file_name; last; } } if (!defined($full_name)) { die(qq{Data file `$file_name' not found for language data.}); } $lang->{file_name} = $full_name; my $parser = XML::Parser::Expat->new(); my $word; my $pron; $parser->setHandlers( q{Start} => sub {my($p, $element, %attrs) = @_; eval { if ($element eq q{pron}) { $pron = $word->start_pron( $suppress_loans_at, $suppress_redundants, $suppress_motivateds_at, %attrs); } elsif ($element eq q{word}) { my $sem = $attrs{sem}; die($sem) unless defined($sem) and $sem =~ m{^[A-Z]+$}; $word = Signif::Word->new($sem); } elsif ($element eq q{language}) { $lang->{id} = $attrs{id}; print(qq{$lang->{id}\n}); } else {die(qq{Start $element\n});} }; if ($@) {die(qq{$@\nAt $file_name line } . $p->current_line());} }, q{End} => sub {my($p, $element) = @_; eval { if ($element eq q{pron}) { if (defined($pron)) { $pron->end(); if ($pron->pron() eq q{}) {$word->withdraw_pron($pron);} $pron = undef; } } elsif ($element eq q{word}) { $word = $word->end($retention_rates, $weight_derivs, $max_prons_per_sem); #may set itself to undef if no prons left if (defined($word)) { $lang->{sem_to_words}->{$word->sem()} = $word; $word = undef; } } elsif ($element eq q{language}) {} else {die(qq{End $element\n});} }; if ($@) {die(qq{$@\nAt $file_name line } . $p->current_line());} }, q{Char} => sub {my($p, $str) = @_; if (defined($pron)) {$pron->add_char($str);} }, ); $parser->parse($file); close($file); $parser->release(); return $lang; } sub for_each_sem { my Signif::Language $lang = shift(@_); my $cb = shift(@_); while (my($sem, $word) = each(%{$lang->{sem_to_words}})) { my $rating = $word->rating(); $cb->($sem, $rating); } return $lang; } sub get_by_sem { my Signif::Language $lang = shift(@_); my $sem = shift(@_); return $lang->{sem_to_words}->{$sem}; } sub sem_rating { my Signif::Language $lang = shift(@_); my $sem = shift(@_); my $word = $lang->{sem_to_words}->{$sem}; if (!defined($word)) {return 0;} return $word->rating(); } sub select { my Signif::Language $lang = shift(@_); my $active_sems = shift(@_); #list of sems select for use. my $compare_part = shift(@_); #e.g., 'C1' my $compare_metric = shift(@_); #e.g., 'place+voice' my $sem_to_words = $lang->{sem_to_words}; foreach my $sem (@$active_sems) { $sem_to_words->{$sem}->select($compare_part, $compare_metric); } return $lang; } 1;