#!perl # -*- coding: utf-8-unix; -*- =for comment cond_cons.pl - computes conditional consistencies on sound-spelling links Copyright © 2003 by Brett Kessler This file is part of CondCons. CondCons is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. CondCons is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with CondCons; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Requirements: Perl 5. The program was developed under Perl 5.8.0. Read/write access to subdirectory called "out". pgive, an executable file for Monte Carlo tests. It needs to be in out/ A file of lexical information to process (see Load_Words function for details) =cut use strict; use warnings; use Getopt::Long; #Number of rearrangements to use in the Monte Carlo test for type-count based # analyses. For the much larger token-based analyses, one tenth of this # amount will be used: our $N_ITER = 10000; our $output_dir = q{out}; our $pgive; our $words = []; #Matches the vowel in our idiosyncratic coding scheme for North American # English; can be overridden with e.g., --vowel=aeiou my $vowel = qr{[aAcCeEioORuUVWyY]}; #Should we try to assign silent E (e=0 alignment) to vowel and/or coda? # --noassign-silent-e advisable if not English. 
my $assign_silent_e = 1;

$| = 1;

=for comment
Given letter-sound alignments ($align) as a string
(e.g., "t=t r=r a=a c=k t=t"), reformats them to the level of
entire onsets, vowels, and codas, and returns as a list:
  [onset, vowel, coda]
where each element is a list:
  [spelling, pronunciation], e.g.:
  [["tr", "tr"], ["a", "a"], ["ct", "kt"]]
Alignments may be separated by any run of whitespace.  An alignment
that has no pronunciation side is treated as having an empty one, and
an undefined $align yields three empty parts.

=cut

sub Tripartition_Align($) {
    my($align) = @_;
    $align = q{} unless defined($align);
    #Awk-style split: tolerates leading, trailing, and repeated blanks.
    my @alignments = split(q{ }, $align);
    my $onset_spell = q{};
    my $onset_pron = q{};
    my $vowel_spell = q{};
    my $vowel_pron = q{};
    my $coda_spell = q{};
    my $coda_pron = q{};
    foreach my $alignment (@alignments) {
        my($spell, $pron) = split(m{=}, $alignment);
        $spell = q{} unless defined($spell);
        $pron = q{} unless defined($pron);
        #$vowel is a compiled qr// object, so no /o flag is needed here.
        if ($pron =~ m{^(.*?)($vowel)}) {
            my $o_part = $1;
            $vowel_pron = $2;
            $vowel_spell = $spell;
            if ($o_part ne q{}) {
                #E.g., in "cute", vowel "u" in part spells onset /j/
                $onset_spell .= $spell;
                $onset_pron .= $o_part;
            }
        }
        elsif ($vowel_pron eq q{}) {
            #Still in onset
            $onset_spell .= $spell;
            $onset_pron .= $pron;
        }
        elsif ($pron eq q{0} and $assign_silent_e and $spell eq q{e}) {
            #Coda, special case in English: silent E
            my $coda = qq{$coda_spell=$coda_pron};
            my $assigned_to_vowel = 0;
            #Silent E belongs to the vowel if that vowel is otherwise spelt
            # with just 1 letter, and the coda is otherwise a single letter
            # and sound (with a few exceptions):
            if (!$coda_spell
                or ($vowel_spell =~ m{^[aeiouy]$}
                    and $coda =~ m{^(
                        b=b|c=s|ch=k|d=d|f=f|g=D|gu=g|k=k|l=l|m=m|n=n|p=p|r=r|s=[sz]|
                        t=t|th=Q|v=v|z=z|st=st)$}x)) {
                $vowel_spell .= qq{_$spell};
                $assigned_to_vowel = 1;
            }
            #Silent E belongs to the coda if the coda has the following
            # letter-sound correspondences. NB E can belong to vowel AND to coda:
            if (!$assigned_to_vowel
                or $coda =~ m{(d?g=.*D$)|(g=.*Z$)|(c=.*s$)|(s=.*[sz]$)|(th=.*Q$)|
                              (v=.*v$)|(z=.*z$)|(u=)}x) {
                $coda_spell .= qq{_$spell};
            }
        }
        else {
            #Ordinary coda material
            $coda_spell .= $spell;
            $coda_pron .= $pron;
        }
    }
    return [[$onset_spell, $onset_pron],
            [$vowel_spell, $vowel_pron],
            [$coda_spell, $coda_pron]];
}

=for comment
Load all data from a file.
File has one word per line, e.g.:
  aid ed 7 1 l 0 VC ai=e d=d
Format: TAB delimited
  spelling
  pronunciation (NB: a fixed transcription scheme is assumed!)
  frequency - as integer; assumed to be log
  grade1-frequency - as integer; assumed to be raw, and 20 is taken to be
    minimum frequency for a word to be included in Grade 1 word set
  lexical stratum - this field is ignored
  inflected - this field is ignored
  pattern - if "CVC", this word will be included in special
    consonant-vowel-consonant tests
  alignment - space-delimited alignments of spell=pron.
    Special case: e=0 is silent E
The first line of input is read and discarded.  Blank lines are skipped;
lines with fewer than eight fields are skipped with a warning.
Data is entered into a global array $words, where each word is a list
of format:
  [spelling, pronunciation, frequency, grade1-frequency, pattern,
   OVC-alignment]
where the last element is the alignment reorganized into a
[spelling, pronunciation] list for each of [onset, vowel, coda]
As a side-effect, the file OVC-alignments, which lists these tripartite
alignments, is generated.
Note that all statistics are computed over these OVC chunks.
Dies if the OVC-alignments file cannot be written.

=cut

sub Load_Words() {
    my $header = <>;    #First input line is not data; discard it.
    my $alignments_file = qq{$output_dir/OVC-alignments};
    open(my $alignments, q{>}, $alignments_file)
        or die(qq{Can't write $alignments_file: $!});
    while (defined(my $line = <>)) {
        $line =~ s{\r?\n\z}{};    #Like chomp, but also copes with CRLF.
        next unless $line =~ m{\S};
        my($spell, $pron, $freq, $grade1, $gram, $infl, $pat, $align) =
            split(m{\t}, $line);
        if (!defined($align)) {
            warn(qq{Skipping input line $. (fewer than 8 fields): $line\n});
            next;
        }
        my $OVC = Tripartition_Align($align);
        printf {$alignments} q{%-20s }, qq{$spell /$pron/};
        printf {$alignments} qq{%-6s %-5s %-6s\n},
            ($OVC->[0]->[0] || q{-}),
            $OVC->[1]->[0],
            ($OVC->[2]->[0] || q{-});
        push(@$words, [$spell, $pron, $freq, $grade1, $pat, $OVC]);
    }
    #Buffered write errors only surface at close.
    close($alignments)
        or die(qq{Can't finish writing $alignments_file: $!});
}

#Constants.
#Different stats for reading (letter-to-sound) and spelling (sound-to-letter):
use constant READING => 0;
use constant SPELLING => 1;
my $direction_name = [qw{Reading Spelling}];
my $direction_label = [qw{read spell}];

use constant ONSET => 0;
use constant VOWEL => 1;
use constant CODA => 2;
my $syllable_part_name = [qw{Onset Vowel Coda}];
my $syllable_part_label = [qw{onset vowel coda}];

#Different stats for types (each word entered once) and tokens (each word
# counted multiple times, according to its frequency (which is log):
my $TYPES = 0;
my $TOKENS = 1;
my $count_name = [qw{Types Tokens}];
my $count_label = [qw{type token}];

#Different stats for the entire word list, or just those of CVC structure:
use constant ALL_MONOS => 0;
use constant CVC => 1;
my $syllable_structure_name = [q{All Monosyllables}, q{CVC Words Only}];

#Different stats for the entire word list, or just those that do not have
# /r/ in the coda (such words have different sound-spelling correspondences
# in English):
use constant INCLUDE_r => 0;
use constant EXCLUDE_r => 1;
my $include_r_name = [q{Include /r/}, q{Exclude /r/}];

#Constants for accessing elements in word data in $words array.
# Nowadays we would "use fields".
# SPELL and PRON also index the [spelling, pronunciation] pair of each
# syllable part.
use constant SPELL => 0;
use constant PRON => 1;
use constant FREQ => 2;
use constant GRADE1 => 3;
use constant PAT => 4;
use constant OVC => 5;

use constant COUNT_CONSONANTS => 1;

#Assign unique number to each distinct string in a given set.
# $bag is {next => next unused number, hash => {string => number}}.
sub Tokenize($$) {
    my($bag, $string) = @_;
    my $known = $bag->{hash};
    if (!exists($known->{$string})) {
        $known->{$string} = $bag->{next}++;
    }
    return $known->{$string};
}

=for comment
The workhorse.
  $direction - READING (letter-to-sound) or SPELLING (sound-to-letter)
  $syllable_part - ONSET, VOWEL, or CODA
  $count - $TYPES or $TOKENS? (latter is frequency-weighted)
  $syllable_structure - CVC or ALL_MONOS
  $r - INCLUDE_r or EXCLUDE_r (i.e., exclude words with /r/ in coda?)
  $grade1 - minimum grade1-frequency (or undef to not filter by grade1-freq)
  $given - the part of the syllable to use as predictor in cond. cons.:
    ONSET, VOWEL, CODA (if unconditional, use undef)
  $match_from - if defined, run only for words where the syllable_part
    has this content (in the source side)
  $count_given_segments - if defined, then instead of using the $given
    string itself as a predictor, uses the number of segments in that string.
Some of these parameters tell which words to use ($syllable_structure, $r,
$grade1, $match_from); words not passing these filters are completely
ignored.  Once the requested data is extracted from the remaining words,
they are written to a temporary file which is fed to pgive, an external
C program that computes the mean, permuted mean, and p value.  These
values are packaged into a structure and returned:
  {iter => iterations requested, n => number of data rows,
   p => ..., base => ..., pmean => ...}
The last three keys are present only if the corresponding line was found
in pgive's output; each is formatted to 3 decimals with a leading "0"
removed.  Returns undef if no word passes the filters.  Dies if the data
file cannot be written or pgive cannot be run.

=cut

sub Find_Consistency($$$$$$$$$) {
    my($direction, $syllable_part, $count, $syllable_structure, $r,
       $grade1, $given, $match_from, $count_given_segments) = @_;
    my $data = [];
    #FROM is the string on the source side (letters for READING, sounds
    # for SPELLING); TO is the other side.
    my $from_tokens = {next => 0, hash => {}};
    my $to_tokens = {next => 0, hash => {}};
    #"given" is always some other syllable part on the FROM side.
    my $given_tokens = {next => 0, hash => {}};
    my($from_side, $to_side) =
        ($direction == READING) ? (SPELL, PRON) : (PRON, SPELL);

    foreach my $word (@$words) {
        next if $syllable_structure == CVC and $word->[PAT] ne q{CVC};
        next if $r == EXCLUDE_r and $word->[OVC]->[CODA]->[PRON] =~ m{^r};
        next if defined($grade1) and $word->[GRADE1] < $grade1;
        my $part = $word->[OVC]->[$syllable_part];
        my $from = $part->[$from_side];
        my $to = $part->[$to_side];
        #Unconditional runs use the constant 0 as the "given" of every word.
        my $given_extract = (defined($given))
            ? $word->[OVC]->[$given]->[$from_side]
            : 0;
        next if (defined($match_from) and $match_from ne $from);
        my $reps = ($count == $TYPES) ? 1 : $word->[FREQ];
        my $from_token = Tokenize($from_tokens, $from);
        my $to_token = Tokenize($to_tokens, $to);
        my $given_token = (defined($count_given_segments))
            ? length($given_extract)
            : Tokenize($given_tokens, $given_extract);
        #Token weighting: simply repeat the word that many times:
        for (my $rep = 0; $rep < $reps; $rep++) {
            push(@$data, [$from_token, $given_token, $to_token]);
        }
    }
    return unless @$data;

    #Assign a unique name for all the temp files, I guess so we can, if
    # we choose, run them through another statistics program.
    my $file_name =
        qq{$output_dir/$direction$syllable_part$count$syllable_structure$r}
        . ((defined($grade1)) ? $grade1 : q{})
        . q{-}
        . ((defined($given)) ? $given : q{});
    #Write all the words out to the file.  A lexical handle is used
    # because the bareword DATA is Perl's special __DATA__ handle.
    open(my $data_file, q{>}, $file_name)
        or die(qq{Can't write $file_name: $!});
    print {$data_file} scalar(@$data), qq{\n};
    foreach my $datum (@$data) {
        print {$data_file} join(qq{\t}, @$datum), qq{\n};
    }
    #pgive reads this file next, so a failed flush must not go unnoticed:
    close($data_file)
        or die(qq{Can't finish writing $file_name: $!});

    #Run pgive, the external Monte Carlo program.  Token-based runs use a
    # tenth of the iterations; int() keeps --iter a whole number.
    my $n_iter = (defined($given)) ? $N_ITER : 0;
    $n_iter = int($n_iter / 10) if ($count == $TOKENS);
    my $command = (defined($pgive)) ? $pgive : qq{$output_dir/pgive};
    $command .= qq{ --in=$file_name --iter=$n_iter};
    my $output = `$command`;
    die(qq{Can't run "$command": $!}) unless defined($output);

    #Parse pgive output to find the answers:
    my $answer = {iter => $n_iter, n => scalar(@$data)};
    my @wanted = (
        [p => qr{\np\t(\S+)}],
        [base => qr{Base\t(\S+)}],
        [pmean => qr{PMEAN\t(\S+)}],
    );
    foreach my $field (@wanted) {
        my($key, $pattern) = @$field;
        next unless $output =~ $pattern;
        my $formatted = sprintf(q{%.3f}, $1);
        $formatted =~ s{^0}{};
        $answer->{$key} = $formatted;
    }
    return $answer;
}

#If we're looking at one part of the syllable, what are the other two
# parts that can be used as conditionals?  Dies on an unknown part.
sub Other_Parts($) {
    my($first) = @_;
    return (VOWEL, CODA) if $first == ONSET;
    return (ONSET, CODA) if $first == VOWEL;
    return (ONSET, VOWEL) if $first == CODA;
    die(qq{Unknown syllable part "$first"});
}

# (E - O)^2 / E (chi^2 component).  Dies unless E is positive.
sub Part($$) {
    my($E, $O) = @_;
    if ($E <= 0) {die(qq{E = $E!});}
    my $diff = $E - $O;
    return ($diff * $diff) / $E;
}

# X^2 metric (*not* the probability) for a 2 X 2 table.
#  $O1_1 - observed count in the cell of interest
#  $row, $column - marginal totals of that cell's row and column
#  $N - grand total
# Returns (X^2, expected count for the cell of interest), or (0, 0)
# when a marginal covers the whole table.
sub Chi2($$$$) {
    my($O1_1, $row, $column, $N) = @_;
    if ($N <= $row || $N <= $column) {
        return (0, 0);
    }
    my $O1_2 = $row - $O1_1;
    my $O2_1 = $column - $O1_1;
    my $O2_2 = $N - $O1_1 - $O1_2 - $O2_1;
    my $not_row = $N - $row;
    my $not_column = $N - $column;
    my $E1_1 = ($row * $column) / $N;
    my $E1_2 = ($row * $not_column) / $N;
    my $E2_1 = ($column * $not_row) / $N;
    my $E2_2 = ($not_column * $not_row) / $N;
    my $chi2 = Part($E1_1, $O1_1) + Part($E1_2, $O1_2)
             + Part($E2_1, $O2_1) + Part($E2_2, $O2_2);
    return ($chi2, $E1_1);
}

# p values corresponding to various X^2 metrics for a 2 X 2 table (1 df).
# Returns the smallest tabulated level whose critical value is reached,
# or 1 if none is.
sub Chi2_p($) {
    my($chi2) = @_;
    my @critical = (
        [10.828, .001],
        [7.879, .005],
        [6.635, .01],
        [5.024, .025],
        [3.841, .05],
        [2.706, .1],
    );
    foreach my $level (@critical) {
        return $level->[1] if $chi2 >= $level->[0];
    }
    return 1;
}

=for comment
Here begins the part
where we request analyses and write the answers to HTML table.
Map:
&Go starts the "home page" (via &Start_Index).
  Do_Direction(READING)
    Do_Syllable_Part(VOWEL)
      Do_Summary(TYPES)
        Do_Grade(unrestricted by grade): starts and stops a new TABLE
          Do_Syllable_Structure(ALL_MONOS)
            Do_R(INCLUDE_r) writes unconditioned analysis in TABLE
                (via Find_Consistency)
              Do_Given(ONSET) writes one conditional analysis in TABLE
              Do_Given(CODA) (like Do_Given(ONSET))
              print improvement ratio
            Do_R(EXCLUDE_r) (like Do_R(INCLUDE_r))
          Do_Syllable_Structure(CVC) (like Do_Syllable_Structure(ALL_MONOS))
        Do_Grade(grade 1) (like Do_Grade(unrestricted by grade))
      Do_Summary(TOKENS)
    Per_Length(VOWEL)
    Do_Syllable_Part(ONSET) (like Do_Syllable_Part(VOWEL))
    Do_Syllable_Part(CODA) ditto
  Do_Direction(SPELLING) (like Do_Direction(READING))

Does a per-string analysis for a specific syllable part given another.
E.g., may analyze the spelling onset spelling "CH" given the vowel in the
reading direction.
If the string has only one counterpart in the opposite direction (as, e.g,
onset "B" is always /b/) it just reports that it is at ceiling and exits;
no conditional consistency can improve on that.
Otherwise it looks at conditional consistencies: does taking the $given
into account help predict the correspondence for this string?  A Monte
Carlo test is used for that.  If p > .05, or if no p value could be
obtained, reports "n.s." and returns.
Otherwise, runs a chi-squared test to see whether knowing the value of
the "given" (conditional) syllable part significantly helps predict the
value of the counterpart.  If so at p <= .05, reports the p value and lists
the data and examples for the individual predictors.
Input:
  $direction - READING (letter-to-sound) or SPELLING (sound-to-letter)
  $syllable_part - ONSET, VOWEL, or CODA
  $grade1_cutoff - minimum grade1-frequency (or undef to not filter by
    grade1-freq)
  $given - the part of the syllable to use as predictor in cond. cons.:
    ONSET, VOWEL, CODA (if unconditional, use undef)
  $string - restrict only to words that have this value in the FROM part
    of the specified syllable_part.
Always by types (not tokens), and includes all words, including non-CVC
and those with /r/.
Output goes to the STRINGS filehandle, which the caller must have opened.
Returns: 'ceiling', 'n.s.', or 'signif'.
NOTE(review): the literals printed below look as if their HTML markup was
stripped before this copy was made -- confirm against the upstream source.

=cut

sub String_Given($$$$$) {
    my($direction, $syllable_part, $grade1_cutoff, $given, $string) = @_;
    print STRINGS qq{

For string "$string"

\n};
    #Ignoring the given, see if unconditionally fully predictable:
    my $context_free = Find_Consistency($direction, $syllable_part, $TYPES,
        ALL_MONOS, INCLUDE_r, $grade1_cutoff, undef, $string, undef);
    if (!defined($context_free)
        or (defined($context_free->{base})
            and $context_free->{base} >= 1.0)) {
        print STRINGS qq{

Ceiling

\n};
        return q{ceiling};
    }
    #Now run with the given part, as a whole, as predictor:
    my $answer = Find_Consistency($direction, $syllable_part, $TYPES,
        ALL_MONOS, INCLUDE_r, $grade1_cutoff, $given, $string, undef);
    #Only if given as a field is reliable predictor do we proceed with
    # the individual values in that syllable part.  A missing p value
    # must not count as significant (undef would compare as 0).
    if (!defined($answer) or !defined($answer->{p}) or $answer->{p} > .05) {
        print STRINGS qq{

Not significant.

\n};
        return q{n.s.};
    }
    print STRINGS qq{

p = $answer->{p}

\n};
    #Run through all the words that have the specified string in the
    # FROM side, collecting correspondence statistics.
    #Egg has as key all the given : TO pairs.  (E.g., when looking at
    # vowel "A" in READING direction given onset, one key would be
    # "w" : "A" for words like "wand".  For each key, value is full list
    # of words that have those values.  Therefore the hash also acts as
    # a counter for one cell of the 2X2 table:
    my $egg = {};
    #Number of words that have the TO value (regardless of the given value):
    my $N_to = {};
    #Number of words that have the GIVEN value (regardless of TO):
    my $N_given = {};
    #Number of words altogether:
    my $N = 0;
    my($from_side, $to_side) =
        ($direction == READING) ? (SPELL, PRON) : (PRON, SPELL);
    foreach my $word (@$words) {
        next if defined($grade1_cutoff) and $word->[GRADE1] < $grade1_cutoff;
        my $part = $word->[OVC]->[$syllable_part];
        next unless $part->[$from_side] eq $string;
        #An empty given part is shown as "-":
        my $g = $word->[OVC]->[$given]->[$from_side] || q{-};
        my $to = $part->[$to_side];
        push(@{$egg->{qq{$g : $to}}}, $word->[SPELL]);
        $N_to->{$to}++;
        $N_given->{$g}++;
        $N++;
    }
    #For each combination of TO and GIVEN, run the chi^2:
    foreach my $key (sort(keys(%$egg))) {
        #$given_value is one concrete string found in the $given part;
        # it is named apart from the $given parameter (a part index).
        my($given_value, $to) = $key =~ m{^(.*?) : (.*)$};
        my $examples = $egg->{$key};
        my $observed = scalar(@$examples);
        my($chi2, $E) =
            Chi2($observed, $N_to->{$to}, $N_given->{$given_value}, $N);
        my $p = Chi2_p($chi2);
        next if $p > .05;
        #Don't bother reporting *extremely* small samples:
        next if $E < 1 and $observed <= 1;
        print STRINGS qq{

$given_value: $to

\n

}, join(q{, }, @$examples), qq{

\n};
        printf STRINGS qq{

%.1f expected, $observed observed (p <= $p)

}, $E;
    }
    return q{signif};
}

=for comment
Runs a per-string analysis for all of the strings in a particular
syllable part.
Input:
  $direction - READING (letter-to-sound) or SPELLING (sound-to-letter)
  $syllable_part - ONSET, VOWEL, or CODA
  $grade1_cutoff - minimum grade1-frequency (or undef to not filter by
    grade1-freq)
  $given - the part of the syllable to use as predictor in cond. cons.:
    ONSET, VOWEL, CODA (if unconditional, use undef)
  $string_array - list of strings to apply this analysis to, one at a time.
Returns a structure that counts how many of the strings are improved
conditionally, and how many are already at ceiling:
  {name => name of the given part, n_ceiling => ..., n_improved => ...}

=cut

sub Per_String_Given($$$$$) {
    my($direction, $syllable_part, $grade1_cutoff, $given, $string_array)
        = @_;
    my $conditioner = {
        name => $syllable_part_name->[$given],
        n_ceiling => 0,
        n_improved => 0,
    };
    print STRINGS qq{

Given $syllable_part_name->[$given]

\n};
    print STDERR qq{ Given $syllable_part_name->[$given]\n};
    foreach my $string (@$string_array) {
        my $res = String_Given(
            $direction, $syllable_part, $grade1_cutoff, $given, $string);
        if ($res eq q{ceiling}) {
            $conditioner->{n_ceiling}++;
        }
        elsif ($res eq q{signif}) {
            $conditioner->{n_improved}++;
        }
    }
    return $conditioner;
}

=for comment
Lists all of the non-empty strings found in the indicated syllable part
for the indicated side.  E.g., for READING ONSET, lists all the spellings
found in the onset.
Input:
  $direction - READING (letter-to-sound) or SPELLING (sound-to-letter)
  $syllable_part - ONSET, VOWEL, or CODA
  $grade1_cutoff - minimum grade1-frequency (or undef to not filter by
    grade1-freq)
Returns the strings as an array in alphabetical order.

=cut

sub Make_Strings($$$) {
    my($direction, $syllable_part, $grade1_cutoff) = @_;
    my $side = ($direction == READING) ? SPELL : PRON;
    my $strings = {};
    foreach my $word (@$words) {
        next if defined($grade1_cutoff) and $word->[GRADE1] < $grade1_cutoff;
        my $string = $word->[OVC]->[$syllable_part]->[$side];
        next if $string eq q{};
        $strings->{$string}++;
    }
    return [sort(keys(%$strings))];
}

=for comment
For a given part of the syllable (e.g., ONSET in the READING direction),
makes a new HTML page on which are reported conditional per-string
information given each of the other two syllable parts (here, VOWEL, CODA)
in turn.  I.e., gives Per_String_Given information for each of the
two givens.
=cut

#Returns {file_name => name of the page written, n_strings => number of
# distinct strings, conditioner => [one Per_String_Given result per given]}.
# Dies if the page cannot be created.
sub Per_String($$$) {
    my($direction, $syllable_part, $grade1_cutoff) = @_;
    my $facts = {};
    my $file_name =
        qq{$direction_label->[$direction]-$syllable_part_label->[$syllable_part]-strings};
    if (defined($grade1_cutoff)) {
        $file_name .= q{-Grade1};
    }
    $facts->{file_name} = qq{$file_name.html};
    #STRINGS stays a global handle: String_Given and Per_String_Given
    # print to it by name.
    open(STRINGS, q{>}, qq{$output_dir/$file_name.html})
        or die(qq{Can't create file $output_dir/$file_name.html: $!});
    my $date = `date`;    #Shells out to the external "date" command.
    #NOTE(review): the page literals in this sub look as if their HTML
    # markup was stripped before this copy was made -- confirm upstream.
    print STRINGS qq{ $file_name };
    print STDERR qq{ $file_name.\n};
    my $string_array =
        Make_Strings($direction, $syllable_part, $grade1_cutoff);
    $facts->{n_strings} = scalar(@$string_array);
    foreach my $given (Other_Parts($syllable_part)) {
        push(@{$facts->{conditioner}},
             Per_String_Given(
                 $direction, $syllable_part, $grade1_cutoff, $given,
                 $string_array));
    }
    print STRINGS qq{

Webster: Brett Kessler
Last change $date
};
    close(STRINGS);
    return $facts;
}

=for comment
Investigates whether the number of consonants in onset or coda helps
predict vowel.  Only the direction (READING or SPELLING) is specified.
This is computed over all monosyllables, even those with /r/ and those
above 1st grade.
Dies with an explanatory message if pgive gave no usable permuted mean
(the improvement ratio would otherwise divide by zero).

=cut

sub Per_Length($) {
    my($direction) = @_;
    print qq{

Effect of Consonant Cluster Length on Vowel Correspondences

\n};
    print STDERR qq{ Consonant Cluster Lengths\n};
    foreach my $given (ONSET, CODA) {
        print qq{

Given $syllable_part_name->[$given]

\n};
        my $answer = Find_Consistency($direction, VOWEL, $TYPES, ALL_MONOS,
            INCLUDE_r, undef, $given, undef, COUNT_CONSONANTS);
        if (defined($answer)) {
            if (!defined($answer->{base}) or !defined($answer->{pmean})
                or $answer->{pmean} == 0) {
                die(qq{pgive gave no usable result for vowels given }
                    . qq{$syllable_part_name->[$given] length\n});
            }
            printf qq{

C = $answer->{base}, improvement of %.3f over average permuted C of $answer->{pmean} (p = $answer->{p})

\n}, ($answer->{base} - $answer->{pmean}) / $answer->{pmean};
        }
        else {
            printf qq{

No words available.

\n};
        }
    }
}

=for comment
Begins a "Home page" for the analysis.

=cut

sub Start_Index() {
    print qq{ Relationships Between Sounds and Letters in English Monosyllables

These are detailed results of the study described in:

Kessler, B., & Treiman, R. (2001.) Relationships between sounds and letters in English monosyllables. Journal of Memory and Language, 44, 592-617.

Reading and spelling consistencies were computed for onset, vowel, and coda units, over this list of words. The referenced word list gives an alignment for each word, showing how the letters and sounds were assigned to the three parts of the syllable.

};
}

#Ends the "home page" for the analysis.
sub Stop_Index() {
    print qq{ };
}

#Begins a table on a new HTML page.  Argument is the base name of the new
# file.  Leaves the global TABLE handle open for the table-row writers.
sub Start_Table($) {
    my($file_name) = @_;
    open(TABLE, q{>}, qq{$output_dir/$file_name.html})
        or die(qq{Can't create file $output_dir/$file_name.html: $!});
    print TABLE qq{ $file_name

$file_name

};
}

#Print message to STDERR as well as to current STDOUT file.
sub Echo {
    print @_;
    print STDERR @_;
}

#Creates a meaningful name for an HTML page for a table.
sub Table_Name_Base($$$$) {
    my($direction, $syllable_part, $count, $grade1) = @_;
    my $table_label =
        qq{$direction_label->[$direction]-$syllable_part_label->[$syllable_part]-$count_label->[$count]};
    $table_label .= (defined($grade1)) ? qq{-graded} : qq{-ungraded};
    return $table_label;
}

=for comment
Runs a conditional analysis with the specified parameters and writes the
answer as one line in a table.
  $direction - READING (letter-to-sound) or SPELLING (sound-to-letter)
  $syllable_part - ONSET, VOWEL, or CODA
  $count - $TYPES or $TOKENS? (latter is frequency-weighted)
  $grade1 - minimum grade1-frequency (or undef to not filter by grade1-freq)
  $syllable_structure - CVC or ALL_MONOS
  $r - INCLUDE_r or EXCLUDE_r (i.e., exclude words with /r/ in coda?)
  $given - the part of the syllable to use as predictor in cond. cons.:
    ONSET, VOWEL, CODA
Returns {name => name of the given part, improvement => ratio} when the
result is significant (p <= .05); otherwise (no words, no p value, or
p > .05) returns undef.  Dies with an explanatory message if pgive gave
no usable permuted mean.
NOTE(review): both printf formats below are bare newlines, so $improvement
has no conversion to fill; the row markup appears to have been stripped
before this copy was made -- confirm against the upstream source.

=cut

sub Do_Given($$$$$$$) {
    my($direction, $syllable_part, $count, $grade1, $syllable_structure,
       $r, $given) = @_;
    my $answer = Find_Consistency($direction, $syllable_part, $count,
        $syllable_structure, $r, $grade1, $given, undef, undef);
    if (!defined($answer)) {
        printf TABLE qq{\n};
        return;
    }
    if (!defined($answer->{base}) or !defined($answer->{pmean})
        or $answer->{pmean} == 0) {
        die(qq{pgive gave no usable result for }
            . qq{$syllable_part_name->[$syllable_part] given }
            . qq{$syllable_part_name->[$given]\n});
    }
    my $improvement =
        ($answer->{base} - $answer->{pmean}) / $answer->{pmean};
    printf TABLE qq{\n}, $improvement;
    #A missing p must not pass as significant (undef would compare as 0):
    return if !defined($answer->{p}) or $answer->{p} > .05;
    return {name => $syllable_part_name->[$given],
            improvement => $improvement};
}

=for comment
Runs analyses with the specified parameters and writes the answer in a
table.  A separate analysis is run for each of the two possible GIVENs.
  $direction - READING (letter-to-sound) or SPELLING (sound-to-letter)
  $syllable_part - ONSET, VOWEL, or CODA
  $count - $TYPES or $TOKENS?
(latter is frequency-weighted)
  $grade1 - minimum grade1-frequency (or undef to not filter by grade1-freq)
  $syllable_structure - CVC or ALL_MONOS
  $r - INCLUDE_r or EXCLUDE_r (i.e., exclude words with /r/ in coda?)

=cut

sub Do_R($$$$$$) {
    my($direction, $syllable_part, $count, $grade1, $syllable_structure,
       $r) = @_;
    print TABLE (q{});
    print STDERR qq{ $include_r_name->[$r]\n};

    #Unconditional consistency first; without it there is nothing to show.
    my $unconditional = Find_Consistency($direction, $syllable_part, $count,
        $syllable_structure, $r, $grade1, undef, undef, undef);
    unless (defined($unconditional)) {
        print STDERR qq{ No words meet criteria.\n};
        print TABLE qq{\n};
        return;
    }
    print STDERR qq{ N=$unconditional->{n}\n};
    print TABLE qq{\n};

    #One table row per conditioning part; keep only the significant ones.
    my @givens = Other_Parts($syllable_part);
    my @significant;
    foreach my $given (@givens) {
        my $result = Do_Given($direction, $syllable_part, $count, $grade1,
            $syllable_structure, $r, $given);
        push(@significant, $result) if defined($result);
    }

    #The ratio row is written only when every given was significant.
    return if scalar(@significant) < scalar(@givens);
    my($first, $second) = @significant;
    my($hi, $lo) = ($first->{improvement} >= $second->{improvement})
        ? ($first, $second)
        : ($second, $first);
    printf TABLE qq{\n}, $hi->{improvement} / $lo->{improvement};
}

our $rless_desired = 1;

=for comment
Runs analyses for words with the indicated syllable structure (CVC or not).
A separate analysis is run for words with and without /r/.
  $direction - READING (letter-to-sound) or SPELLING (sound-to-letter)
  $syllable_part - ONSET, VOWEL, or CODA
  $count - $TYPES or $TOKENS?
(latter is frequency-weighted)
  $grade1 - minimum grade1-frequency (or undef to not filter by grade1-freq)
  $syllable_structure - CVC or ALL_MONOS

=cut

sub Do_Syllable_Structure($$$$$) {
    my($direction, $syllable_part, $count, $grade1, $syllable_structure)
        = @_;
    print STDERR qq{ $syllable_structure_name->[$syllable_structure]\n};
    print TABLE qq{ };
    foreach my $r (INCLUDE_r, EXCLUDE_r) {
        next if $r == EXCLUDE_r and !$rless_desired;
        Do_R($direction, $syllable_part, $count, $grade1,
             $syllable_structure, $r);
    }
}

our $CVC_desired = 1;

=for comment
Runs analyses with the specified parameters and writes the answer in a
table.  A separate analysis is run for all words and then just CVC words.
  $direction - READING (letter-to-sound) or SPELLING (sound-to-letter)
  $syllable_part - ONSET, VOWEL, or CODA
  $count - $TYPES or $TOKENS? (latter is frequency-weighted)
  $grade1 - minimum grade1-frequency (or undef to not filter by grade1-freq)
Returns the name of the HTML file written.
NOTE(review): the closing literal below appears damaged in this copy: it
contains text and code fragments that look as if they belong to
Start_Table, Do_Given, Do_R and Do_Syllable_Structure, and it interpolates
variables that are not declared in this sub.  It is reproduced unchanged;
restore it from the upstream source before relying on it.

=cut

sub Do_Grade($$$$) {
    my($direction, $syllable_part, $count, $grade1) = @_;
    my $table_label =
        Table_Name_Base($direction, $syllable_part, $count, $grade1);
    Start_Table($table_label);
    my $html_file_name = qq{$table_label.html};
    foreach my $syllable_structure (ALL_MONOS, CVC) {
        next if $syllable_structure == CVC and !$CVC_desired;
        Do_Syllable_Structure(
            $direction, $syllable_part, $count, $grade1,
            $syllable_structure);
    }
    print TABLE qq[
Include /-r/1 Unconditional Consistency Conditional Consistency
Given Attested Permuted2 Improvement3 p
No words
$syllable_part_name->[$given]$answer->{base}$answer->{pmean}%5.3f$answer->{p}
}, (($r == INCLUDE_r) ? q{Yes} : q{No}), q{(No words)
$answer->{base}
$hi->{name} / $lo->{name}%.3f
$syllable_structure_name->[$syllable_structure]

1Whether words with postvocalic /r/ were included.

2Average conditional consistency across $N_ITER permutations randomly reassigning the vowel between words.

3Increase of attested conditional consistency over permuted conditional consistency, as a proportion of the latter. Significance test is one-tailed and asymmetric. ];
    close(TABLE);
    return $html_file_name;
}

our $children_cutoff = 20;

=for comment
Runs analyses with the specified parameters and writes the answer in a
table.  A separate analysis is run for all words and then just grade-1
words.
  $direction - READING (letter-to-sound) or SPELLING (sound-to-letter)
  $syllable_part - ONSET, VOWEL, or CODA
  $count - $TYPES or $TOKENS? (latter is frequency-weighted)
Returns {adult => file name, child => file name}; the child entry exists
only when $children_cutoff is a number.

=cut

sub Do_Summary($$$) {
    my($direction, $syllable_part, $count) = @_;
    print STDERR qq{ $count_name->[$count]\n};
    my $file_names = {};
    $file_names->{adult} =
        Do_Grade($direction, $syllable_part, $count, undef);
    if ($children_cutoff =~ m{^\d+$}) {
        $file_names->{child} =
            Do_Grade($direction, $syllable_part, $count, $children_cutoff);
    }
    return $file_names;
}

our $types_desired = 1;
our $tokens_desired = 1;
our $strings_desired = 1;

#Prints one menu entry for a pair of summary pages.
# $half - structure returned by Do_Summary (nothing is printed if undef)
# $name - label for the kind of count
sub Half_Menu($$) {
    my $half = shift(@_);
    my $name = shift(@_);
    return unless defined($half);
    print qq{

  • By $name, for adult vocabulary};
    if (exists($half->{child})) {
        print qq{; for child vocabulary};
    }
    print qq{
  • \n};
}

=for comment
Runs analyses with the specified parameters and writes the answer in a
table.  A separate analysis is run for type counts and then token counts.
  $direction - READING (letter-to-sound) or SPELLING (sound-to-letter)
  $syllable_part - ONSET, VOWEL, or CODA
The per-string child analysis uses $children_cutoff, the same threshold
as the summary tables, so --children-cutoff applies to both.
NOTE(review): the run of near-empty print statements and the final literal
look as if their table markup was stripped before this copy was made; they
are reproduced unchanged -- confirm against the upstream source.

=cut

sub Do_Syllable_Part($$) {
    my($direction, $syllable_part) = @_;
    my $part_name = qq{$syllable_part_name->[$syllable_part]s};
    Echo(qq{

    $direction_name->[$direction] $part_name

    \n});
    Echo(qq{

    Summaries across all $part_name

    \n});
    my $file_names = {};
    foreach my $count ($TYPES, $TOKENS) {
        next if $count == $TYPES and !$types_desired;
        next if $count == $TOKENS and !$tokens_desired;
        $file_names->{$count} =
            Do_Summary($direction, $syllable_part, $count);
    }
    print qq{\n\n};
    return unless $strings_desired;
    Echo(qq{

    Analysis over each $syllable_part_name->[$syllable_part]

    \n});
    my $adults = Per_String($direction, $syllable_part, undef);
    my $children;
    #Was a hard-coded 20, which ignored --children-cutoff:
    $children = Per_String($direction, $syllable_part, $children_cutoff)
        if $children_cutoff =~ m{^\d+$};
    print qq{\n};
    print qq{\n} if defined($children);
    print qq{\n};
    print qq{} if defined($children);
    print qq{\n};
    print qq{\n};
    print qq{} if defined($children);
    print qq{\n};
    print qq{\n};
    print qq{} if defined($children);
    print qq{\n};
    print qq{\n};
    print qq{} if defined($children);
    print qq{\n};
    print qq{\n};
    print qq{} if defined($children);
    print qq{
    AdultsChildren
    Distinct $syllable_part_name->[$syllable_part] strings $adults->{n_strings}$children->{n_strings}
    At ceiling $adults->{conditioner}->[0]->{n_ceiling}$children->{conditioner}->[0]->{n_ceiling}
    Improved by $adults->{conditioner}->[0]->{name} $adults->{conditioner}->[0]->{n_improved}$children->{conditioner}->[0]->{n_improved}
    Improved by $adults->{conditioner}->[1]->{name} $adults->{conditioner}->[1]->{n_improved}$children->{conditioner}->[1]->{n_improved}
    DetailsDetails
    };
}

our $vowel_desired = 1;
our $onset_desired = 1;
our $coda_desired = 1;
our $length_desired = 1;

=for comment
Runs analyses for the specified direction.  A separate analysis is run for
each part of the syllable as a focus.  In addition, vowels get the special
analysis where the *length* of the onset or coda is used as a predictor.
  $direction - READING (letter-to-sound) or SPELLING (sound-to-letter)

=cut

sub Do_Direction($) {
    my($direction) = @_;
    foreach my $syllable_part (VOWEL, ONSET, CODA) {
        next if $syllable_part == VOWEL and !$vowel_desired;
        next if $syllable_part == ONSET and !$onset_desired;
        next if $syllable_part == CODA and !$coda_desired;
        Do_Syllable_Part($direction, $syllable_part);
        next unless $length_desired;
        Per_Length($direction) if $syllable_part == VOWEL;
    }
}

our $reading_desired = 1;
our $spelling_desired = 1;

#Reads the command-line options into the option globals and makes sure
# the output directory exists.  Dies on bad options or if the directory
# cannot be created.
sub Read_Options {
    my $vowel_string;
    my $children_desired = 1;
    #The name "vowel" used to be declared twice ("vowel:s" and "vowel!");
    # one option name can carry only one specification, so the two roles
    # are split: --vowel[=CHARS] sets the vowel class, --novowel or
    # --no-vowel turns the vowel analyses off.
    my $result = GetOptions(
        q{iter:i} => \$N_ITER,
        q{vowel:s} => \$vowel_string,
        q{output:s} => \$output_dir,
        q{pgive:s} => \$pgive,
        q{assign-silent-e!} => \$assign_silent_e,
        q{reading!} => \$reading_desired,
        q{spelling!} => \$spelling_desired,
        q{novowel|no-vowel} => sub {$vowel_desired = 0;},
        q{onset!} => \$onset_desired,
        q{coda!} => \$coda_desired,
        q{length!} => \$length_desired,
        q{types!} => \$types_desired,
        q{tokens!} => \$tokens_desired,
        q{strings!} => \$strings_desired,
        q{children!} => \$children_desired,
        q{children-cutoff:i} => \$children_cutoff,
        q{cvc!} => \$CVC_desired,
        q{rless!} => \$rless_desired);
    die(qq{GetOptions freaked out: $!}) unless $result;
    if (defined($vowel_string) and $vowel_string =~ m{\S}) {
        $vowel = qr{[$vowel_string]};
    }
    #Test for the directory itself rather than matching the text of $!,
    # which depends on the locale.
    if (!-d $output_dir) {
        mkdir($output_dir)
            or die(qq{Cannot create --output directory $output_dir: $!});
    }
    if (!$children_desired) {$children_cutoff = q{no-children};}
}

#Do it all:
sub Go() {
    Read_Options();
    Start_Index();
    Load_Words();
    foreach my $direction (READING, SPELLING) {
        next if $direction == READING and !$reading_desired;
        next if $direction == SPELLING and !$spelling_desired;
        Do_Direction($direction);
    }
    Stop_Index();
}

Go();