/*
 * Computes whether the data in group A are significantly more similar to
 * those in group B than one would expect by chance.  Uses a Monte Carlo test.
 *
 * Reads input from stdin:
 *
 *     Number of items in each group (same count applies to each group)
 *     23.4    // distance for A=0 * B=0
 *     16.4    // distance for A=0 * B=1
 *     ...
 *     84.6    // distance for A=1 * B=0
 *     ...
 *     etc
 *
 * Then does a Monte Carlo test to see if the current state of the world
 * (items matched up by the same index in each list) yields a smaller
 * difference measure than in a large number of permutations (swapping
 * indices in one column).
 *
 * Options:
 *     --iters=N   number of random permutations to evaluate (default 1000)
 */

#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Size of the buffers that hold a metric rounded to text. */
enum { ROUNDED_LEN = 80 };

/* Number of random permutations to evaluate; set by --iters. */
static int iters = 1000;

/*
 * Parses the command line.  The only recognised option is --iters=N, where N
 * must be an integer in [1, INT_MAX].  Any malformed or unknown option
 * terminates the program with EXIT_FAILURE.
 */
static void Read_Options(int argc, char* argv[])
{
    static const struct option options[] = {
        {"iters", required_argument, NULL, 'i'},
        {NULL, 0, NULL, 0},
    };
    const char* prog_name = (argc > 0 && argv[0]) ? argv[0] : "program";

    while (1) {
        int c = getopt_long(argc, argv, "", options, NULL);

        if (c == -1) {break;}
        switch (c) {
        case 'i': {
            char* next;
            long value;

            if (!optarg || !optarg[0]) {
                (void) fprintf(stderr, "--iters= should be an integer.\n");
                exit(EXIT_FAILURE);
            }
            errno = 0;
            value = strtol(optarg, &next, 10);
            if (*next) {
                (void) fprintf(stderr,
                               "--iters=%s should be an integer.\n", optarg);
                exit(EXIT_FAILURE);
            }
            /* Zero or negative counts would make the p value meaningless
               (division by zero), and larger values do not fit in an int. */
            if (errno == ERANGE || value < 1 || value > INT_MAX) {
                (void) fprintf(stderr,
                               "--iters=%s should be between 1 and %d.\n",
                               optarg, INT_MAX);
                exit(EXIT_FAILURE);
            }
            iters = (int) value;
            break;
        }
        default:
            /* getopt_long has already reported what it did not like. */
            (void) fprintf(stderr, "Usage: %s [--iters=N]\n", prog_name);
            exit(EXIT_FAILURE);
        }
    }
}

/* Number of items in each group; read from the first line of stdin. */
static int n_items;

/* Reads one integer from stdin; exits with EXIT_FAILURE if none is found. */
static int Read_Int(void)
{
    int i;
    int n_read = scanf("%d\n", &i);

    if (n_read != 1) {
        (void) fprintf(stderr, "Failed to read int from stdin.\n");
        exit(EXIT_FAILURE);
    }
    return i;
}

/* Reads one float from stdin; exits with EXIT_FAILURE if none is found. */
static float Read_Float(void)
{
    float f;
    int n_read = scanf("%f\n", &f);

    if (n_read != 1) {
        (void) fprintf(stderr, "Failed to read float from stdin.\n");
        exit(EXIT_FAILURE);
    }
    return f;
}

/*
 * Returns the total distance of a pairing: the sum over every row i of
 * distances[i][column2[i]], where column2[i] is the column paired with row i.
 */
static double Metric(const int column2[n_items],
                     float distances[n_items][n_items])
{
    double sum = 0;
    int i;

    for (i = 0; i < n_items; i++) {
        sum += distances[i][column2[i]];
    }
    return sum;
}

/* Returns a pseudo-random integer in [0, limit), drawn from rand(). */
static int Random_Number(int limit)
{
    return (int) (((double) limit * rand()) / (RAND_MAX + 1.0));
}

/* Shuffles column2 in place (Fisher-Yates shuffle, Perl Cookbook, p. 121). */
static void Rearrange(int column2[n_items])
{
    int r;

    for (r = n_items - 1; r > 0; r--) {
        int j = Random_Number(r + 1);

        if (r != j) {
            int temp = column2[r];

            column2[r] = column2[j];
            column2[j] = temp;
        }
    }
}

/*
 * Writes value into rounded, formatted to six significant digits.  This is
 * the same text gcvt(value, 6, ...) produces in glibc, without relying on
 * that obsolete function and without an unbounded write.
 */
static void Round_Metric(double value, char rounded[ROUNDED_LEN])
{
    (void) snprintf(rounded, ROUNDED_LEN, "%.6g", value);
}

/*
 * Increments *tally when the permuted metric is no larger than the base
 * metric.  Two metrics that agree to six significant digits count as equal,
 * presumably so that floating-point noise does not decide a tie.
 *
 *   metric        metric of the permuted pairing
 *   base          metric of the original pairing
 *   base_rounded  base as formatted by Round_Metric
 *   tally         running count, updated in place
 */
static void Evaluate_Iter_Metric(
    double metric, double base, const char* base_rounded, int* tally)
{
    char metric_rounded[ROUNDED_LEN];

    Round_Metric(metric, metric_rounded);
    if (base > metric || strcmp(base_rounded, metric_rounded) == 0) {
        *tally = *tally + 1;
    }
}

/*
 * Prints p = tally / iters followed by " p".  The value is printed without a
 * leading zero (".042", or "1.000" when every permutation counted), using as
 * many decimals as are needed to resolve one part in iters.
 */
static void Print_p(int tally)
{
    char p_string[ROUNDED_LEN];
    double p = (double) tally / (double) iters;
    const char* digit;
    long long scale;
    int n_digits = 0;
    int i;

    /* Smallest n_digits with 10^n_digits >= iters, i.e. ceil(log10(iters)),
       computed in integers so it cannot be off by one at powers of ten. */
    for (scale = 1; scale < iters; scale *= 10) {n_digits++;}

    /* p_string is "0.dddddd" or "1.000000". */
    (void) snprintf(p_string, sizeof p_string, "%f", p);
    if (p_string[0] == '1') {(void) printf("1");}
    (void) printf(".");

    digit = p_string + 2;   /* first character after the decimal point */
    for (i = 0; i < n_digits && *digit; i++, digit++) {
        (void) printf("%c", *digit);
    }
    (void) printf(" p\n");
}

/*
 * Reads the n_items x n_items distance matrix from stdin, then prints the
 * distance of the original pairing, the mean distance over iters random
 * pairings, and the fraction of random pairings whose distance was no larger
 * than the original.  The random sequence is seeded with a fixed value, so
 * repeated runs on the same input give the same output.
 */
static void Do_Stats(void)
{
    size_t n;
    double sum = 0.0;
    double base;
    double mean;
    char base_rounded[ROUNDED_LEN];
    int tally = 0;
    int i;

    /* Validate before any array type is sized by n_items. */
    if (n_items < 1) {
        (void) fprintf(stderr,
                       "Number of items should be a positive integer.\n");
        exit(EXIT_FAILURE);
    }
    n = (size_t) n_items;

    /* The matrix grows with the square of n_items, so it lives on the heap
       rather than the stack. */
    float (*distances)[n_items] = NULL;
    int* column2 = NULL;

    if (n <= SIZE_MAX / sizeof(float) / n) {
        distances = malloc(n * sizeof *distances);
        column2 = malloc(n * sizeof *column2);
    }
    if (!distances || !column2) {
        (void) fprintf(stderr, "Out of memory for %d items.\n", n_items);
        exit(EXIT_FAILURE);
    }

    srand(1);

    for (i = 0; i < n_items; i++) {
        int j;

        column2[i] = i;     /* start with the identity pairing */
        for (j = 0; j < n_items; j++) {
            distances[i][j] = Read_Float();
        }
    }

    base = Metric(column2, distances);
    (void) printf("%f base distance\n", base);
    Round_Metric(base, base_rounded);

    for (i = 0; i < iters; i++) {
        double rearranged_metric;

        Rearrange(column2);
        rearranged_metric = Metric(column2, distances);
        Evaluate_Iter_Metric(rearranged_metric, base, base_rounded, &tally);
        sum += rearranged_metric;
    }

    mean = sum / (double) iters;
    (void) printf("%f average rearranged distance\n", mean);
    Print_p(tally);

    free(column2);
    free(distances);
}

/* Parses options, reads the item count from stdin and runs the test. */
int main(int argc, char* argv[])
{
    Read_Options(argc, argv);
    n_items = Read_Int();
    Do_Stats();
    return EXIT_SUCCESS;
}