# annotate: attach dictionary translations to the lines of the listed files.
#
# Input (stdin / ARGV files): every whitespace-separated field of every
# non-empty record is taken as the name of a file to process.
#
# For each file FILE:
#   - annotated text is written to <base>ann
#   - de-duplicated diagnostics are written (via "sort -u") to <base>err
# where <base> is FILE up to and including the last "." of its final path
# component ("foo.txt" -> "foo.ann").  A name without such a suffix is used
# as-is, so "foo" -> "fooann" (kept for compatibility with existing outputs).
#
# dict.txt (read from the current directory) is tab-separated:
#   column 1: word (lookup key)
#   column 2: review flag ("*" or "?" cause a CHECK marker to be appended)
#   column 3: translation, space-separated, matched token by token against
#             the space-separated tokens of the input line

BEGIN {
    dict = "dict.txt"
    while ((rc = (getline l < dict)) > 0) {
        split(l, la, "\t")
        star[la[1]] = la[2]
        trns[la[1]] = la[3]
    }
    # getline yields -1 when the file cannot be opened or read.
    if (rc < 0)
        print "annotate: cannot read dictionary", dict > "/dev/stderr"
    close(dict)
}

/./ {
    for (f = 1; f <= NF; f++) {
        # Only look for the suffix in the last path component; "/" is
        # excluded so that "./foo" or "dir.d/foo" are not cut at the
        # directory's dot.
        sfx = match($f, /\.[^.\/]+$/)
        if (sfx != 0)
            basename = substr($f, 1, sfx)    # keeps the trailing "."
        else
            basename = $f

        outfile = basename "ann"

        # The diagnostics file name goes through the shell, so single-quote
        # it (and escape embedded single quotes) to survive spaces and
        # metacharacters.
        errfile = basename "err"
        gsub(/'/, "'\\''", errfile)
        errpipe = "sort -u > '" errfile "'"

        print "annotate: processing file", $f, "->", outfile > "/dev/stderr"

        while ((rc = (getline l < $f)) > 0) {
            # Lines of at least two characters whose first character lies
            # outside the range "!".."z" are candidates for annotation;
            # everything else is copied through unchanged.
            if (l ~ /^[^!-z]./) {
                # NOTE(review): these four literals look like double-byte
                # punctuation shown in a single-byte encoding - confirm the
                # file encoding before touching them.
                if (l == "¡C" || l == "¡B" || l == "¡u" || l == "¡v") {
                    # don't complain about unknown words
                    printf "%s", l > outfile
                } else {
                    # Dictionary keys are the line with all spaces removed.
                    w = l
                    gsub(/ /, "", w)

                    if (w in trns) {
                        t = trns[w]
                        n = split(l, la, " ")
                        m = split(t, ta, " ")
                        # Token counts should agree; report the entry but
                        # still emit what we have (missing ta[i] are empty).
                        if (m != n)
                            printf "problematic entry: %s %s\n", w, t | errpipe

                        printf "%s//%s", la[1], ta[1] > outfile
                        for (i = 2; i <= n; i++)
                            printf "\t%s//%s", la[i], ta[i] > outfile

                        if (star[w] == "*")
                            printf "\tCHECK*" > outfile
                        else if (star[w] == "?")
                            printf "\tCHECK?" > outfile
                    } else {
                        # Whole word unknown: fall back to looking up each
                        # token on its own and mark every guess with "???".
                        printf "unknown word: %s, guessing ...", w | errpipe
                        n = split(l, la, " ")
                        for (i = 1; i <= n; i++) {
                            if (la[i] in trns) {
                                printf "%s//%s???\t", la[i], trns[la[i]] > outfile
                            } else {
                                printf "%s//DUNNO???\t", la[i] > outfile
                                printf ", unknown character: %s", la[i] | errpipe
                            }
                        }
                        printf "\n" | errpipe
                    }
                }
            } else {
                printf "%s", l > outfile
            }
            printf "\n" > outfile
        }
        if (rc < 0)
            print "annotate: cannot read file", $f > "/dev/stderr"

        fflush("")
        # Close everything so the next file starts with fresh streams and
        # "sort -u" actually runs and writes its output.
        close($f)
        close(outfile)
        close(errpipe)
    }
}