#####################################################################################
# Phonological tone tier info extractor for Wagon: Written by Kyuchul Yoon ( kyoon@ling.osu.edu )
# Extracts from a set of .TextGrid.done files data fields for Wagon training
# Data fields include (1) romanized eojeols, (2) number of syllables for each eojeol,
# (3) type of boundary (AP, IP, or none)
# (4) number of syllables from an immediately preceding AP boundary (marked by "LHa")
# (5) number of syllables from an immediately preceding IP boundary (marked by "HL%", "H%", or "L%")
# (6) number of syllables from the sentence beginning, and
#??????????????? (7) number of syllables from the sentence ending. (Not computed by the code below.)
# The script assumes that you already have the TextGrid files labelled by professional K-ToBI labelers.
# The script will read in all the TextGrid.done files one by one from the current directory
# and write the output files into Wagon/ subdirectory. (There should be a "Wagon" subdirectory)
# Each output file is named <input file name>.wagon
#
# Output row layout (one row per eojeol; fields are tab-separated):
#   eojeol  numSyllables  toneLabel  distFromPrevAP  distFromPrevIP  distFromSentBegin
#####################################################################################

# Specify files and folders
form Select files
	word fileExtOfDoneFiles TextGrid.done
	word outputSubFolder Wagon
endform

# Get the list of filenames of TextGrid.done files
Create Strings as file list... fileList *.'fileExtOfDoneFiles$'
numFiles = Get number of strings

# Loop through each file
for iFile to numFiles
	select Strings fileList

	# Get the name for a TextGrid.done file and read it in.
	# The object id is remembered so that the TextGrid can be removed after processing.
	doneFile$ = Get string... iFile
	Read from file... 'doneFile$'
	textGridID = selected ("TextGrid")

	# Build the output path once so that the delete and every append use the same name.
	# NOTE(review): the backslash separator is Windows-style; confirm whether this
	# script must also run on other platforms.
	outFile$ = outputSubFolder$ + "\" + doneFile$ + ".wagon"

	# If an old output file exists, delete it first and then write out the new file.
	# (The original line omitted the "$" on outputSubFolder, so the name was never
	# substituted and stale output was appended to instead of being replaced.)
	filedelete 'outFile$'

	# Initialize the distances (in number of syllables) from an immediately
	# preceding AP, an immediately preceding IP, and the sentence beginning to 0
	distFromPrevAP = 0
	distFromPrevIP = 0
	distFromSentBegin = 0

	# Get the number of intervals from the first tier (word tier), i.e. get the number of eojeols (=numIntervals)
	numIntervals = Get number of intervals... 1

	# Get rid of the leading/following label
	newNumIntervals = numIntervals - 2

	# The number of points on the second (phonology) tier does not change while
	# the eojeols are scanned, so query it once per file.
	numPoints = Get number of points... 2

	# Loop through each interval (eojeol) and extract info
	for iEojeol to newNumIntervals
		# Get the interval text (i.e., eojeol text), excluding the leading label
		intervalText$ = Get label of interval... 1 (iEojeol+1)
		fileappend 'outFile$' 'intervalText$''tab$'

		# Count the number of syllables for that eojeol (e.g. "peu-lang-seu" is 3-syllable long):
		# one syllable plus one more for every "-" found. The text is consumed from the
		# left, keeping only what follows each hyphen, until no hyphen is left.
		actualNumSyl = 1
		indexOfHyphen = index (intervalText$, "-")
		while indexOfHyphen
			intervalText$ = right$ (intervalText$, length (intervalText$) - indexOfHyphen)
			actualNumSyl = actualNumSyl + 1
			indexOfHyphen = index (intervalText$, "-")
		endwhile
		fileappend 'outFile$' 'actualNumSyl''tab$'

		# Get the RHS end time of the interval
		endTimeOfInterval = Get end point... 1 (iEojeol+1)

		# Loop through all the points of the phonology tier to extract the point that
		# "roughly" (within 5 ms) corresponds to the end time of the interval from the word tier,
		# and output the boundary label and the distance fields for it.
		boundaryFound = 0
		for iPoint to numPoints
			pointLabel$ = Get label of point... 2 iPoint
			timePoint = Get time of point... 2 iPoint
			dif = abs (timePoint - endTimeOfInterval)
			if dif < 0.005
				boundaryFound = 1

				# Every label advances all three distances by this eojeol's syllable count
				distFromPrevAP = distFromPrevAP + actualNumSyl
				distFromPrevIP = distFromPrevIP + actualNumSyl
				distFromSentBegin = distFromSentBegin + actualNumSyl

				# The continuation lines must directly follow the command they extend.
				# (In the original "L%" branch the last one was placed after the reset
				# statements, so that row lost its final field and its newline.)
				fileappend 'outFile$' 'pointLabel$''tab$'
				... 'distFromPrevAP''tab$''distFromPrevIP''tab$'
				... 'distFromSentBegin''newline$'

				# Reset the counters only after the row has been written
				if pointLabel$ = "LHa"
					# AP boundary: restart the distance from the preceding AP
					distFromPrevAP = 0
				elsif pointLabel$ = "HL%" or pointLabel$ = "H%" or pointLabel$ = "L%"
					# IP boundary: restart the distances from the preceding IP and AP
					distFromPrevIP = 0
					distFromPrevAP = 0
				endif
			endif
		endfor

		# No tone point near this eojeol's right edge: still finish the row, otherwise
		# the next eojeol would be appended to the same line and the distances would
		# not advance. NOTE(review): the label "none" follows field (3) of the header
		# ("AP, IP, or none") - confirm it suits the Wagon data description.
		if boundaryFound = 0
			distFromPrevAP = distFromPrevAP + actualNumSyl
			distFromPrevIP = distFromPrevIP + actualNumSyl
			distFromSentBegin = distFromSentBegin + actualNumSyl
			fileappend 'outFile$' none'tab$'
			... 'distFromPrevAP''tab$''distFromPrevIP''tab$'
			... 'distFromSentBegin''newline$'
		endif
	endfor

	# Remove the TextGrid of this file so that objects do not accumulate in the object list
	select textGridID
	Remove
endfor

select Strings fileList
Remove
#### END OF SCRIPT ####