##########################################################################
# 04.distance.in.eojeols.from-to.previous-following.comma tier extractor for Wagon:
# Written by Kyuchul Yoon ( kyoon@ling.osu.edu )
# Extracts from a set of .TextGrid.lab files the
# 04.distance.in.eojeols.from-to.previous-following.comma data fields for Wagon training.
# The script assumes that you already have the TextGrid files labelled by professional K-ToBI labelers.
# The script will read in all the files with the extension given in the form, one by one, from the directory
# 10.wagon-features\03.distance.in.syllables.from-to.previous-following.comma
# and write the output files into 10.wagon-features\04.distance.in.eojeols.from-to.previous-following.comma.
# The output files get the extension .wagon.04
#
# For every interval of tier 1 except the first and the last, two values are
# written into two new tiers (copies of tier 1 appended after the existing tiers):
#   tierNameToAdd1 : running count of eojeol boundaries since the previous COMMA
#   tierNameToAdd2 : remaining count of eojeol boundaries up to the next COMMA
# A token closes an eojeol when its label ends in "#". The token text is the
# part of the label before the first "/".
##########################################################################

form Select files
   word subFolderToProcess 10.wagon-features\03.distance.in.syllables.from-to.previous-following.comma
   word fileExtOfDoneFiles wagon.03
   word tierNameToAdd1 dst.in.eoj.pr.comma
   word tierNameToAdd2 dst.in.eoj.ne.comma
   word outputSubFolder 10.wagon-features\04.distance.in.eojeols.from-to.previous-following.comma
   choice outputFileExt: 1
   button wagon.04
endform

# Get the list of filenames of the already processed (.wagon.03) files
Create Strings as file list... fileList 'subFolderToProcess$'\*.'fileExtOfDoneFiles$'
Sort
numFiles = Get number of strings
pause 'numFiles' labeled textgrids identified. Continue?

# Loop through each file
for iFile to numFiles
   select Strings fileList

   # Get the name of the input file; subtracting the extension leaves the
   # prefix (including its trailing dot) that the output name is built from
   doneFile$ = Get string... iFile
   filePrefix$ = doneFile$ - fileExtOfDoneFiles$
   Read from file... 'subFolderToProcess$'\'doneFile$'
   Rename... textGrid
   numIntervals = Get number of intervals... 1

   # Get the number of tiers so that the two new tiers can be added at the end
   numTiers = Get number of tiers
   Duplicate tier... 1 (numTiers+1) 'tierNameToAdd1$'
   Duplicate tier... 1 (numTiers+2) 'tierNameToAdd2$'

   # Set the first/last interval text of both new tiers to naught (empty text)
   Set interval text... (numTiers+1) 1 
   Set interval text... (numTiers+1) numIntervals 
   Set interval text... (numTiers+2) 1 
   Set interval text... (numTiers+2) numIntervals 

   # Initialize the distance (in number of eojeols) from an immediately
   # preceding COMMA to 0
   distFromPrevCO = 0

   ######### Compute the distance, totalNumBtwCO, to the following COMMA ###########
   # This is only for the stretch from the sentence beginning to the "first"       #
   # occurrence of COMMA                                                           #
   #################################################################################
   totalNumBtwCO = 0
   # Exclude the leading token
   iTokenCO = 2

   # Get the interval text (i.e., token text) and its rightmost letter (i.e., "#").
   # The read is guarded so that a tier with a single interval does not abort the run.
   if iTokenCO <= numIntervals
      tempIntervalText$ = Get label of interval... 1 iTokenCO
   else
      tempIntervalText$ = ""
   endif
   rightMostLetter$ = right$(tempIntervalText$, 1)

   repeat
      # If the token is followed by an eojeol boundary "#", count one more eojeol
      if rightMostLetter$ = "#"
         totalNumBtwCO = totalNumBtwCO + 1
      endif

      # Get the label of the next interval. Never read past the last interval:
      # running off the end is treated like reaching the (empty) sentence end.
      iTokenCO = iTokenCO + 1
      if iTokenCO <= numIntervals
         tempIntervalText$ = Get label of interval... 1 iTokenCO
      else
         tempIntervalText$ = ""
      endif

      # Token text = everything before the first "/". When there is no slash,
      # left$ is asked for -1 characters and yields the empty string.
      # NOTE(review): a non-empty label without "/" therefore also ends this scan,
      # whereas the inter-COMMA scan further down keeps the previous token text
      # in that case - confirm which behaviour is intended.
      indexOfSlash = index(tempIntervalText$, "/")
      intervalTextCO$ = left$(tempIntervalText$, (indexOfSlash-1))

      if tempIntervalText$ = ""
         # An empty label marks the sentence end and terminates the repeat loop
         # NOTE(review): assumes the sentence-final interval of tier 1 is unlabeled - confirm
         intervalTextCO$ = ""
      elsif intervalTextCO$ = "COMMA"
         # The scan stops at the COMMA; one more is added for the COMMA token itself
         # NOTE(review): the inter-COMMA scan further down does not add this 1 - confirm
         totalNumBtwCO = totalNumBtwCO + 1
      else
         rightMostLetter$ = right$(tempIntervalText$, 1)
      endif
   # Repeat this block until you come to the sentence end (empty label) or to the COMMA
   until (intervalTextCO$ = "" or intervalTextCO$ = "COMMA")
   ############# End of distance to "first" COMMA block ######################

   ############# Loop through each interval (token) and extract info ###############
   for iToken from 2 to (numIntervals-1)
      # Initialize the number of eojeols for the interval
      actualNumEojeol = 0

      ######### Get the interval text (i.e., token text) #######
      tempIntervalText$ = Get label of interval... 1 iToken
      indexOfSlash = index(tempIntervalText$, "/")
      intervalText$ = left$(tempIntervalText$, (indexOfSlash-1))
      rightMostLetter$ = right$(tempIntervalText$, 1)

      ######### Compute the number of eojeols for the interval ############
      if rightMostLetter$ = "#"
         actualNumEojeol = actualNumEojeol + 1
      endif
      ########## End of eojeol number block #######################

      ########## Compute the distance from/to previous/next COMMA ############
      # If the current interval text is "COMMA", write out its own values and   #
      # then rescan ahead to the immediately following COMMA (or sentence end)  #
      # so that the distances of the following tokens can be computed.          #
      # This is for the stretch from the first COMMA to the rest of the sentence.
      #########################################################################
      if intervalText$ = "COMMA"
         distFromPrevCO = distFromPrevCO + actualNumEojeol
         # Compute the distance to the next COMMA
         distFromNextCO = totalNumBtwCO - actualNumEojeol

         # Set interval text according to above result
         Set interval text... (numTiers+1) iToken 'distFromPrevCO'
         Set interval text... (numTiers+2) iToken 'distFromNextCO'

         # Initialize the distance from previous COMMA to zero
         distFromPrevCO = 0

         ########## Compute the inter-COMMA distance ##################
         # Starts at 1 and is decremented by 1 after the scan, so the net result
         # is the number of "#"-final tokens between this COMMA and the terminator.
         totalNumBtwCO = 1
         # iToken is at most numIntervals-1, so this first read is always in range
         iTokenCO = iToken + 1

         # Get the label of the (next) interval
         tempIntervalText$ = Get label of interval... 1 iTokenCO
         rightMostLetter$ = right$(tempIntervalText$, 1)
         indexOfSlash = index(tempIntervalText$, "/")
         intervalTextCO$ = left$(tempIntervalText$, (indexOfSlash-1))

         repeat
            if rightMostLetter$ = "#"
               totalNumBtwCO = totalNumBtwCO + 1
            endif

            # Move to the next interval. Never read past the last interval:
            # running off the end is treated like reaching the (empty) sentence end.
            iTokenCO = iTokenCO + 1
            if iTokenCO <= numIntervals
               tempIntervalText$ = Get label of interval... 1 iTokenCO
            else
               tempIntervalText$ = ""
            endif

            # Only labels containing "/" update the token text; otherwise the
            # token text of the previously examined interval is kept
            indexOfSlash = index(tempIntervalText$, "/")
            if indexOfSlash <> 0
               intervalTextCO$ = left$(tempIntervalText$, (indexOfSlash-1))
            endif

            if tempIntervalText$ = ""
               # An empty label marks the sentence end and terminates the repeat loop
               intervalTextCO$ = ""
            elsif intervalTextCO$ <> "COMMA"
               rightMostLetter$ = right$(tempIntervalText$, 1)
            endif
         # Repeat this block until you come to the sentence end (empty label) or to the COMMA
         until (intervalTextCO$ = "" or intervalTextCO$ = "COMMA")
         totalNumBtwCO = totalNumBtwCO - 1
         ########### End of inter-COMMA distance block ##################
      else
         distFromPrevCO = distFromPrevCO + actualNumEojeol
         distFromNextCO = totalNumBtwCO - actualNumEojeol
         # Consume this token's eojeol from the remaining count
         totalNumBtwCO = totalNumBtwCO - actualNumEojeol

         # Set interval text according to above result
         Set interval text... (numTiers+1) iToken 'distFromPrevCO'
         Set interval text... (numTiers+2) iToken 'distFromNextCO'
      endif
      ################# End of distance from/to previous/next COMMA ############
   endfor

   # Open the result in an editor and wait for the user before saving
   Edit
   pause
   Write to text file... 'outputSubFolder$'\'filePrefix$''outputFileExt$'
   Remove
endfor

# Clean up the file list
select Strings fileList
Remove
#### END OF SCRIPT ####