##########################################################################
# 11.distance.in.syllables.from.sentence.beginning&end tier extractor for Wagon:
# Written by Kyuchul Yoon ( kyoon@ling.osu.edu )
#
# Extracts the 11.distance.in.syllables.from.sentence.beginning&end data field
# for Wagon training from a set of TextGrid files.
# The script assumes that you already have the TextGrid files labelled by
# professional K-ToBI labelers.
#
# It reads every file matching *.<fileExtOfDoneFiles> in subFolderToProcess,
# one by one, appends two tiers (copies of tier 1) whose interval labels hold
# the running syllable distance from the sentence beginning and from the
# sentence end, and writes the result into outputSubFolder.
# The output files get the extension chosen in outputFileExt (wagon.11).
#
# Tier 1 labels are expected to look like "token/something"; only the part
# before the first "/" is used, and syllables inside it are separated by "-"
# (e.g. "peu-lang-seu" is 3 syllables long).
# The first and the last interval of tier 1 are not counted; their labels on
# the two new tiers are blanked.
##########################################################################

form Select files
	word subFolderToProcess 10.wagon-features\08.distance.in.eojeols.from.previous.AP&IP
	word fileExtOfDoneFiles wagon.08
	word outputSubFolder 10.wagon-features\11.distance.in.syllables.from.sentence.beginning&end
	word tierNameToAdd1 dst.in.syl.S.start
	word tierNameToAdd2 dst.in.syl.S.end
	real tierDiffTol 0.05
	choice outputFileExt: 1
		button wagon.11
endform

# NOTE(review): tierDiffTol is not used anywhere in this script; it is kept
# in the form so that the dialog stays the same as in the sibling scripts.

# Get the (sorted) list of input file names
Create Strings as file list... fileList 'subFolderToProcess$'\*.'fileExtOfDoneFiles$'
Sort
numFiles = Get number of strings
pause 'numFiles' labeled textgrids identified. Continue?

# Loop through each file
for iFile to numFiles
	select Strings fileList

	# Get the name of the input file; stripping the extension leaves the
	# trailing dot in filePrefix$, so the output name is prefix + new extension
	doneFile$ = Get string... iFile
	filePrefix$ = doneFile$ - fileExtOfDoneFiles$
	Read from file... 'subFolderToProcess$'\'doneFile$'
	Rename... textGrid
	numIntervals = Get number of intervals... 1

	# Get the number of tiers so that the two new tiers can be added at the end
	numTiers = Get number of tiers
	Duplicate tier... 1 (numTiers+1) 'tierNameToAdd1$'
	Duplicate tier... 1 (numTiers+2) 'tierNameToAdd2$'

	# Blank the first/last interval text on both new tiers
	Set interval text... (numTiers+1) 1 
	Set interval text... (numTiers+1) numIntervals 
	Set interval text... (numTiers+2) 1 
	Set interval text... (numTiers+2) numIntervals 

	######## Pass 1: total number of syllables in the whole sentence ########
	######## (needed to compute the distance from the sentence end)  ########
	totalNum = 0
	for iToken from 2 to (numIntervals-1)
		call countSyllablesOfToken
		totalNum = totalNum + numSyl
	endfor

	######## Pass 2: running distances for every interval (eojeol) ########
	# Distance (in syllables) from the sentence beginning, including the
	# syllables of the current interval itself
	distFromSentBegin = 0
	for iToken from 2 to (numIntervals-1)
		call countSyllablesOfToken

		# Distance from the sentence beginning
		distFromSentBegin = distFromSentBegin + numSyl
		# Distance from the sentence end (0 for the last counted interval)
		distFromSentEnd = totalNum - distFromSentBegin

		# Store both distances as interval labels on the new tiers
		Set interval text... (numTiers+1) iToken 'distFromSentBegin'
		Set interval text... (numTiers+2) iToken 'distFromSentEnd'
	endfor

	# Let the user inspect the result before it is written out
	Edit
	pause
	Write to text file... 'outputSubFolder$'\'filePrefix$''outputFileExt$'
	Remove
endfor

select Strings fileList
Remove

##########################################################################
# countSyllablesOfToken
#
# Counts the syllables of interval iToken on tier 1 of the currently
# selected TextGrid.
#
# Reads  (global): iToken
# Writes (global): numSyl            - syllable count of the token
#                  tempIntervalText$ - full interval label
#                  intervalText$     - label part before the first "/"
#                  restOfToken$, hyphenPos, indexOfSlash - scratch variables
#
# Rules:
#   - Unpronounced symbols (PERIOD, NPERIOD, COMMA, HYPHEN, PLUS, COLON,
#     LRB-, RRB-, DRQUOTE, DLQUOTE, SLQUOTE, SRQUOTE, LRB, RRB) count as 0.
#   - Any other token counts as 1 + the number of "-" it contains, so tokens
#     without a hyphen count as one syllable.
#
# NOTE(review): an earlier comment in this script stated that HYPHEN should
# count as one syllable, but the code has always counted it as zero. The
# existing behaviour is kept here - confirm which one is intended.
##########################################################################
procedure countSyllablesOfToken
	tempIntervalText$ = Get label of interval... 1 iToken

	# Keep only the token text in front of the first "/". If the label has no
	# "/", use the whole label instead of an empty string.
	indexOfSlash = index (tempIntervalText$, "/")
	if indexOfSlash > 0
		intervalText$ = left$ (tempIntervalText$, indexOfSlash - 1)
	else
		intervalText$ = tempIntervalText$
	endif

	if (intervalText$ = "PERIOD" or intervalText$ = "NPERIOD" or intervalText$ = "COMMA" or intervalText$ = "HYPHEN"
	... or intervalText$ = "PLUS" or intervalText$ = "COLON" or intervalText$ = "LRB-" or intervalText$ = "RRB-"
	... or intervalText$ = "DRQUOTE" or intervalText$ = "DLQUOTE" or intervalText$ = "SLQUOTE"
	... or intervalText$ = "SRQUOTE" or intervalText$ = "LRB" or intervalText$ = "RRB")
		numSyl = 0
	else
		# One syllable to start with, plus one for every "-" found.
		# After each hit, continue searching in the text right of the hyphen.
		numSyl = 1
		restOfToken$ = intervalText$
		hyphenPos = index (restOfToken$, "-")
		while hyphenPos > 0
			numSyl = numSyl + 1
			restOfToken$ = right$ (restOfToken$, length (restOfToken$) - hyphenPos)
			hyphenPos = index (restOfToken$, "-")
		endwhile
	endif
endproc

#### END OF SCRIPT ####