#############################################################################
# prosodic-diphone-extractor.praat ( Written by Kyuchul Yoon kyoon@ling.osu.edu )
#
# For every TextGrid in the input folder, this script writes a text file that
# lists the diphones of each interval of the chosen interval tier, decorated
# with prosodic-boundary symbols taken from a point tier of tone labels.
#
# Boundary symbols:
#     <  IP-initial        >  IP-final
#     [  AP-initial        ]  AP-final
#     {  PW-initial        }  PW-final
#     (  syllable-initial  )  syllable-final
# "#" stands for a pause and "-" marks the diphone boundary.
#
# In the interval labels the syllable separator is " - " and the phone
# separator is a single space " ".
#
# Example: an IP-initial syllable labelled "p a n" yields the diphones
#     <#-p    <p-a    a-n)
#
# Output layout: diphones are tab-separated, one line per interval.
#
# NOTE(review): the form fields under "TextGrid with a new TIER of DIPHONES"
# (tierToDuplicate, newTierPosition, newTierName, textgridOutFolder,
# textgridOutFileExt) are kept so the form stays unchanged, but nothing in this
# script uses them; no TextGrid is written.
#############################################################################

form Specify files and folders
	comment TextGrid INPUT
	word inFolder 03.lab.TextGrid.after.LTS.scheme
	word inFileExt_(with_dot) .TextGrid
	natural tierToExtract 2
	natural tierPhonology 4
	comment newline-separated TEXT file of DIPHONES
	word outFolder_(to_be_created) 03.lab.TextGrid.after.LTS.scheme.diphone.text
	word outFileExt_(with_dot) .prosodic.diphones
	comment TextGrid with a new TIER of DIPHONES
	natural tierToDuplicate 2
	natural newTierPosition 5
	word newTierName prosodic.diphones
	word textgridOutFolder_(to_be_created) 03.lab.TextGrid.after.LTS.scheme.diphone.textgrid
	word textgridOutFileExt_(with_dot) .TextGrid.prosodic.diphone
endform

# Make a list of files
Create Strings as file list... fileList 'inFolder$'\*'inFileExt$'
numFiles = Get number of strings
pause 'numFiles' files identified. Continue?

# Create the output subdirectory.
# system_nocheck (instead of system) so that an already existing folder,
# e.g. on a second run, does not abort the script.
system_nocheck mkdir 'outFolder$'

# Loop through each file
for iFile to numFiles
	select Strings fileList

	# Filenames
	fileName$ = Get string... iFile
	filePrefix$ = fileName$ - inFileExt$
	outFileName$ = filePrefix$ + outFileExt$
	# Not used further in this script (see NOTE(review) in the header).
	textgridOutFileName$ = filePrefix$ + textgridOutFileExt$

	# All output below is written with fileappend; remove any file left over
	# from an earlier run so the diphone list is not duplicated.
	filedelete 'outFolder$'\'outFileName$'

	# Read a textgrid file
	Read from file... 'inFolder$'\'fileName$'
	Rename... textgrid
	numIntervals = Get number of intervals... tierToExtract
	# The point tier does not change while the intervals are scanned,
	# so its size is queried once per file.
	numPoints = Get number of points... tierPhonology

	# The first and the last interval of the tier are skipped.
	for iInterval from 2 to (numIntervals-1)

		###############################################
		# Preceding/following prosodic position labels of the current interval
		###############################################
		# "none" is a sentinel meaning "no tone label found at this edge"
		prosLabOfStartTimeOfInterval$ = "none"
		prosLabOfEndTimeOfInterval$ = "none"
		startTimeOfInterval = Get starting point... tierToExtract iInterval
		endTimeOfInterval = Get end point... tierToExtract iInterval

		# Look for points of the tone tier that coincide exactly with the
		# start or the end of the interval and remember their labels.
		for iPoint to numPoints
			timeOfPoint = Get time of point... tierPhonology iPoint
			if startTimeOfInterval = timeOfPoint
				prosLabOfStartTimeOfInterval$ = Get label of point... tierPhonology iPoint
			elsif endTimeOfInterval = timeOfPoint
				prosLabOfEndTimeOfInterval$ = Get label of point... tierPhonology iPoint
			endif
		endfor

		# If there is no AP or IP boundary at an edge, assign PWi or PWf.
		# The two edges are independent, hence two separate tests.
		if prosLabOfStartTimeOfInterval$ = "none"
			prosLabOfStartTimeOfInterval$ = "PWi"
		endif
		if prosLabOfEndTimeOfInterval$ = "none"
			prosLabOfEndTimeOfInterval$ = "PWf"
		endif

		# Exceptional case: the first processed interval always starts an IP,
		# arbitrarily labelled "IP%"
		if iInterval = 2
			prosLabOfStartTimeOfInterval$ = "IP%"
		endif

		############################################################
		# Assign prosodic boundary symbols < [ { } ] > based on tone labels obtained above.
		# Only the last character of the label is inspected.
		############################################################
		tempStartLab$ = right$(prosLabOfStartTimeOfInterval$, 1)
		tempEndLab$ = right$(prosLabOfEndTimeOfInterval$, 1)

		# For the string in prosLabOfStartTimeOfInterval$
		if tempStartLab$ = "%"
			startSymbol$ = "<"
		elsif tempStartLab$ = "a"
			startSymbol$ = "["
		else
			# everything else (e.g. the "i" of "PWi") counts as PW-initial
			startSymbol$ = "{"
		endif

		# For the string in prosLabOfEndTimeOfInterval$
		if tempEndLab$ = "%"
			endSymbol$ = ">"
		elsif tempEndLab$ = "a"
			endSymbol$ = "]"
		else
			# everything else (e.g. the "f" of "PWf") counts as PW-final
			endSymbol$ = "}"
		endif

		###################################################
		# Count the number of syllables for current interval. Field separator is " - ".
		###################################################
		intervalText$ = Get label of interval... tierToExtract iInterval
		tempString$ = intervalText$
		sylCount = 1
		indexOfSep = index(tempString$, " - ")
		while indexOfSep <> 0
			# drop everything up to and including the 3-character separator
			tempString$ = right$(tempString$, length(tempString$) - (indexOfSep + 2))
			sylCount = sylCount + 1
			indexOfSep = index(tempString$, " - ")
		endwhile

		# For a non-IP-initial first syllable the first diphone straddles the
		# boundary with the previous interval: <prev>-<closing><opening>-<phone>.
		# Both symbols depend only on startSymbol$.
		if startSymbol$ = "["
			bSign1$ = "["
			bSign2$ = "]"
		elsif startSymbol$ = "{"
			bSign1$ = "{"
			bSign2$ = "}"
		endif

		#########################################################
		# Based on sylCount, extract each syllable from current interval and create diphones
		#########################################################
		for iSyl to sylCount

			# Pop the next syllable off the front of intervalText$
			indexOfFieldSep = index(intervalText$, " - ")
			if indexOfFieldSep <> 0
				sylExtracted$ = left$(intervalText$, indexOfFieldSep - 1)
				intervalText$ = right$(intervalText$, length(intervalText$) - (indexOfFieldSep + 2))
			else
				# last (or only) syllable of the interval
				sylExtracted$ = intervalText$
			endif

			###################################
			# Count the number of phones for current syllable
			# (number of single spaces + 1)
			###################################
			phoneCount = 1
			tempSylExtracted$ = sylExtracted$
			indexOfSpace = index(tempSylExtracted$, " ")
			while indexOfSpace <> 0
				tempSylExtracted$ = right$(tempSylExtracted$, length(tempSylExtracted$) - indexOfSpace)
				phoneCount = phoneCount + 1
				indexOfSpace = index(tempSylExtracted$, " ")
			endwhile

			####################
			# Extract prosodic diphones
			####################
			# The first syllable of the interval carries the start symbol, the
			# last one the end symbol; the others are PW-internal.
			# prevPhone$ deliberately survives across syllables and intervals:
			# it is the left half of the next boundary-straddling diphone.
			#
			# NOTE(review): when sylCount = 1 the syllable is handled as "first"
			# only, so endSymbol$ (and the IP-final "-#>" diphone) is never
			# written for one-syllable intervals. Likewise a one-phone last
			# syllable only gets the "-)(-" diphone. Left as in the original
			# because the intended output format is not documented here.
			for iPhone to phoneCount

				# Pop the next phone off the front of sylExtracted$
				indexOfSpace = index(sylExtracted$, " ")
				if indexOfSpace <> 0
					phoneExtracted$ = left$(sylExtracted$, indexOfSpace - 1)
					sylExtracted$ = right$(sylExtracted$, length(sylExtracted$) - indexOfSpace)
				else
					phoneExtracted$ = sylExtracted$
				endif

				# Optional second diphone written right after diphone$
				extraDiphone$ = ""

				if iSyl = 1 and startSymbol$ = "<"
					##################
					# First syllable, IP-initial edge
					if iPhone = 1
						# pause + first phone
						diphone$ = startSymbol$ + "#-" + phoneExtracted$
					elsif iPhone = 2
						diphone$ = startSymbol$ + prevPhone$ + "-" + phoneExtracted$
					elsif iPhone = phoneCount
						diphone$ = prevPhone$ + "-" + phoneExtracted$ + ")"
					else
						diphone$ = "(" + prevPhone$ + "-" + phoneExtracted$
					endif

				elsif iSyl = 1
					##################
					# First syllable, AP-f + AP-i or PW-f + PW-i sequences
					if iPhone = 1
						diphone$ = prevPhone$ + "-" + bSign2$ + bSign1$ + "-" + phoneExtracted$
					elsif iPhone = phoneCount
						diphone$ = prevPhone$ + "-" + phoneExtracted$ + ")"
					else
						diphone$ = "(" + prevPhone$ + "-" + phoneExtracted$
					endif

				elsif iSyl = sylCount
					#############################
					# Last syllable of the interval text
					if iPhone = 1
						# diphone across the syllable boundary
						diphone$ = prevPhone$ + "-)(-" + phoneExtracted$
					elsif iPhone = phoneCount
						diphone$ = prevPhone$ + "-" + phoneExtracted$ + endSymbol$
						# At the end of an IP, add an additional diphone with -#
						if endSymbol$ = ">"
							extraDiphone$ = phoneExtracted$ + "-" + "#" + endSymbol$
						endif
					else
						diphone$ = "(" + prevPhone$ + "-" + phoneExtracted$
					endif

				else
					#############################################
					# Syllables in between the first and last of the interval text
					if iPhone = 1
						diphone$ = prevPhone$ + "-)(-" + phoneExtracted$
					elsif iPhone = phoneCount
						diphone$ = prevPhone$ + "-" + phoneExtracted$ + ")"
					else
						diphone$ = "(" + prevPhone$ + "-" + phoneExtracted$
					endif
				endif

				fileappend 'outFolder$'\'outFileName$' 'diphone$''tab$'
				if extraDiphone$ <> ""
					fileappend 'outFolder$'\'outFileName$' 'extraDiphone$''tab$'
				endif
				prevPhone$ = phoneExtracted$
			endfor
		endfor

		# One output line per interval
		fileappend 'outFolder$'\'outFileName$' 'newline$'
	endfor

	pause Check the text file before continuing to next file!
	select TextGrid textgrid
	Remove
endfor

select Strings fileList
Remove
#################### END OF SCRIPT ######################