####################################################################################################
# diphone-locator-for-TextGrids-GREEDY.ALGORITHM.praat ( Written by Kyuchul Yoon kyoon@ling.osu.edu )
# Same function as diphone-locator-for-TextGrids9.praat
# but with Greedy Algorithm
##################################################################################
#################### BASIC ALGORITHM #############################################
# Given 400 TextGrids files with diphones tier, this script makes a list of
# filename-diphoneTypeCount, remembering all the diphoneTypes and their
# total count for each file, gets the name of the file that has the most
# diphoneTypeCount, marks all the diphones for that file (and moving the file to a
# different 'done' folder), removes all those diphones from the other 399 files
# (deleting old 399 files and writing the updated files to the same directory),
# and repeats the above procedures until there are no more remaining diphoneTypes.
#
# This entails, for each TextGrid file, that the script
# (1) remembers all distinct diphoneTypes, storing in an array variable
#     so that those can be removed from the rest of the files in the loop
# (2) remembers the number of those diphoneTypes so that comparison
#     among files can be possible. This comparison is done to choose
#     the file with the most diphoneTypes.
#
# COUNTING RULE
# A file's "diphone type count" is the number of NON-EMPTY labels left in
# intervals 2..(n-1) of the diphone tier after repeated/already-covered labels
# have been blanked. The first and last intervals are never counted.
#
# SIDE EFFECTS
# - Files in inFolder are rewritten in place (and eventually moved to doneFolder).
# - One line per processed file is appended to the file WHICH.FILE.
##################################################################################

form Specify forms and files
    word inFolder lab.TextGrid.after.LTS.scheme.diphone.textgrid
    natural diphoneTier 5
    word inFileExt_(with_dot) .TextGrid.prosodic.diphone
    word doneFolder_(to_be_created) marked.diphones.GREEDY.ALGORITHM
    word diphoneCount_(to_be_created) DIPHONE.COUNT
endform

# Create a subfolder.
# system_nocheck is used so that re-running the script with an already
# existing done folder does not abort here.
system_nocheck mkdir 'doneFolder$'

# Make a list of all inFolder files
Create Strings as file list... fileList 'inFolder$'/*'inFileExt$'
Sort
numFiles = Get number of strings
pause 'numFiles' files identified. Continue?

#################################################
# FIND THE FILE WITH THE MOST DIPHONE TYPE COUNT
# and SET RECURRENT DIPHONE TOKEN INTERVAL TO ""
#################################################

# Initialize the running maximum, its file name and the grand total,
# so that they are defined even when no file contains any diphone
mostTotalNumDiphoneTypes = 0
mostDiphoneTypesFileName$ = ""
allDiphoneTypeCount = 0

# Loop through each file
for iFile to numFiles
    # Read a file
    select Strings fileList
    fileName$ = Get string... iFile
    Read from file... 'inFolder$'/'fileName$'
    Rename... inFile

    # Duplicate the diphone tier as a reference tier
    Duplicate tier... diphoneTier (diphoneTier+1) marked.diphones.ref

    # Count the distinct non-empty labels of intervals 2..(numIntervals-1)
    numIntervals = Get number of intervals... diphoneTier
    totalNumDiphoneTypes = 0

    # Loop through each interval
    for iInterval from 2 to (numIntervals-1)
        intervalText$ = Get label of interval... diphoneTier iInterval
        # Remember the ORIGINAL label, so later repeats can still be detected
        # after this interval has been blanked
        arrayIntervalText'iInterval'$ = intervalText$

        # Empty intervals are not diphone types
        if intervalText$ <> ""
            # Compare the current label with all earlier labels and stop at
            # the first match, so that each repeated token is handled once
            isRepeated = 0
            iCompare = 2
            while (isRepeated = 0 and iCompare < iInterval)
                if intervalText$ = arrayIntervalText'iCompare'$
                    isRepeated = 1
                endif
                iCompare = iCompare + 1
            endwhile

            if isRepeated = 1
                # A repeated token: set the interval text to nothing
                Set interval text... diphoneTier iInterval
            else
                # First occurrence of this label: one more diphone type
                totalNumDiphoneTypes = totalNumDiphoneTypes + 1
            endif
        endif
    endfor

    # Rebuild the diphone tier so that it holds only the filled intervals
    call cleanDiphoneTier

    # Delete the old file and write the new one
    filedelete 'inFolder$'/'fileName$'
    Write to text file... 'inFolder$'/'fileName$'
    select TextGrid inFile
    #Edit
    #pause Check!
    Remove

    # Compare the current totalNumDiphoneTypes with the previous mostTotalNumDiphoneTypes
    # and set the filename and number of diphone types for that file
    if totalNumDiphoneTypes > mostTotalNumDiphoneTypes
        mostTotalNumDiphoneTypes = totalNumDiphoneTypes
        mostDiphoneTypesFileName$ = fileName$
        # Save the mostTotalNumDiphoneTypes to compute the number of all diphone type count
        allDiphoneTypeCount = mostTotalNumDiphoneTypes
    endif
    fileappend WHICH.FILE 'iFile''tab$''mostDiphoneTypesFileName$''tab$''mostTotalNumDiphoneTypes''newline$'
endfor
select Strings fileList
Remove

#####################################################
# REPEAT UNTIL SUM OF DIPHONE TYPES OF ALL FILES IS 0
#####################################################
# MOVE THE FILE TO DONE FOLDER and
# REMOVE THE DIPHONE TYPES FROM THE OTHER FILES
#####################################################

# The loop must only be entered when the first pass actually found a file
# with at least one diphone type; otherwise there is no file to move.
sumOfDiphoneTypes = mostTotalNumDiphoneTypes

while (sumOfDiphoneTypes > 0)
    #pause Ready for the next while loop?
    # Now that we have the filename and the number of diphone types for the file
    # that has the most number of diphone types, move that file to the 'done' folder
    Read from file... 'inFolder$'/'mostDiphoneTypesFileName$'
    Rename... doneFile
    Write to text file... 'doneFolder$'/'mostDiphoneTypesFileName$'
    filedelete 'inFolder$'/'mostDiphoneTypesFileName$'
    select TextGrid doneFile
    numIntervalsDone = Get number of intervals... diphoneTier

    # Store the non-empty diphoneTypes to an array variable
    iArray = 0
    for iIntervalDone from 2 to (numIntervalsDone-1)
        intervalTextDone$ = Get label of interval... diphoneTier iIntervalDone
        if intervalTextDone$ <> ""
            iArray = iArray + 1
            arrayIntervalTextDone'iArray'$ = intervalTextDone$
        endif
    endfor
    select TextGrid doneFile
    Remove

    # And then remove those diphone types from the rest of the files in 'inFolder'
    Create Strings as file list... fileList 'inFolder$'/*'inFileExt$'
    Sort
    numFiles = Get number of strings
    # pause 'numFiles' files identified. Continue?

    # Initialize
    sumOfDiphoneTypes = 0
    mostTotalNumDiphoneTypes = 0

    # Loop through each file
    for iFile to numFiles
        select Strings fileList
        fileName$ = Get string... iFile
        Read from file... 'inFolder$'/'fileName$'
        Rename... inFile
        numIntervalsRest = Get number of intervals... diphoneTier

        # Number of diphone types that survive in this file: counted upwards,
        # once per non-empty label that is not covered by the done file.
        # (Excluding the leading/trailing interval.)
        realNumIntervalsRest = 0
        select TextGrid inFile
        for iIntervalRest from 2 to (numIntervalsRest-1)
            intervalTextRest$ = Get label of interval... diphoneTier iIntervalRest

            # Already-empty intervals are neither compared nor counted
            if intervalTextRest$ <> ""
                # Compare the intervalTextRest$ with the array variable;
                # flag becomes 2 as soon as one of the done labels matches
                flag = 1
                i = 1
                while (flag = 1 and i <= iArray)
                    if intervalTextRest$ = arrayIntervalTextDone'i'$
                        flag = 2
                    endif
                    i = i + 1
                endwhile

                if flag = 2
                    # Covered by the done file: set the interval text to nothing
                    Set interval text... diphoneTier iIntervalRest
                else
                    # Still uncovered: this label remains a diphone type of the file
                    realNumIntervalsRest = realNumIntervalsRest + 1
                endif
            endif
        endfor

        # Update the sumOfDiphoneTypes
        sumOfDiphoneTypes = sumOfDiphoneTypes + realNumIntervalsRest

        # Rebuild the diphone tier so that it holds only the filled intervals
        call cleanDiphoneTier

        # Write the updated file to inFolder$
        Write to text file... 'inFolder$'/'fileName$'
        select TextGrid inFile
        #Edit
        #pause Check!
        Remove

        # Compare the current realNumIntervalsRest with the previous mostTotalNumDiphoneTypes
        # and set the filename and number of diphone types for that file
        if realNumIntervalsRest > mostTotalNumDiphoneTypes
            mostTotalNumDiphoneTypes = realNumIntervalsRest
            mostDiphoneTypesFileName$ = fileName$
        endif
        fileappend WHICH.FILE 'iFile''tab$''mostDiphoneTypesFileName$''tab$''mostTotalNumDiphoneTypes''newline$'
    endfor
    select Strings fileList
    Remove

    # Compute the number of all diphones processed
    allDiphoneTypeCount = allDiphoneTypeCount + mostTotalNumDiphoneTypes
endwhile

pause Total number of diphone type count processed is 'allDiphoneTypeCount'

############### TIER CLEANING PROCEDURE ###################
# cleanDiphoneTier
# Requires: the TextGrid to be cleaned is the selected object.
# Reads:    diphoneTier (tier number of the diphone tier).
# Effect:   inserts a new tier "marked.diphones" right after the diphone tier,
#           copies every non-empty interval of intervals 2..(n-1) of the diphone
#           tier into it (boundaries and label), and then removes the old diphone
#           tier, so that the new tier ends up at position diphoneTier.
# Note:     Praat procedures share the script's global variables; this one
#           overwrites markedDiphones, numIntervalsOld, iIntervalOld,
#           intervalTextOld$, startTime, endTime, iIntervalNew, alreadyFlag,
#           numIntervalsNew, startT, nextT and newIntervalNum.
###########################################################
procedure cleanDiphoneTier
    markedDiphones = diphoneTier + 1
    # Insert a new tier, i.e., markedDiphones tier
    Insert interval tier... markedDiphones marked.diphones
    # Get the number of intervals of the old diphone tier
    numIntervalsOld = Get number of intervals... diphoneTier

    # Check each interval and copy non-empty intervals to marked.diphones tier
    for iIntervalOld from 2 to (numIntervalsOld-1)
        intervalTextOld$ = Get label of interval... diphoneTier iIntervalOld
        # If non-empty, copy it to marked.diphones tier
        if intervalTextOld$ <> ""
            # Get the interval boundaries of the old diphone tier
            startTime = Get starting point... diphoneTier iIntervalOld
            endTime = Get end point... diphoneTier iIntervalOld
            # Set the interval number of the new markedDiphones tier
            iIntervalNew = 1
            # alreadyFlag stays 1 until an existing boundary of the new tier
            # is found that can be reused for this interval
            alreadyFlag = 1
            # Get the number of intervals of the new markedDiphones tier
            numIntervalsNew = Get number of intervals... markedDiphones

            # Scan the new tier for a boundary that can be reused
            while (alreadyFlag = 1 and iIntervalNew <> (numIntervalsNew+1))
                # Get the time of intervals of the new tier
                startT = Get starting point... markedDiphones iIntervalNew
                nextT = Get end point... markedDiphones iIntervalNew
                # For intervening diphone, do nothing
                if (startT = startTime and nextT = endTime)
                    alreadyFlag = 2
                # To add an interval immediately after an existing interval
                elsif startT = startTime
                    alreadyFlag = 2
                    Insert boundary... markedDiphones endTime
                # To add an interval immediately before an existing interval
                elsif startT = endTime
                    alreadyFlag = 2
                    Insert boundary... markedDiphones startTime
                endif
                # Go to the next interval of the new tier
                iIntervalNew = iIntervalNew + 1
            endwhile

            # There's no existing interval, so insert two boundaries for one interval
            if alreadyFlag = 1
                Insert boundary... markedDiphones startTime
                Insert boundary... markedDiphones endTime
            endif

            # Set the new interval text
            newIntervalNum = Get interval at time... markedDiphones startTime
            Set interval text... markedDiphones newIntervalNum 'intervalTextOld$'
        endif
    endfor

    # Delete the old tier
    Remove tier... diphoneTier
endproc

############## END OF SCRIPT ###################