###########################################################################
# Checks the number of sentences in each input file, the end of a sentence
# being signaled by the separator tag (by default the POS tag "SFN") found
# in a user-designated tab-separated column.
#
# For every file matching *.<inputFileExt> in the current folder, one line
# is appended to the output file:
#     <filename><tab>OK          if the sentence count equals numSentences
#     <filename><tab><count>     otherwise
#
# Written by Kyuchul Yoon ( kyoon@ling.osu.edu )
############################################################################

# Specify files and folders
form Select files
	comment How many sentences should each file contain?
	natural numSentences 8
	word inputFileExt group
	word sentSeparator SFN
	integer colNumForSentSeparator 16
	word outputFile file-info.txt
endform

# Build a sorted list of the input files in the current folder
Create Strings as file list... fileList *.'inputFileExt$'
Sort
numFiles = Get number of strings
pause 'numFiles' labeled textgrids identified. Continue?

# Only the leading characters of the column are compared, so the comparison
# length follows the separator the user typed in the form.
sepLength = length(sentSeparator$)

# Loop through each file
for iFile to numFiles
	# Get the input filename
	select Strings fileList
	doneFile$ = Get string... iFile

	# Load the file as a Strings object, one string per text line
	Read Strings from raw text file... 'doneFile$'
	Rename... myStrings
	numLines = Get number of strings

	# Initialize the index for the number of sentences
	# (1-based: it ends up one past the number of separators seen)
	iSentence = 1

	# Loop through each line
	for iLine to numLines
		# ##### By examining the file numbers to omit, decide which output file to use
		# if (sentenceNum1 <> iSentence) and (sentenceNum2 <> iSentence)
		#	outFileToUse$ = "selected.txt"
		# else
		#	outFileToUse$ = "omitted.txt"
		# endif

		currentLine$ = Get string... iLine
		tempString$ = currentLine$

		####### Get the column string that the user designated as separating sentences #######
		# Strip one leading tab-delimited column per iteration, so that
		# tempString$ finally starts at column number colNumForSentSeparator.
		for iColumn to (colNumForSentSeparator-1)
			iTab = index(tempString$, tab$)
			if iTab > 0
				tempString$ = right$(tempString$, length(tempString$) - iTab)
			else
				# The line has fewer columns than requested: treat the target
				# column as empty instead of mistaking an earlier column for it.
				tempString$ = ""
			endif
		endfor
		##################### End of "getting to" the column string ################

		#### Now, actually extract the leading letters which could be the separator
		sentSep$ = left$(tempString$, sepLength)

		# If the line is indeed the last token of a sentence (its POS being the
		# separator tag), then increase the sentence number
		if sentSep$ = sentSeparator$
			iSentence = iSentence + 1
		endif
	endfor

	# Report the file: either "OK" or the number of sentences actually found
	realNumSent = iSentence-1
	if realNumSent <> numSentences
		fileappend 'outputFile$' 'doneFile$''tab$''realNumSent''newline$'
	else
		fileappend 'outputFile$' 'doneFile$''tab$'OK'newline$'
	endif

	# Discard the per-file Strings object before moving on
	select Strings myStrings
	Remove
endfor

# Clean up the file list
select Strings fileList
Remove

#### END OF SCRIPT ####