##########################################################################
#
# perceptionResults.R
#
# copyright (c) 2007 by the Autumn Ling H286 class and Mary E. Beckman
# Department of Linguistics, Ohio State University
#
# Script for analyzing the results of the perception experiment that we
# did in class together on October 10, 2007, for the term project in the
# Linguistics H286 "Analyzing the Sounds of Languages" course.

# Set the working directory to the directory where the data file is kept.
# The path will depend on how you have your computer organized.
setwd("c:/Lx286/projectParts")

# Read the data file in as a data frame (a type of table where you can
# mix columns of categorical data with columns of numerical data).
# stringsAsFactors = TRUE keeps the text columns as factors (categorical
# data) on every version of R; from R 4.0 on they would otherwise be read
# as plain character strings, and the summary() counts used further down
# would no longer show how often each word was chosen.
dat <- read.table("Au2007perception.txt", header = TRUE,
                  stringsAsFactors = TRUE)

# Check to see that it read in correctly, by seeing how many rows and
# columns you have, like this:
dim(dat)
# [1] 760   6
# If the above line of two numbers appears on your R console, the
# data have been read in correctly.  There should be 6 columns and as
# many rows as there are students (minus Chris) * 40.

# Here are the names of the six columns that you should have.
names(dat)
# [1] "listenerDistinguishes" "listener"  "talkerDistinguishes"
# [4] "talker"                "stimulus"  "response"

# Add a seventh column to the data frame.  Call this column of data
# "correct" since it indicates whether the response was a correct
# identification of the intended word: the value is 1 if the stimulus
# and the response match, and 0 if they don't.  The comparison is done
# on all of the rows at once, so no loop is needed.  (Both columns are
# converted to character strings first so that the words themselves are
# compared, however the two columns happen to be stored.)
dat$correct <- as.numeric(
  as.character(dat$stimulus) == as.character(dat$response)
)

# See how accurate listeners were overall.  (Since we have 1 for every
# response that was correct and 0 for every other one, taking the mean
# of this column gives us the proportion of correct responses, and
# multiplying by 100 gives us the percent of correct responses.  The
# round() command rounds up or down to the nearest integer value.)
round(mean(dat$correct) * 100)
# [1] 62

# Now calculate accuracy for various subsets.  For example, here is how
# to calculate the accuracy for the subset of tokens where the talker
# distinguishes the two words and so does the listener:
YY <- subset(dat, talkerDistinguishes == "yes" & listenerDistinguishes == "yes")
round(mean(YY$correct) * 100)

# Here's how to calculate the accuracy for the subset of tokens where
# the talker distinguishes the two words but the listener does not:
YN <- subset(dat, talkerDistinguishes == "yes" & listenerDistinguishes == "no")
round(mean(YN$correct) * 100)

# Here's how to calculate how many stimuli were identified as "hawed"
# and how many as "hod":
summary(dat$response)
# hawed   hod
#   354   406

# Here's how to figure out which kind of error was more likely when
# both the listener and the talker distinguish the two words.
# First count the cases where the talker intended "hawed" but was heard
# as saying "hod":
nrow(subset(YY, correct == 0 & stimulus == "hawed" & response == "hod"))
# and then count the cases where the talker said "hod" but was heard as
# saying "hawed":
nrow(subset(YY, correct == 0 & stimulus == "hod" & response == "hawed"))

# Here's how to figure out which kind of error was more likely when
# the listener distinguishes the two words but the talker does not.
NY <- subset(dat, talkerDistinguishes == "no" & listenerDistinguishes == "yes")
nrow(subset(NY, correct == 0 & stimulus == "hawed" & response == "hod"))
nrow(subset(NY, correct == 0 & stimulus == "hod" & response == "hawed"))

# Here's how to figure out which word the listener was more likely to
# choose if the listener does not distinguish the two words.
summary(subset(dat, listenerDistinguishes == "no")$response)

# Here's how to figure out how accurate a specific classmate (such
# as Alexa, in this case) was at identifying the intended word produced
# by classmates who do and do not make the distinction.
Alexa <- subset(dat, listener == "Alexa")
round(mean(subset(Alexa, talkerDistinguishes == "yes")$correct) * 100)
round(mean(subset(Alexa, talkerDistinguishes == "no")$correct) * 100)

# Here's how to figure out how accurate everyone was in identifying
# a specific classmate's productions, illustrated here with Elyse as
# the designated talker.  (Note that the rows are selected on the
# "talker" column, not the "listener" column, because we want the
# tokens that Elyse produced, whoever was listening to them.)
round(mean(subset(dat, talker == "Elyse")$correct) * 100)

# Here's how to figure out how accurate people were at identifying
# their own productions.  (The names are converted to character strings
# before they are compared, because the talker and listener columns are
# separate factors and R will not compare two factors directly unless
# they have exactly the same set of levels.)
self <- subset(dat, as.character(talker) == as.character(listener))
round(mean(self$correct) * 100)

# Ask me if you want help in understanding the syntax of any of these
# commands or if you want help writing R commands to answer other
# questions you might have about the data.