# qmlDay3_part2.R
#
# R code and notes for second part of third 1.5 hour session of week-long
# course in quantitative methods in linguistics, given at the "mini-institute"
# after the LSA Summer Meeting, 14-18 July 2008 ... continued
#
# (c) 2008, Mary E. Beckman, Cynthia Clopper, Shari Speer (Ohio State
# University, Department of Linguistics)
#
# Assumes that you've already read in data sets, etc., using code in
# qmlDay3_part1.R file.  The only object used from there is the data frame
# `hill`; the columns referenced below are vowel, F1, F2 and F4.
#
# Throughout, `x` and `y` are re-assigned to the two vowel subsets currently
# being compared, and `brks` to the F1 histogram breaks for that comparison.
# In every overlaid pair of histograms, `y` is drawn first as solid gray bars
# and `x` is added on top as black hatched bars.

# Plot the distribution of F1 values in the Hillenbrand et al. dataset.
hist(hill$F1, freq = FALSE, main = "",
     xlab = "Hillenbrand et al. (1995) vowels, F1 frequency (Hz)")

# Plot the distribution of F1 values in the Hillenbrand et al. dataset,
# differentiating low vowels (x) from non-low vowels (y).
brks <- seq(300, 1400, 100)
x <- subset(hill, vowel == "ae" | vowel == "ah" | vowel == "aw")
y <- subset(hill, vowel != "ae" & vowel != "ah" & vowel != "aw")
hist(y$F1, breaks = brks, freq = FALSE, ylim = c(0, 0.0035), main = "",
     col = "gray40",
     xlab = "Hillenbrand et al. (1995) vowels, F1 frequency (Hz)")
# col is given explicitly so the hatching is black regardless of the
# default histogram colour of the running R version.
hist(x$F1, breaks = brks, freq = FALSE, density = 10, col = "black",
     add = TRUE)

# Use t-test to confirm that low vowels have significantly different
# mean F1 from mid and high vowels.
t.test(x$F1, y$F1)

# Plot the distribution of F1 values in the high and mid lax front vowels
# in the Hillenbrand et al. dataset (x = "ih", y = "eh").
brks <- seq(300, 1100, 50)
x <- subset(hill, vowel == "ih")
y <- subset(hill, vowel == "eh")
hist(y$F1, breaks = brks, freq = FALSE, ylim = c(0, 0.007), main = "",
     col = "gray40",
     xlab = "Hillenbrand et al. (1995) [E] and [I] F1 frequency (Hz)")
hist(x$F1, breaks = brks, freq = FALSE, density = 10, col = "black",
     add = TRUE)
# Legend swatches mirror the bars: `fill` (not `col`) colours the boxes, and
# density = NA requests a solid fill, whereas density = 0 would leave the
# box empty.
legend("topright", c("head", "hid"), fill = c("gray40", "black"),
       density = c(NA, 10))

# Use t-test to ask whether that mid vowel has significantly different
# mean F1 from high vowel.
t.test(x$F1, y$F1)

# Now do the same thing for the tense and lax high front vowels
# (x = "ih", y = "iy").
brks <- seq(300, 600, 50)
x <- subset(hill, vowel == "ih")
y <- subset(hill, vowel == "iy")
hist(y$F1, breaks = brks, freq = FALSE, ylim = c(0, 0.007), main = "",
     col = "gray40",
     xlab = "Hillenbrand et al. (1995) [i] and [I] F1 frequency (Hz)")
hist(x$F1, breaks = brks, freq = FALSE, density = 10, col = "black",
     add = TRUE)
legend("topright", c("heed", "hid"), fill = c("gray40", "black"),
       density = c(NA, 10))

# Use t-test to ask whether the tense vowel has significantly different
# mean F1 from the lax vowel.
t.test(x$F1, y$F1)

# Yes, it is significantly different, but how informative is that, given
# the overlap?

# Take advantage of the fact that F4 should not vary systematically with
# front vowel height, to look at the relationship between F1 and F4.
# x and y are still the "ih" and "iy" subsets from above.
t.test(x$F4, y$F4)
brks4 <- seq(2500, 6000, 500)
hist(y$F4, breaks = brks4, freq = FALSE, ylim = c(0, 0.0012), main = "",
     col = "gray40",
     xlab = "Hillenbrand et al. (1995) [i] and [I] F4 frequency (Hz)")
hist(x$F4, breaks = brks4, freq = FALSE, density = 10, col = "black",
     add = TRUE)

# Let's make a scatterplot of F1 against F4.  The axis limits are the outer
# histogram breaks, so the scatterplot covers the same range as the
# histograms above.
plot(y$F4, y$F1, xlim = range(brks4), ylim = range(brks), pch = 19,
     xlab = "F4 (Hz)", ylab = "F1 (Hz)")
points(x$F4, x$F1, col = "gray40")
legend("topright", c("i", "I"), pch = c(19, 1), col = c("black", "gray40"))

# Repeat the histogram and scatterplot with F2 in place of F4.
brks2 <- seq(1800, 3550, 250)
hist(y$F2, breaks = brks2, freq = FALSE, ylim = c(0, 0.0012), main = "",
     col = "gray40",
     xlab = "Hillenbrand et al. (1995) [i] and [I] F2 frequency (Hz)")
hist(x$F2, breaks = brks2, freq = FALSE, density = 10, col = "black",
     add = TRUE)

# Let's make a scatterplot of F1 against F2.
plot(y$F2, y$F1, xlim = range(brks2), ylim = range(brks), pch = 19,
     xlab = "F2 (Hz)", ylab = "F1 (Hz)")
points(x$F2, x$F1, col = "gray40")
legend("topright", c("i", "I"), pch = c(19, 1), col = c("black", "gray40"))

# Calculate covariance.  The F4 calls pass use = "pairwise.complete.obs" so
# that observations with a missing value are dropped rather than making the
# result NA.
cov(x$F1, x$F2)
cov(y$F1, y$F2)
cov(x$F1, x$F4, use = "pairwise.complete.obs")
cov(y$F1, y$F4, use = "pairwise.complete.obs")

# Calculate correlations.
cor(x$F1, x$F2)
cor(y$F1, y$F2)
cor(x$F1, x$F4, use = "pairwise.complete.obs")
cor(y$F1, y$F4, use = "pairwise.complete.obs")

# Test hypothesis that the correlation is significantly different from 0.
cor.test(x$F1, x$F2)
cor.test(y$F1, y$F2)
cor.test(x$F1, x$F4)
cor.test(y$F1, y$F4)