library(languageR)
Question 1) What is the correlation between log lexical decision time and log frequency-per-million-in-CELEX?
#Log decision time and frequency correlation
with(english, cor(RTlexdec, WrittenFrequency))
## [1] -0.434815
plot(english$WrittenFrequency, english$RTlexdec, xlab="log WrittenFrequency", ylab="log RT", main="Correlation between Written Frequency and RT", pch=8)
abline(lm(english$RTlexdec ~ english$WrittenFrequency), col="red")
lm(english$RTlexdec ~ english$WrittenFrequency)
##
## Call:
## lm(formula = english$RTlexdec ~ english$WrittenFrequency)
##
## Coefficients:
## (Intercept) english$WrittenFrequency
## 6.73593 -0.03701
A negative correlation is intuitive here - the more frequent a word the faster we’d expect RT’s to be. An intercept of ~6.7 means that the linear model predicts that a word that appears once would have a log RT of 6.7.
Question 2) Is log lexical decision time approximately normally distributed?
log RT values do appear to approximate a normal distrubtion when we overlay a normal distribution with mean and standard deviation computed from the RTlexdec data:
RT.mean = mean(english$RTlexdec)
RT.mean
## [1] 6.550097
RT.sd = sd(english$RTlexdec)
RT.sd
## [1] 0.1569188
#Plot log RT distribution
plot(density(english$RTlexdec), main="All Data log RTlexdec", col="black")
#Overlay normal curve
curve(dnorm(x, mean=RT.mean, sd=RT.sd), add=T, col="black", lty=2)
legend("topleft", c("logRT", "dnorm"), col="black", lty=1:2,)
Question 3) Are log lexical decision times approximately normally distributed for each subset? Impressionistically, how does the approximation compare for each subset to that of the combined data set?
Log lexical decision times appear appr normally distributed for each subset. The subsets also allow us to see what appears to be a driving factor behind the slightly bimodal shape of the combined data set (due to the lower mean RT for Young compared to old).
old = subset(english, AgeSubject=="old")
young = subset(english, AgeSubject=="young")
#sample means and sd
old.RT.mean = mean(old$RTlexdec)
old.RT.sd = sd(old$RTlexdec)
young.RT.mean = mean(young$RTlexdec)
young.RT.sd = sd(young$RTlexdec)
par(mfrow=c(1,2))
#Old RT plots
plot(density(old$RTlexdec), main="Old, log RT", col="blue")
curve(dnorm(x, mean=old.RT.mean, sd=old.RT.sd), col="blue", lty=2, add=T)
legend("topleft", c("old.logRT", "old.dnorm"), col="blue", lty=1:2,)
#Young RT plots
plot(density(young$RTlexdec), main="Young, log RT", col="red")
curve(dnorm(x, mean=young.RT.mean, sd=old.RT.sd), col="red", lty=2, add=T)
legend("topleft", c("logRT", "dnorm"), col="red", lty=1:2)
Question 4) What are the sample mean and standard deviation of rt.diff? Plot its kernel density and overlap a normal approximation. How good is the normal approximation?
The normal approximation appears to be a fairly close fit to our kernel density plot of the reaction time differences.
rt.diff = old$RTlexdec - young$RTlexdec
rt.diff.mu = mean(rt.diff)
plot(density(rt.diff), main="RT.diff Kernel Density", col="purple")
curve(dnorm(x, mean(rt.diff), sd(rt.diff)), lty=2, add=T, col="purple")
abline(v = rt.diff.mu, col="red")
legend("topleft", c("logRT", "dnorm"), col="purple", lty=1:2)
Question 5) Compute a 95% frequentist confidence interval for the difference in mean RTs between the two groups using (a) bootstrapping, and (b) a t-test.
a) CI’s using boostrapping
#Our resample function (which we'll resample from our rt.diff data)
#This function draws a new sample with replacement, drawing as many
#as in the original data set (n=2284)
length(rt.diff)
## [1] 2284
resample = function(i) {
resampled.data = sample(rt.diff, replace=T, size=length(rt.diff))
#return the mean value from our resampled data
return(mean(resampled.data))
}
#Remember our RT.diff mean =
rt.diff.mu
## [1] 0.2217215
resample(1)
## [1] 0.2212836
resample(10)
## [1] 0.224911
bootstrap.resample.est = sapply(1:1000, FUN=resample)
plot(density(bootstrap.resample.est), col='blue',
main='Bootstrap Param Estimate')
abline(v=mean(bootstrap.resample.est), col='red')
abline(v=quantile(bootstrap.resample.est, prob=c(.025,.975)),
col='green')
#Average of our boostrapped data
mean(bootstrap.resample.est)
## [1] 0.2217521
#Values at the 2.5% and 97.5% quantiles
quantile(bootstrap.resample.est, prob=c(.025,.975))
## 2.5% 97.5%
## 0.2177391 0.2255700
#using resample() function from above
bootstrap.CI.sim = function(i) {
many.resampled.est = sapply(1:1000, FUN=resample)
est.CI = quantile(many.resampled.est, prob=c(.025,.975))
contains.T.param = rt.diff.mu > est.CI[1] && rt.diff.mu < est.CI[2]
return(contains.T.param)
}
bootstrap.CI.sim(1:10)
## [1] TRUE
many.bootstrap.CIs = sapply(1:1000, FUN=bootstrap.CI.sim)
many.bootstrap.CIs[1:100]
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [15] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [29] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [43] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [57] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [99] TRUE TRUE
mean(many.bootstrap.CIs)
## [1] 1
Given that all our tests are evaluating to True, this would indicate that our CI is a bit too wide. We’d want to have more rejections so the acceptance rate levels out to about 95%.
b) CI’s using t.test
#Pass our rt.diff data
#Null hyp: There is no sig difference between the young and old so
#rt.young.mu - rt.old.mu == 0 || rt.young.mu == rt.old.mu
#Alt hup: There is a difference in reaction time so
#rt.young.mu != rt.old.mu
#Since we don't know the direction of the difference in sample means
#we'll perform a two-tailed test
#mu=0 correspondents with our Null Hyp that there is no difference
#conf.level correspondes with the 95% CI we're interested in
t.test(rt.diff, alternative=c("two.sided"), mu=0, conf.level=.95)
##
## One Sample t-test
##
## data: rt.diff
## t = 115.8753, df = 2283, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.2179692 0.2254737
## sample estimates:
## mean of x
## 0.2217215
The t.test suggests that there is likely a difference in the RT between the old and young subjects such that we can expect the average difference in RT to fall between .218 and .225 95% of the time.
Let’s look at the difference in CI’s for our t.test and boostrapping:
#t.test 95% CIs
t.test(rt.diff, alternative=c("two.sided"),
mu=0, conf.level=.95)["conf.int"]
## $conf.int
## [1] 0.2179692 0.2254737
## attr(,"conf.level")
## [1] 0.95
#boostrap 95% CIs
quantile(bootstrap.resample.est, prob=c(.025,.975))
## 2.5% 97.5%
## 0.2177391 0.2255700
Question 6) How do the two confidence intervals compare? Suppose that the distribution were not approximately normal - which should you prefer?
Our t.test and most frequentist methods assume a normal distribution for our data, relying heavily on the central limit theorem. This is one of the reasons why frequentist solutions perform ‘better’ with larger samples (because distributions are more likely to approach normal at larger sizes of n). However, we cannot always assume a normal distribution. Because our boostrapping method simply re-samples from the original distribution it is an accurate reflection of the underlying parameters (esp if they are not normal).
Question 7) What is the p-value of the difference? Is the difference “statistically significant”? In plain English, what does this mean, in terms of the confidence intervals and the meaning of frequentist CIs?
#t.test 95% CIs
t.test(rt.diff, alternative=c("two.sided"),
mu=0, conf.level=.95)
##
## One Sample t-test
##
## data: rt.diff
## t = 115.8753, df = 2283, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.2179692 0.2254737
## sample estimates:
## mean of x
## 0.2217215
Our p-value of 2.2e^-16 indicates a very low probability that we will commit a type 1 error by rejecting the Null Hypothesis when it is in fact true.