Using the ACTIVE data, do the following homework. Use set.seed(0)
before your bootstrap.
- Read in data from the active.txt data file
#read data into R, sets the first row as a header, and converts missing data to NA
active = read.table('active.txt',
header = T, na.string = '99999')
- Select a subset of data with the two variables – reasoning ability
(reason) and memory ability (hvltt). Then, select participants older
than 86.
## take out a subset of data
#creates a subset with only participants older than 86
d86 = active[active$age > 86, ]
d86
- Suppose we are interested in the relationship between reasoning
ability (reason) and memory ability (hvltt). Remove missing data in the
two variables to make sure all participants have data on both variables
How many participants are left after selection? n = 27
## remove missing data for reason and hvltt
d86 = d86[!is.na(d86$reason) & !is.na(d86$hvltt), ]
d86
nrow(d86) #the number of participants left after selection
[1] 27
- Investigate the distribution of each variable.
attach(d86)
summary(reason)
Min. 1st Qu. Median Mean 3rd Qu. Max.
8.00 12.00 16.00 18.56 23.00 42.00
mean(reason)
[1] 18.55556
sd(reason)
[1] 8.450459
#five number summaries for hvltt and reason
summary(hvltt)
Min. 1st Qu. Median Mean 3rd Qu. Max.
9.0 18.5 20.0 20.7 24.5 30.0
mean(hvltt)
[1] 20.7037
sd(hvltt)
[1] 5.224313
par(mfrow = c(1, 2)) #sets the plots side by side
#creates 2 density plots for reasoning and memory ability
plot(density(reason), main = "Density Plot", xlab = "Reason")
plot(density(hvltt), main = "Density Plot", xlab = "Memory Ability")

both density curves have a bell shaped distribution. Reasoning
ability shows that most adults over 86 score just slightly below average
and memory abbility is still average.
- Display the relationship between reason and hvltt
#scatterplot of the relationship between reason and hvltt
plot(reason, hvltt, main = 'Relationship between Reasoning Ability and
Memory Ability', xlab = "Reasoning Ability", ylab = "Memory Ability (HVLTT)",
xlim = c(0,50), ylim = c(0,35))
#adds the regression line to the plot
abline(lm(hvltt~reason), col='blue', lwd=3)
- Fit a simple linear regression model to the data Is the regression
slope coefficient significant?
#simple regression model where memory ability is the outcome and reasoning is the predictor
regmodel = lm(hvltt~reason)
#prints a summary of the regresson model
summary(regmodel)
The p-value for the slope = 0.024, which is less than 0.05 therefore,
we reject the null hypothesis, the slope is significant.
How does the distribution of the residuals from the regression
analysis look like?
##distribution of the residuals
#histogram of the residuals, prob=T scales the y-axis to show density instead of frequency
hist(residuals(regmodel),prob = T, xlab='errors', main='Disribution of Residuals', ylim=c(0, .15 ))
#places a smooth density curve on the histogram
lines(density(residuals(regmodel)))
Find the bootstrap distribution of the correlation (in R, the
function cor(x,y) gives the correlation between x and y) between reason
and hvltt How does the bootstrap distribution look like? Calculate
bootstrap standard error Get the bootstrap percentile CI Is the
correlation significant at alpha level 0.05?
# Bootstrap correlation
B = 1000 #number of bootstrap resamples
boot.cor = NULL #create an empty variable
n = length(hvltt) #sample size
set.seed(0) #set random seed for reproducibility
for (i in 1:B){ #creates a loop that repeats B times
index = sample(1:n, replace = TRUE) #draws a bootstrap sample from the data with replacement
boot.reason = d86$reason[index]
boot.hvltt = d86$hvltt[index]
boot.cor[i] = cor(boot.reason, boot.hvltt) # calculates the correlation and stores it in i
}
#plot the bootstrap sampling distribution, breaks set the number of bins, prob=T shows density
hist(boot.cor, breaks = 27,prob = T,
main = "Bootstrap Distribution of the Correlation Between Reason and Hvltt)",
xlab = "Bootstrap correlations", ylab = 'Density')
lines(density(boot.cor)) #creates a density curve

## bootstrap standard error
sd(boot.cor)
[1] 0.1576128
## percentile CI shows the 95% confidence interval for the correlation
quantile(boot.cor, prob = c(0.025, 0.975))
2.5% 97.5%
-0.001577498 0.603452748
The correlation is not statistically significant.
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVXNpbmcgdGhlIEFDVElWRSBkYXRhLCBkbyB0aGUgZm9sbG93aW5nIGhvbWV3b3JrLiBVc2Ugc2V0LnNlZWQoMCkgYmVmb3JlIHlvdXIgYm9vdHN0cmFwLgoKMS4gUmVhZCBpbiBkYXRhIGZyb20gdGhlIGFjdGl2ZS50eHQgZGF0YSBmaWxlCmBgYHtyfQojcmVhZCBkYXRhIGludG8gUiwgc2V0cyB0aGUgZmlyc3Qgcm93IGFzIGEgaGVhZGVyLCBhbmQgY29udmVydHMgbWlzc2luZyBkYXRhIHRvIE5BCmFjdGl2ZSA9IHJlYWQudGFibGUoJ2FjdGl2ZS50eHQnLAogICAgICAgICAgICAgICAgICAgIGhlYWRlciA9IFQsIG5hLnN0cmluZyA9ICc5OTk5OScpCmBgYAoyLiBTZWxlY3QgYSBzdWJzZXQgb2YgZGF0YSB3aXRoIHRoZSB0d28gdmFyaWFibGVzIOKAkyByZWFzb25pbmcgYWJpbGl0eSAocmVhc29uKSBhbmQgbWVtb3J5IGFiaWxpdHkgKGh2bHR0KS4gVGhlbiwgc2VsZWN0IHBhcnRpY2lwYW50cyBvbGRlciB0aGFuIDg2LgpgYGB7cn0KIyMgdGFrZSBvdXQgYSBzdWJzZXQgb2YgZGF0YQojY3JlYXRlcyBhIHN1YnNldCB3aXRoIG9ubHkgcGFydGljaXBhbnRzIG9sZGVyIHRoYW4gODYKZDg2ID0gYWN0aXZlW2FjdGl2ZSRhZ2UgPiA4NiwgXQpkODYKYGBgCgozLiBTdXBwb3NlIHdlIGFyZSBpbnRlcmVzdGVkIGluIHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiByZWFzb25pbmcgYWJpbGl0eSAocmVhc29uKSBhbmQgbWVtb3J5IGFiaWxpdHkgKGh2bHR0KS4KUmVtb3ZlIG1pc3NpbmcgZGF0YSBpbiB0aGUgdHdvIHZhcmlhYmxlcyB0byBtYWtlIHN1cmUgYWxsIHBhcnRpY2lwYW50cyBoYXZlIGRhdGEgb24gYm90aCB2YXJpYWJsZXMKSG93IG1hbnkgcGFydGljaXBhbnRzIGFyZSBsZWZ0IGFmdGVyIHNlbGVjdGlvbj8gbiA9IDI3CmBgYHtyfQojIyByZW1vdmUgbWlzc2luZyBkYXRhIGZvciByZWFzb24gYW5kIGh2bHR0CgpkODYgPSBkODZbIWlzLm5hKGQ4NiRyZWFzb24pICYgIWlzLm5hKGQ4NiRodmx0dCksIF0KZDg2Cm5yb3coZDg2KSAjdGhlIG51bWJlciBvZiBwYXJ0aWNpcGFudHMgbGVmdCBhZnRlciBzZWxlY3Rpb24KYGBgCgo0LiBJbnZlc3RpZ2F0ZSB0aGUgZGlzdHJpYnV0aW9uIG9mIGVhY2ggdmFyaWFibGUuCmBgYHtyfQphdHRhY2goZDg2KQpzdW1tYXJ5KHJlYXNvbikKbWVhbihyZWFzb24pCnNkKHJlYXNvbikKI2ZpdmUgbnVtYmVyIHN1bW1hcmllcyBmb3IgaHZsdHQgYW5kIHJlYXNvbgpzdW1tYXJ5KGh2bHR0KQptZWFuKGh2bHR0KQpzZChodmx0dCkKCnBhcihtZnJvdyA9IGMoMSwgMikpICNzZXRzIHRoZSBwbG90cyBzaWRlIGJ5IHNpZGUKI2NyZWF0ZXMgMiBkZW5zaXR5IHBsb3RzIGZvciByZWFzb25pbmcgYW5kIG1lbW9yeSBhYmlsaXR5CnBsb3QoZGVuc2l0eShyZWFzb24pLCBtYWluID0gIkRlbnNpdHkgUGxvdCIsIHhsYWIgPSAiUmVhc29uIikKcGxvdChkZW5zaXR5KGh2bHR0KSwgbWFpbiA9ICJEZW5zaXR5IFBsb3QiLCB4bGFiID0gIk1lbW9yeSBBYmlsaXR5IikKYGBgCmJvdGggZGVuc2l0eSBjdXJ2ZXMgaGF2ZSBhIGJlbGwgc2hhcGVkIGRpc3RyaWJ1dGlvbi4gUmVhc29uaW5nIGFiaWxpdHkgc2hvd3MgdGhhdCBtb3N0IGFkdWx0cyBvdmVyIDg2IHNjb3JlIGp1c3Qgc2xpZ2h0bHkgYmVsb3cgYXZlcmFnZSBhbmQgbWVtb3J5IGFiYmlsaXR5IGlzIHN0aWxsIGF2ZXJhZ2UuIAoKNS4gRGlzcGxheSB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gcmVhc29uIGFuZCBodmx0dApgYGB7cn0KI3NjYXR0ZXJwbG90IG9mIHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiByZWFzb24gYW5kIGh2bHR0IApwbG90KHJlYXNvbiwgaHZsdHQsIG1haW4gPSAnUmVsYXRpb25zaGlwIGJldHdlZW4gUmVhc29uaW5nIEFiaWxpdHkgYW5kIAogICAgIE1lbW9yeSBBYmlsaXR5JywgeGxhYiA9ICJSZWFzb25pbmcgQWJpbGl0eSIsIHlsYWIgPSAiTWVtb3J5IEFiaWxpdHkgKEhWTFRUKSIsCiAgICAgeGxpbSA9IGMoMCw1MCksIHlsaW0gPSBjKDAsMzUpKQojYWRkcyB0aGUgcmVncmVzc2lvbiBsaW5lIHRvIHRoZSBwbG90CmFibGluZShsbShodmx0dH5yZWFzb24pLCBjb2w9J2JsdWUnLCBsd2Q9MykKCmBgYAo2LiBGaXQgYSBzaW1wbGUgbGluZWFyIHJlZ3Jlc3Npb24gbW9kZWwgdG8gdGhlIGRhdGEKSXMgdGhlIHJlZ3Jlc3Npb24gc2xvcGUgY29lZmZpY2llbnQgc2lnbmlmaWNhbnQ/CmBgYHtyfQojc2ltcGxlIHJlZ3Jlc3Npb24gbW9kZWwgd2hlcmUgbWVtb3J5IGFiaWxpdHkgaXMgdGhlIG91dGNvbWUgYW5kIHJlYXNvbmluZyBpcyB0aGUgcHJlZGljdG9yCnJlZ21vZGVsID0gbG0oaHZsdHR+cmVhc29uKQojcHJpbnRzIGEgc3VtbWFyeSBvZiB0aGUgcmVncmVzc29uIG1vZGVsCnN1bW1hcnkocmVnbW9kZWwpCmBgYApUaGUgcC12YWx1ZSBmb3IgdGhlIHNsb3BlID0gMC4wMjQsIHdoaWNoIGlzIGxlc3MgdGhhbiAwLjA1CnRoZXJlZm9yZSwgd2UgcmVqZWN0IHRoZSBudWxsIGh5cG90aGVzaXMsIHRoZSBzbG9wZSBpcyBzaWduaWZpY2FudC4gCgpIb3cgZG9lcyB0aGUgZGlzdHJpYnV0aW9uIG9mIHRoZSByZXNpZHVhbHMgZnJvbSB0aGUgcmVncmVzc2lvbiBhbmFseXNpcyBsb29rIGxpa2U/CmBgYHtyfQojI2Rpc3RyaWJ1dGlvbiBvZiB0aGUgcmVzaWR1YWxzCiNoaXN0b2dyYW0gb2YgdGhlIHJlc2lkdWFscywgcHJvYj1UIHNjYWxlcyB0aGUgeS1heGlzIHRvIHNob3cgZGVuc2l0eSBpbnN0ZWFkIG9mIGZyZXF1ZW5jeSAKaGlzdChyZXNpZHVhbHMocmVnbW9kZWwpLHByb2IgPSBULCB4bGFiPSdlcnJvcnMnLCBtYWluPSdEaXNyaWJ1dGlvbiBvZiBSZXNpZHVhbHMnLCB5bGltPWMoMCwgLjE1ICkpCgojcGxhY2VzIGEgc21vb3RoIGRlbnNpdHkgY3VydmUgb24gdGhlIGhpc3RvZ3JhbQpsaW5lcyhkZW5zaXR5KHJlc2lkdWFscyhyZWdtb2RlbCkpKQpgYGAKCkZpbmQgdGhlIGJvb3RzdHJhcCBkaXN0cmlidXRpb24gb2YgdGhlIGNvcnJlbGF0aW9uIChpbiBSLCB0aGUgZnVuY3Rpb24gY29yKHgseSkgZ2l2ZXMgdGhlIGNvcnJlbGF0aW9uIGJldHdlZW4geCBhbmQgeSkgYmV0d2VlbiByZWFzb24gYW5kIGh2bHR0CkhvdyBkb2VzIHRoZSBib290c3RyYXAgZGlzdHJpYnV0aW9uIGxvb2sgbGlrZT8KQ2FsY3VsYXRlIGJvb3RzdHJhcCBzdGFuZGFyZCBlcnJvcgpHZXQgdGhlIGJvb3RzdHJhcCBwZXJjZW50aWxlIENJCklzIHRoZSBjb3JyZWxhdGlvbiBzaWduaWZpY2FudCBhdCBhbHBoYSBsZXZlbCAwLjA1PwoKYGBge3J9CiMgQm9vdHN0cmFwIGNvcnJlbGF0aW9uCkIgPSAxMDAwICNudW1iZXIgb2YgYm9vdHN0cmFwIHJlc2FtcGxlcyAKYm9vdC5jb3IgPSBOVUxMICNjcmVhdGUgYW4gZW1wdHkgdmFyaWFibGUKbiA9IGxlbmd0aChodmx0dCkgI3NhbXBsZSBzaXplCgpzZXQuc2VlZCgwKSAjc2V0IHJhbmRvbSBzZWVkIGZvciByZXByb2R1Y2liaWxpdHkKCmZvciAoaSBpbiAxOkIpeyAjY3JlYXRlcyBhIGxvb3AgdGhhdCByZXBlYXRzIEIgdGltZXMKCWluZGV4ID0gc2FtcGxlKDE6biwgcmVwbGFjZSA9IFRSVUUpICNkcmF3cyBhIGJvb3RzdHJhcCBzYW1wbGUgZnJvbSB0aGUgZGF0YSB3aXRoIHJlcGxhY2VtZW50Cglib290LnJlYXNvbiA9IGQ4NiRyZWFzb25baW5kZXhdCglib290Lmh2bHR0ID0gZDg2JGh2bHR0W2luZGV4XQoJYm9vdC5jb3JbaV0gPSBjb3IoYm9vdC5yZWFzb24sIGJvb3QuaHZsdHQpICMgY2FsY3VsYXRlcyB0aGUgY29ycmVsYXRpb24gYW5kIHN0b3JlcyBpdCBpbiBpCgl9CiNwbG90IHRoZSBib290c3RyYXAgc2FtcGxpbmcgZGlzdHJpYnV0aW9uLCBicmVha3Mgc2V0IHRoZSBudW1iZXIgb2YgYmlucywgcHJvYj1UIHNob3dzIGRlbnNpdHkgCmhpc3QoYm9vdC5jb3IsIGJyZWFrcyA9IDI3LHByb2IgPSBULAogICAgIG1haW4gPSAiQm9vdHN0cmFwIERpc3RyaWJ1dGlvbiBvZiB0aGUgQ29ycmVsYXRpb24gQmV0d2VlbiBSZWFzb24gYW5kIEh2bHR0KSIsCiAgICAgeGxhYiA9ICJCb290c3RyYXAgY29ycmVsYXRpb25zIiwgeWxhYiA9ICdEZW5zaXR5JykKbGluZXMoZGVuc2l0eShib290LmNvcikpICNjcmVhdGVzIGEgZGVuc2l0eSBjdXJ2ZSAKCgojIyBib290c3RyYXAgc3RhbmRhcmQgZXJyb3IKc2QoYm9vdC5jb3IpCgojIyBwZXJjZW50aWxlIENJIHNob3dzIHRoZSA5NSUgY29uZmlkZW5jZSBpbnRlcnZhbCBmb3IgdGhlIGNvcnJlbGF0aW9uIApxdWFudGlsZShib290LmNvciwgcHJvYiA9IGMoMC4wMjUsIDAuOTc1KSkKYGBgClRoZSBjb3JyZWxhdGlvbiBpcyBub3Qgc3RhdGlzdGljYWxseSBzaWduaWZpY2FudC4K