# Install rmarkdown only when it is not already available, so that re-running
# the script does not download and reinstall the package every time.
if (!requireNamespace("rmarkdown", quietly = TRUE)) {
  install.packages("rmarkdown")
}
# Folder that holds the population file HW20.csv. The path is specific to the
# original author's machine; point it at the folder where HW20.csv was
# downloaded. The file is read through its full path, so the session's
# working directory is left unchanged.
data_dir <- "C:\\Users\\hmon1\\Desktop\\10C Homework\\"
# loads in data for the full population
pop <- read.csv(paste0(data_dir, "HW20.csv"))
# the two columns are relabelled X and Y; fail loudly if the file has a
# different number of columns instead of silently producing an NA name
stopifnot(ncol(pop) == 2L)
names(pop) <- c("X", "Y")
# sets the seed for the random number generator (the student ID replaces the
# assignment template's placeholder) so the same sample is drawn on every run
set.seed(48183130)
# assigns a "random" sample of 12 rows, drawn without replacement from the
# population, to 'data'
data <- pop[sample(nrow(pop), 12, replace = FALSE), ]
# use this data
data
## X Y
## 640 9 6
## 413 9 6
## 871 8 6
## 336 5 5
## 357 8 6
## 238 7 6
## 56 10 7
## 83 9 6
## 351 9 6
## 632 13 8
## 477 8 6
## 458 11 7
# Total sum of squares for Y: the squared deviations of each sampled Y value
# from the sample mean of Y, added up. The intermediate vector is kept inside
# local() so no extra variable is left in the workspace.
SSY <- local({
  deviations <- data$Y - mean(data$Y)
  sum(deviations^2)
})
SSY
## [1] 6.25
# Simple linear regression of Y on X, fitted to the 12 sampled rows.
# Arguments are named explicitly; the call echoed by summary() is unchanged
# because lm() records the call with its arguments already matched by name.
model <- lm(formula = Y ~ X, data = data)
# coefficient table, residual standard error, R-squared and overall F test
summary(object = model)
##
## Call:
## lm(formula = Y ~ X, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3092 -0.3092 0.0458 0.1508 0.4008
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.11450 0.37405 8.327 8.28e-06 ***
## X 0.35496 0.04139 8.576 6.37e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2735 on 10 degrees of freedom
## Multiple R-squared: 0.8803, Adjusted R-squared: 0.8683
## F-statistic: 73.55 on 1 and 10 DF, p-value: 6.373e-06
# ANOVA table for the fitted regression. The result is stored under the name
# 'anova'; a later call such as anova(model) still reaches the function,
# because R skips non-function objects when it looks up a function name.
anova <- stats::anova(object = model)
anova
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X 1 5.5019 5.5019 73.546 6.373e-06 ***
## Residuals 10 0.7481 0.0748
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pearson's r, r-squared, and the standard error of the estimate.
# r2 is rounded from the unrounded correlation: squaring the already-rounded
# r would stack a second rounding error on top of the first.
r <- round(cor(data$X, data$Y), 3)
r2 <- round(cor(data$X, data$Y)^2, 3)
# number of sampled observations
n <- nrow(data)
# SEE = sqrt(SS_residual / (n - 2)); the residual sum of squares is the second
# entry of the "Sum Sq" column of the ANOVA table (row 1 is the X term)
SEE <- sqrt(anova[["Sum Sq"]][2] / (n - 2))
r
## [1] 0.938
r2
## [1] 0.88
SEE
## [1] 0.2735126