# Install rmarkdown only when it is missing, so that re-running the script
# does not re-download the package on every execution.
if (!requireNamespace("rmarkdown", quietly = TRUE)) {
  install.packages("rmarkdown")
}

# Set the working directory to the folder holding the homework files.
# NOTE(review): this absolute path is machine-specific; anyone else running the
# script must point it at the folder where HW22.csv was downloaded.
setwd("C:\\Users\\hmon1\\Desktop\\10C Homework\\")

# Load the data for the full population (read relative to the working
# directory set above).
pop <- read.csv("HW22.csv")

# The analysis assumes exactly two columns (predictor, outcome); fail loudly
# instead of silently mislabelling columns if the file has a different shape.
stopifnot(ncol(pop) == 2L)
names(pop) <- c("X", "Y")

# Seed the random number generator so the sample below is reproducible.
# The seed value is the student ID.
set.seed(48183130)

# Draw a simple random sample of 10 rows (without replacement) from the
# population; all later steps operate on this sample.
data <- pop[sample(nrow(pop), 10, replace = FALSE), ]

# Show the sampled rows.
data
## X Y
## 640 6 5
## 413 8 6
## 871 8 6
## 336 7 6
## 357 7 6
## 238 10 7
## 56 9 6
## 83 8 6
## 351 8 6
## 632 8 6
# Simple linear regression of Y on X, fitted by least squares to the sampled
# rows; the summary prints the coefficient table and fit statistics.
model <- lm(formula = Y ~ X, data = data)
summary(object = model)
##
## Call:
## lm(formula = Y ~ X, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4037 -0.0367 -0.0367 0.1628 0.3303
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.10092 0.62249 4.982 0.00108 **
## X 0.36697 0.07812 4.698 0.00155 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2579 on 8 degrees of freedom
## Multiple R-squared: 0.7339, Adjusted R-squared: 0.7007
## F-statistic: 22.07 on 1 and 8 DF, p-value: 0.001546
# Pearson's r and r^2 for the sampled X and Y.
# The correlation is kept at full precision and each statistic is rounded
# exactly once: squaring an already-rounded r compounds the rounding error and
# can disagree with the model's R-squared in the third decimal place.
r_raw <- cor(data$X, data$Y)
r <- round(r_raw, 3)
r2 <- round(r_raw^2, 3)
r
## [1] 0.857
r2
## [1] 0.734
# Scatterplot of the sampled points on fixed 0-10 axes, with the fitted
# least-squares line of Y on X drawn on top.
plot(
  x = data$X,
  y = data$Y,
  main = "Scatterplot",
  xlim = c(0, 10),
  ylim = c(0, 10),
  xlab = "X",
  ylab = "Y"
)
abline(reg = lm(formula = Y ~ X, data = data))
# Attach the model's predicted Y values and the residuals (observed minus
# predicted) to the sample, each rounded to two decimals, then show the table.
data[["Y_pred"]] <- round(predict(model), digits = 2)
data[["resid"]] <- round(residuals(model), digits = 2)
data
## X Y Y_pred resid
## 640 6 5 5.30 -0.30
## 413 8 6 6.04 -0.04
## 871 8 6 6.04 -0.04
## 336 7 6 5.67 0.33
## 357 7 6 5.67 0.33
## 238 10 7 6.77 0.23
## 56 9 6 6.40 -0.40
## 83 8 6 6.04 -0.04
## 351 8 6 6.04 -0.04
## 632 8 6 6.04 -0.04
# Residual plot: the rounded residuals against X, with a horizontal reference
# line at zero.
plot(
  x = data$X,
  y = data$resid,
  main = "Residual Plot",
  xlab = "X",
  ylab = "Y-Y'"
)
abline(h = 0)