# One-time setup: install rmarkdown only when it is missing, instead of
# re-installing it on every run of the script.
if (!requireNamespace("rmarkdown", quietly = TRUE)) {
  install.packages("rmarkdown")
}

# Folder on my computer that contains HW21.csv; change this to the folder
# where you downloaded the file. The path is joined to the file name below,
# so the working directory of the R session is left untouched.
data_dir <- "C:\\Users\\hmon1\\Desktop\\10C Homework"

# Load the data for the full population and rename its three columns to
# G (group label), X and Y.
pop <- read.csv(file.path(data_dir, "HW21.csv"))
names(pop) <- c("G", "X", "Y")

# Fail early, with a readable message, if a group is too small to sample from.
stopifnot(
  sum(pop$G == "A") >= 10,
  sum(pop$G == "B") >= 5
)

# Set the seed for the random number generator so the sample is reproducible
# (use your student ID instead of 12345678).
set.seed(48183130)

# Draw a "random" sample of 10 rows from group A and 5 rows from group B.
# The logical group mask is passed as `prob`: rows outside the group get
# weight 0 and can never be drawn, rows inside the group are equally likely.
G_A <- pop[
  sample(nrow(pop), size = 10, replace = FALSE, prob = pop$G == "A"),
]
G_B <- pop[
  sample(nrow(pop), size = 5, replace = FALSE, prob = pop$G == "B"),
]

# Combined sample (group A rows first, then group B); use this data below.
data <- rbind(G_A, G_B)
data
## G X Y
## 421 A 10 10
## 659 A 65 65
## 219 A 70 80
## 800 A 40 30
## 187 A 35 35
## 542 A 65 65
## 274 A 60 70
## 773 A 20 30
## 94 A 70 60
## 841 A 20 30
## 544 B 25 40
## 576 B 25 60
## 178 B 25 60
## 472 B 35 45
## 312 B 25 40
# Scatterplot of Y against X for the combined sample. The plotting symbol is
# picked by group: the first factor level gets pch 1, the second gets pch 3.
plot(
  data[["X"]], data[["Y"]],
  main = "Scatterplot",
  xlab = "X",
  ylab = "Y",
  pch = c(1, 3)[as.integer(as.factor(data[["G"]]))]
)
legend(
  "bottomright",
  legend = c("A", "B"),
  pch = c(1, 3),
  bty = "o",
  cex = 0.8
)

# Simple linear regression of Y on X using groups A and B together
model_full <- lm(formula = Y ~ X, data = data)
summary(model_full)
##
## Call:
## lm(formula = Y ~ X, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.485 -6.897 -1.660 4.698 22.421
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.4037 7.0565 2.750 0.016545 *
## X 0.7270 0.1597 4.554 0.000542 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.46 on 13 degrees of freedom
## Multiple R-squared: 0.6146, Adjusted R-squared: 0.585
## F-statistic: 20.73 on 1 and 13 DF, p-value: 0.0005416
# Pearson correlation between X and Y, groups A and B together
r_full <- cor(data[["X"]], data[["Y"]])
r_full

# Simple linear regression of Y on X, group A only
model_A <- lm(formula = Y ~ X, data = G_A)
summary(model_A)
##
## Call:
## lm(formula = Y ~ X, data = G_A)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.3499 -3.8610 -0.7594 6.3776 9.5587
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.8948 5.9538 0.822 0.435
## X 0.9364 0.1177 7.954 4.55e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.218 on 8 degrees of freedom
## Multiple R-squared: 0.8877, Adjusted R-squared: 0.8737
## F-statistic: 63.26 on 1 and 8 DF, p-value: 4.553e-05
# Pearson correlation between X and Y, group A only
r_A <- cor(G_A[["X"]], G_A[["Y"]])
r_A

# Simple linear regression of Y on X, group B only
model_B <- lm(formula = Y ~ X, data = G_B)
summary(model_B)
##
## Call:
## lm(formula = Y ~ X, data = G_B)
##
## Residuals:
## 544 576 178 472 312
## -1.000e+01 1.000e+01 1.000e+01 -4.441e-15 -1.000e+01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.500 35.237 1.774 0.174
## X -0.500 1.291 -0.387 0.724
##
## Residual standard error: 11.55 on 3 degrees of freedom
## Multiple R-squared: 0.04762, Adjusted R-squared: -0.2698
## F-statistic: 0.15 on 1 and 3 DF, p-value: 0.7244
# Pearson correlation between X and Y, group B only
r_B <- cor(G_B[["X"]], G_B[["Y"]])
r_B
## [1] -0.2182179
# Scatterplot of the combined sample with the three fitted regression lines.
# Symbols are picked by group: first factor level pch 1, second level pch 3.
plot(
  data[["X"]], data[["Y"]],
  main = "Scatterplot by Subgroups",
  xlab = "X",
  ylab = "Y",
  pch = c(1, 3)[as.integer(as.factor(data[["G"]]))]
)

# Fitted lines: solid = A and B combined, dashed = A only, dotted = B only
abline(reg = model_full)
abline(reg = model_A, lty = 2) # lty 2 is "dashed"
abline(reg = model_B, lty = 3) # lty 3 is "dotted"

legend(
  "bottomright",
  legend = c("A", "B"),
  pch = c(1, 3),
  bty = "o",
  cex = 0.8
)