# Read the survey responses from the CSV file into a data frame.
survey <- read.csv(file = "Stat100_200_2017spring_survey01M.csv")
# Count how many responses fall in each schoolYear category.
table(survey[["schoolYear"]])
##
## Freshman Junior Senior Sophomore
## 906 159 87 351
library(lattice)
# Lattice scatterplot of GPA against study hours, one panel per school year.
xyplot(
  GPA ~ studyHr | schoolYear,
  data = survey,
  xlab = "Average Study Hours",
  ylab = "GPA"
)
# Simple linear regression of GPA on study hours over all respondents.
# The formula keeps the survey$ prefixes so the printed call and the
# coefficient label remain "survey$studyHr".
fit <- lm(formula = survey$GPA ~ survey$studyHr)
# Base-graphics scatterplot of the same two variables, with the fitted
# regression line overlaid in red.
with(survey, plot(GPA ~ studyHr, xlab = "Average Study Hours", ylab = "GPA"))
abline(reg = fit, col = "red")
# Print the coefficient table and fit statistics.
summary(fit)
##
## Call:
## lm(formula = survey$GPA ~ survey$studyHr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3360 -0.2892 0.1109 0.4108 0.8748
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.101772 0.027608 112.352 < 2e-16 ***
## survey$studyHr 0.046845 0.007569 6.189 7.78e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5602 on 1501 degrees of freedom
## Multiple R-squared: 0.02489, Adjusted R-squared: 0.02424
## F-statistic: 38.31 on 1 and 1501 DF, p-value: 7.783e-10
# Plot the residuals of `fit` against the study-hours predictor.
# By default lm() drops rows with missing values, so residuals(fit) can be
# shorter than survey$studyHr, and pairing the two would then fail with a
# length mismatch. The model frame holds exactly the rows used in the fit;
# its second column is the predictor of this one-predictor model, so the
# residuals and x values always line up.
plot(residuals(fit) ~ model.frame(fit)[[2]], pch = 19,
     xlab = "Average Study Hours", ylab = "Residuals")
# Logical masks, one per school year, flagging that year's respondents.
fresh <- survey$schoolYear == "Freshman"
soph <- survey$schoolYear == "Sophomore"
jun <- survey$schoolYear == "Junior"
sen <- survey$schoolYear == "Senior"

# Regress GPA on study hours separately within each school year.
LMfresh <- lm(formula = survey$GPA[fresh] ~ survey$studyHr[fresh])
LMsoph <- lm(formula = survey$GPA[soph] ~ survey$studyHr[soph])
LMjun <- lm(formula = survey$GPA[jun] ~ survey$studyHr[jun])
LMsen <- lm(formula = survey$GPA[sen] ~ survey$studyHr[sen])

# Coefficient table and fit statistics for the freshman-only regression.
summary(LMfresh)
##
## Call:
## lm(formula = survey$GPA[fresh] ~ survey$studyHr[fresh])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2671 -0.2841 0.1436 0.4436 0.8159
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.10100 0.03984 77.832 < 2e-16 ***
## survey$studyHr[fresh] 0.05537 0.01160 4.774 2.11e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.606 on 904 degrees of freedom
## Multiple R-squared: 0.02459, Adjusted R-squared: 0.02351
## F-statistic: 22.79 on 1 and 904 DF, p-value: 2.108e-06
# Coefficient table and fit statistics for the sophomore-only regression.
summary(LMsoph)
##
## Call:
## lm(formula = survey$GPA[soph] ~ survey$studyHr[soph])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.75128 -0.28474 0.06188 0.37861 0.92203
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.02458 0.04850 62.367 < 2e-16 ***
## survey$studyHr[soph] 0.05338 0.01251 4.268 2.55e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.492 on 349 degrees of freedom
## Multiple R-squared: 0.04959, Adjusted R-squared: 0.04687
## F-statistic: 18.21 on 1 and 349 DF, p-value: 2.55e-05
# Coefficient table and fit statistics for the junior-only regression.
summary(LMjun)
##
## Call:
## lm(formula = survey$GPA[jun] ~ survey$studyHr[jun])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.23174 -0.29400 0.08602 0.38602 0.80822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.18734 0.07359 43.312 <2e-16 ***
## survey$studyHr[jun] 0.00888 0.01837 0.483 0.629
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4875 on 157 degrees of freedom
## Multiple R-squared: 0.001486, Adjusted R-squared: -0.004874
## F-statistic: 0.2337 on 1 and 157 DF, p-value: 0.6295
# Coefficient table and fit statistics for the senior-only regression.
summary(LMsen)
##
## Call:
## lm(formula = survey$GPA[sen] ~ survey$studyHr[sen])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.29268 -0.24557 0.02508 0.31311 0.80153
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.15137 0.07858 40.102 <2e-16 ***
## survey$studyHr[sen] 0.04710 0.01906 2.472 0.0154 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.402 on 85 degrees of freedom
## Multiple R-squared: 0.06706, Adjusted R-squared: 0.05609
## F-statistic: 6.11 on 1 and 85 DF, p-value: 0.01544
# Repeats the earlier summary(LMsoph) call; LMsoph has not been refit in
# between, so the output is the same as before.
summary(LMsoph)
##
## Call:
## lm(formula = survey$GPA[soph] ~ survey$studyHr[soph])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.75128 -0.28474 0.06188 0.37861 0.92203
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.02458 0.04850 62.367 < 2e-16 ***
## survey$studyHr[soph] 0.05338 0.01251 4.268 2.55e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.492 on 349 degrees of freedom
## Multiple R-squared: 0.04959, Adjusted R-squared: 0.04687
## F-statistic: 18.21 on 1 and 349 DF, p-value: 2.55e-05
# Repeats the earlier summary(LMjun) call; LMjun has not been refit in
# between, so the output is the same as before.
summary(LMjun)
##
## Call:
## lm(formula = survey$GPA[jun] ~ survey$studyHr[jun])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.23174 -0.29400 0.08602 0.38602 0.80822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.18734 0.07359 43.312 <2e-16 ***
## survey$studyHr[jun] 0.00888 0.01837 0.483 0.629
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4875 on 157 degrees of freedom
## Multiple R-squared: 0.001486, Adjusted R-squared: -0.004874
## F-statistic: 0.2337 on 1 and 157 DF, p-value: 0.6295
# Repeats the earlier summary(LMsen) call; LMsen has not been refit in
# between, so the output is the same as before.
summary(LMsen)
##
## Call:
## lm(formula = survey$GPA[sen] ~ survey$studyHr[sen])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.29268 -0.24557 0.02508 0.31311 0.80153
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.15137 0.07858 40.102 <2e-16 ***
## survey$studyHr[sen] 0.04710 0.01906 2.472 0.0154 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.402 on 85 degrees of freedom
## Multiple R-squared: 0.06706, Adjusted R-squared: 0.05609
## F-statistic: 6.11 on 1 and 85 DF, p-value: 0.01544
# Study-hours value at which the senior regression line is evaluated.
newHr <- data.frame(studyHr = 5)
# Predicted GPA = intercept + slope * hours, computed by hand from the
# senior-only fit. The hours are read from newHr so the value is stated in
# one place only (previously newHr was created but the literal 5 was
# repeated here).
# NOTE(review): predict(LMsen, newdata = newHr) would presumably not use
# newHr, because LMsen was fit on survey$studyHr[sen] rather than on a
# column named studyHr -- confirm before switching to predict().
# The result keeps the "(Intercept)" name carried by the first coefficient.
GPA_g <- coef(LMsen)[1] + newHr$studyHr * coef(LMsen)[2]
GPA_g
## (Intercept)
## 3.386885
For a senior who spends 5 hours/day studying, the senior-only regression predicts a GPA of 3.386885.
# Reverse regression for seniors: study hours is now the response and GPA
# the predictor. The existing `sen` mask selects the senior rows.
StudySen <- lm(formula = survey$studyHr[sen] ~ survey$GPA[sen])
# Evaluate the reverse regression line at the GPA predicted above (GPA_g).
coef(StudySen)[1] + GPA_g * coef(StudySen)[2]
## (Intercept)
## 3.552339
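Plugging the predicted GPA of 3.386885 back into the regression of study hours on GPA gives a predicted study time of about 3.55 hours, which is less than the 5 hours we started with. The regression of study hours on GPA is not the inverse of the regression of GPA on study hours, so predicting "there and back" does not return the original value (the regression effect).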