# Read the survey responses from the CSV file into the data frame `survey`,
# which every later section of this analysis uses.
survey <- read.csv(file = "Stat100_200_2017spring_survey01M.csv")

(a) Number of Students in Each Class

# Frequency count of respondents for each value of schoolYear.
table(survey[["schoolYear"]])
## 
##  Freshman    Junior    Senior Sophomore 
##       906       159        87       351

(b) GPA vs Average Study Hours in each Class

# lattice supplies xyplot() for conditioned scatter plots.
library(lattice)

# GPA against average study hours, drawn in a separate panel per school year.
xyplot(
  GPA ~ studyHr | schoolYear,
  data = survey,
  xlab = "Average Study Hours",
  ylab = "GPA"
)

(c) Linear Model

# Simple linear regression of GPA on average study hours, all classes pooled.
# The formula terms stay in their `survey$...` form so that the call and the
# coefficient labels reported by summary() are exactly as before.
fit <- lm(formula = survey$GPA ~ survey$studyHr)

# Scatter plot of the data with the fitted regression line overlaid in red.
plot(
  GPA ~ studyHr,
  data = survey,
  xlab = "Average Study Hours",
  ylab = "GPA"
)
abline(reg = fit, col = "red")

# Residual quantiles, coefficient table and fit statistics for the pooled model.
summary(fit)
## 
## Call:
## lm(formula = survey$GPA ~ survey$studyHr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3360 -0.2892  0.1109  0.4108  0.8748 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.101772   0.027608 112.352  < 2e-16 ***
## survey$studyHr 0.046845   0.007569   6.189 7.78e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5602 on 1501 degrees of freedom
## Multiple R-squared:  0.02489,    Adjusted R-squared:  0.02424 
## F-statistic: 38.31 on 1 and 1501 DF,  p-value: 7.783e-10

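(d) Relationship between GPA and the Average Study Hours

The fitted slope is positive and statistically significant (0.0468, p = 7.78e-10): each additional hour of average study time is associated with a GPA about 0.047 points higher. The relationship is weak, however; R-squared is only 0.025, so study hours explain about 2.5% of the variation in GPA.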

(e) Scatter Plot of Residuals

# Residuals of the pooled model `fit` against the predictor, as solid points.
plot(
  resid(fit) ~ survey$studyHr,
  xlab = "Average Study Hours",
  ylab = "Residuals",
  pch = 19
)

(f) GPA vs Average Study Hours for each Class

# For each class year: build a logical row mask, then regress GPA on study
# hours using only the rows selected by that mask.

# Freshmen
fresh <- survey$schoolYear == "Freshman"
LMfresh <- lm(formula = survey$GPA[fresh] ~ survey$studyHr[fresh])

# Sophomores
soph <- survey$schoolYear == "Sophomore"
LMsoph <- lm(formula = survey$GPA[soph] ~ survey$studyHr[soph])

# Juniors
jun <- survey$schoolYear == "Junior"
LMjun <- lm(formula = survey$GPA[jun] ~ survey$studyHr[jun])

# Seniors
sen <- survey$schoolYear == "Senior"
LMsen <- lm(formula = survey$GPA[sen] ~ survey$studyHr[sen])
# Summaries of the four per-class regressions, printed once each in class
# order (freshman, sophomore, junior, senior). The sophomore, junior and
# senior summaries were previously printed a second time; that verbatim
# duplicate has been removed.
summary(LMfresh)
## 
## Call:
## lm(formula = survey$GPA[fresh] ~ survey$studyHr[fresh])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2671 -0.2841  0.1436  0.4436  0.8159 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            3.10100    0.03984  77.832  < 2e-16 ***
## survey$studyHr[fresh]  0.05537    0.01160   4.774 2.11e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.606 on 904 degrees of freedom
## Multiple R-squared:  0.02459,    Adjusted R-squared:  0.02351 
## F-statistic: 22.79 on 1 and 904 DF,  p-value: 2.108e-06
summary(LMsoph)
## 
## Call:
## lm(formula = survey$GPA[soph] ~ survey$studyHr[soph])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.75128 -0.28474  0.06188  0.37861  0.92203 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           3.02458    0.04850  62.367  < 2e-16 ***
## survey$studyHr[soph]  0.05338    0.01251   4.268 2.55e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.492 on 349 degrees of freedom
## Multiple R-squared:  0.04959,    Adjusted R-squared:  0.04687 
## F-statistic: 18.21 on 1 and 349 DF,  p-value: 2.55e-05
summary(LMjun)
## 
## Call:
## lm(formula = survey$GPA[jun] ~ survey$studyHr[jun])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.23174 -0.29400  0.08602  0.38602  0.80822 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.18734    0.07359  43.312   <2e-16 ***
## survey$studyHr[jun]  0.00888    0.01837   0.483    0.629    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4875 on 157 degrees of freedom
## Multiple R-squared:  0.001486,   Adjusted R-squared:  -0.004874 
## F-statistic: 0.2337 on 1 and 157 DF,  p-value: 0.6295
summary(LMsen)
## 
## Call:
## lm(formula = survey$GPA[sen] ~ survey$studyHr[sen])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.29268 -0.24557  0.02508  0.31311  0.80153 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.15137    0.07858  40.102   <2e-16 ***
## survey$studyHr[sen]  0.04710    0.01906   2.472   0.0154 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.402 on 85 degrees of freedom
## Multiple R-squared:  0.06706,    Adjusted R-squared:  0.05609 
## F-statistic:  6.11 on 1 and 85 DF,  p-value: 0.01544

(g) GPA Prediction

# Predicted GPA for a senior who studies 5 hours, from the senior-only model
# LMsen: intercept + slope * study hours.
#
# The study-hours value is held once in `newHr` and read from there, instead
# of being hard-coded a second time in the arithmetic.
# predict(LMsen, newdata = newHr) is not used because LMsen was fitted with
# `survey$studyHr[sen]` as its term, so the predictor cannot be matched to the
# `studyHr` column of `newHr`.
newHr <- data.frame(studyHr = 5)

# unname() drops the "(Intercept)" label that the result would otherwise
# inherit from coef(LMsen)[1]; the value is a prediction, not an intercept.
GPA_g <- unname(coef(LMsen)[1] + newHr$studyHr * coef(LMsen)[2])
GPA_g
## [1] 3.386885

For a senior student who studies 5 hours/day, the predicted GPA is approximately 3.39 (3.386885).

(h) StudyHr from GPA Prediction

# Reverse regression for seniors only: study hours as the response and GPA as
# the predictor.
StudySen <- lm(
  formula = survey$studyHr[survey$schoolYear == "Senior"] ~
    survey$GPA[survey$schoolYear == "Senior"]
)

# Study hours predicted at GPA_g, the GPA value computed in part (g):
# intercept + slope * GPA_g.
coef(StudySen)[1] + coef(StudySen)[2] * GPA_g
## (Intercept) 
##    3.552339

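The predicted study time (about 3.55 hours) is less than the 5 hours used in part (g). Regressing studyHr on GPA is not the inverse of regressing GPA on studyHr, so plugging the predicted GPA back in does not recover the original 5 hours (the regression effect).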