Start by:
enroll = read.csv("enrollmentForecast.csv")
str(enroll)
## 'data.frame': 29 obs. of 5 variables:
## $ YEAR : int 1 2 3 4 5 6 7 8 9 10 ...
## $ ROLL : int 5501 5945 6629 7556 8716 9369 9920 10167 11084 12504 ...
## $ UNEM : num 8.1 7 7.3 7.5 7 6.4 6.5 6.4 6.3 7.7 ...
## $ HGRAD: int 9552 9680 9731 11666 14675 15265 15484 15723 16501 16890 ...
## $ INC : int 1923 1961 1979 2030 2112 2192 2235 2351 2411 2475 ...
summary(enroll)
## YEAR ROLL UNEM HGRAD INC
## Min. : 1 Min. : 5501 Min. : 5.700 Min. : 9552 Min. :1923
## 1st Qu.: 8 1st Qu.:10167 1st Qu.: 7.000 1st Qu.:15723 1st Qu.:2351
## Median :15 Median :14395 Median : 7.500 Median :17203 Median :2863
## Mean :15 Mean :12707 Mean : 7.717 Mean :16528 Mean :2729
## 3rd Qu.:22 3rd Qu.:14969 3rd Qu.: 8.200 3rd Qu.:18266 3rd Qu.:3127
## Max. :29 Max. :16081 Max. :10.100 Max. :19800 Max. :3345
library(ggplot2)
library(ggthemes)
myplot = ggplot(enroll, aes(x=UNEM, y=ROLL)) + ggtitle("Unemployment Rate vs Enrollment") + xlab("Unemployment Rate") + ylab("Fall Enrollment")
myplot + geom_point(color='lightblue') + geom_line(color='lightblue') + theme_bw()
library(ggplot2)
library(ggthemes)
myplot = ggplot(enroll, aes(x=YEAR, y=ROLL)) + ggtitle("Year vs Enrollment") + xlab("Year") + ylab("Fall Enrollment")
myplot + geom_point(color='lightpink') + geom_line(color='lightpink') + theme_bw()
library(ggplot2)
library(ggthemes)
myplot = ggplot(enroll, aes(x=HGRAD, y=ROLL)) + ggtitle("High School Graduates vs Enrollment") + xlab("High School Graduates") + ylab("Fall Enrollment")
myplot + geom_point(color='goldenrod4') + geom_line(color='goldenrod4') + theme_bw()
library(ggplot2)
library(ggthemes)
myplot = ggplot(enroll, aes(x=INC, y=ROLL)) + ggtitle("Monthly Per Capita Income vs Enrollment") + xlab("Monthly per Capita Income in 1961 US$") + ylab("Fall Enrollment")
myplot + geom_point(color='forestgreen') + geom_line(color='forestgreen') + theme_bw()
ROLL ~ UNEM + HGRAD
lm(ROLL ~ UNEM + HGRAD, data = enroll)
##
## Call:
## lm(formula = ROLL ~ UNEM + HGRAD, data = enroll)
##
## Coefficients:
## (Intercept) UNEM HGRAD
## -8255.7511 698.2681 0.9423
fall_enroll = lm(ROLL ~ UNEM + HGRAD, data = enroll)
class(fall_enroll)
## [1] "lm"
str(fall_enroll)
## List of 12
## $ coefficients : Named num [1:3] -8255.751 698.268 0.942
## ..- attr(*, "names")= chr [1:3] "(Intercept)" "UNEM" "HGRAD"
## $ residuals : Named num [1:29] -900 192 618 -418 -1744 ...
## ..- attr(*, "names")= chr [1:29] "1" "2" "3" "4" ...
## $ effects : Named num [1:29] -68429 6739 14362 -414 -1634 ...
## ..- attr(*, "names")= chr [1:29] "(Intercept)" "UNEM" "HGRAD" "" ...
## $ rank : int 3
## $ fitted.values: Named num [1:29] 6401 5753 6011 7974 10460 ...
## ..- attr(*, "names")= chr [1:29] "1" "2" "3" "4" ...
## $ assign : int [1:3] 0 1 2
## $ qr :List of 5
## ..$ qr : num [1:29, 1:3] -5.385 0.186 0.186 0.186 0.186 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:29] "1" "2" "3" "4" ...
## .. .. ..$ : chr [1:3] "(Intercept)" "UNEM" "HGRAD"
## .. ..- attr(*, "assign")= int [1:3] 0 1 2
## ..$ qraux: num [1:3] 1.19 1.13 1.33
## ..$ pivot: int [1:3] 1 2 3
## ..$ tol : num 1e-07
## ..$ rank : int 3
## ..- attr(*, "class")= chr "qr"
## $ df.residual : int 26
## $ xlevels : Named list()
## $ call : language lm(formula = ROLL ~ UNEM + HGRAD, data = enroll)
## $ terms :Classes 'terms', 'formula' language ROLL ~ UNEM + HGRAD
## .. ..- attr(*, "variables")= language list(ROLL, UNEM, HGRAD)
## .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:3] "ROLL" "UNEM" "HGRAD"
## .. .. .. ..$ : chr [1:2] "UNEM" "HGRAD"
## .. ..- attr(*, "term.labels")= chr [1:2] "UNEM" "HGRAD"
## .. ..- attr(*, "order")= int [1:2] 1 1
## .. ..- attr(*, "intercept")= int 1
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(ROLL, UNEM, HGRAD)
## .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "numeric"
## .. .. ..- attr(*, "names")= chr [1:3] "ROLL" "UNEM" "HGRAD"
## $ model :'data.frame': 29 obs. of 3 variables:
## ..$ ROLL : int [1:29] 5501 5945 6629 7556 8716 9369 9920 10167 11084 12504 ...
## ..$ UNEM : num [1:29] 8.1 7 7.3 7.5 7 6.4 6.5 6.4 6.3 7.7 ...
## ..$ HGRAD: int [1:29] 9552 9680 9731 11666 14675 15265 15484 15723 16501 16890 ...
## ..- attr(*, "terms")=Classes 'terms', 'formula' language ROLL ~ UNEM + HGRAD
## .. .. ..- attr(*, "variables")= language list(ROLL, UNEM, HGRAD)
## .. .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1
## .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. ..$ : chr [1:3] "ROLL" "UNEM" "HGRAD"
## .. .. .. .. ..$ : chr [1:2] "UNEM" "HGRAD"
## .. .. ..- attr(*, "term.labels")= chr [1:2] "UNEM" "HGRAD"
## .. .. ..- attr(*, "order")= int [1:2] 1 1
## .. .. ..- attr(*, "intercept")= int 1
## .. .. ..- attr(*, "response")= int 1
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. ..- attr(*, "predvars")= language list(ROLL, UNEM, HGRAD)
## .. .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "numeric"
## .. .. .. ..- attr(*, "names")= chr [1:3] "ROLL" "UNEM" "HGRAD"
## - attr(*, "class")= chr "lm"
Notes (unsure where this could go): new_enroll = 16081 - mean(enroll$HGRAD) -8255.7511 + (698.2681 * new_enroll)
predict_fall = data.frame(UNEM + HGRAD = new_enroll) predict(fall_enroll, predict_fall)
predict(fall_enroll, predict_fall, interval = “prediction”)
summary(fall_enroll)
##
## Call:
## lm(formula = ROLL ~ UNEM + HGRAD, data = enroll)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2102.2 -861.6 -349.4 374.5 3603.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.256e+03 2.052e+03 -4.023 0.00044 ***
## UNEM 6.983e+02 2.244e+02 3.111 0.00449 **
## HGRAD 9.423e-01 8.613e-02 10.941 3.16e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1313 on 26 degrees of freedom
## Multiple R-squared: 0.8489, Adjusted R-squared: 0.8373
## F-statistic: 73.03 on 2 and 26 DF, p-value: 2.144e-11
anova(fall_enroll)
## Analysis of Variance Table
##
## Response: ROLL
## Df Sum Sq Mean Sq F value Pr(>F)
## UNEM 1 45407767 45407767 26.349 2.366e-05 ***
## HGRAD 1 206279143 206279143 119.701 3.157e-11 ***
## Residuals 26 44805568 1723291
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hist(residuals(fall_enroll))
plot(fall_enroll, which = 1)
predict_fall = data.frame(UNEM = 0.09, HGRAD = 25000)
predict(fall_enroll, predict_fall)
## 1
## 15364.01
Compare the two models with anova(). Does including this variable improve the model? Including this variable does improve the model as seen in the significance code where all three variables are now coded as three stars
lm(ROLL ~ UNEM + HGRAD + INC, data = enroll)
##
## Call:
## lm(formula = ROLL ~ UNEM + HGRAD + INC, data = enroll)
##
## Coefficients:
## (Intercept) UNEM HGRAD INC
## -9153.2545 450.1245 0.4065 4.2749
fall_enroll2 = lm(ROLL ~ UNEM + HGRAD + INC, data = enroll)
summary(fall_enroll2)
##
## Call:
## lm(formula = ROLL ~ UNEM + HGRAD + INC, data = enroll)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1148.84 -489.71 -1.88 387.40 1425.75
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.153e+03 1.053e+03 -8.691 5.02e-09 ***
## UNEM 4.501e+02 1.182e+02 3.809 0.000807 ***
## HGRAD 4.065e-01 7.602e-02 5.347 1.52e-05 ***
## INC 4.275e+00 4.947e-01 8.642 5.59e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 670.4 on 25 degrees of freedom
## Multiple R-squared: 0.9621, Adjusted R-squared: 0.9576
## F-statistic: 211.5 on 3 and 25 DF, p-value: < 2.2e-16
anova(fall_enroll)
## Analysis of Variance Table
##
## Response: ROLL
## Df Sum Sq Mean Sq F value Pr(>F)
## UNEM 1 45407767 45407767 26.349 2.366e-05 ***
## HGRAD 1 206279143 206279143 119.701 3.157e-11 ***
## Residuals 26 44805568 1723291
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fall_enroll2)
## Analysis of Variance Table
##
## Response: ROLL
## Df Sum Sq Mean Sq F value Pr(>F)
## UNEM 1 45407767 45407767 101.02 2.894e-10 ***
## HGRAD 1 206279143 206279143 458.92 < 2.2e-16 ***
## INC 1 33568255 33568255 74.68 5.594e-09 ***
## Residuals 25 11237313 449493
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1