# Load required libraries
library (dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library (ggplot2)
# Print working directory and files
print (getwd ())
[1] "/Users/seanlawler/Downloads/Intro to R/Module 15"
[1] "~$dule 15.docx" "15a_files"
[3] "15a.html" "15a.qmd"
[5] "15b_files" "15b.html"
[7] "15b.qmd" "15b.rmarkdown"
[9] "ca_places.zip" "gapminderData5.csv"
[11] "GEOG_5680_15a.html" "GEOG_5680_15b.html"
[13] "GEOG_5680_15c.html" "GEOG_5680_15d.html"
[15] "hsa.csv" "irished.csv"
[17] "LightRail_UTA" "LightRail_UTA.zip"
[19] "LightRailStations_UTA" "LightRailStations_UTA.zip"
[21] "Module 15.docx" "Module 15.pdf"
[23] "Module15_abc.qmd" "Module15a_files"
[25] "Module15a-b_files" "Module15a-b.html"
[27] "Module15a.html" "orthodont.csv"
[29] "rs.zip" "rsconnect"
[31] "slc_tract" "slc_tract.zip"
[33] "WNAclimate.csv"
# Binomial Logistic Regression
irished <- read.csv ("irished.csv" )
irished$ sex <- factor (irished$ sex, labels = c ("male" , "female" ))
irished$ lvcert <- factor (irished$ lvcert, labels = c ("not taken" , "taken" ))
irished$ DVRT.cen <- irished$ DVRT - mean (irished$ DVRT)
model_binom <- glm (lvcert ~ DVRT.cen, data = irished, family = binomial (link = "logit" ))
print (summary (model_binom))
Call:
glm(formula = lvcert ~ DVRT.cen, family = binomial(link = "logit"),
data = irished)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.278422 0.099665 -2.794 0.00521 **
DVRT.cen 0.064369 0.007576 8.496 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 686.86 on 499 degrees of freedom
Residual deviance: 593.77 on 498 degrees of freedom
AIC: 597.77
Number of Fisher Scoring iterations: 3
print (exp (coef (model_binom))) # odds ratios
(Intercept) DVRT.cen
0.7569771 1.0664856
# Plot predicted probabilities
irished$ predicted <- predict (model_binom, type = "response" )
ggplot (irished, aes (x = DVRT.cen + mean (irished$ DVRT), y = predicted)) +
geom_point (alpha = 0.3 ) +
geom_smooth (method = "loess" , color = "blue" ) +
labs (title = "Predicted Probability of Taking Leaving Cert" ,
x = "DVRT Score" , y = "Probability" )
Warning: Use of `irished$DVRT` is discouraged.
ℹ Use `DVRT` instead.
Use of `irished$DVRT` is discouraged.
ℹ Use `DVRT` instead.
`geom_smooth()` using formula = 'y ~ x'
# Poisson Regression
hsa <- read.csv ("hsa.csv" )
hsa$ prog <- factor (hsa$ prog, labels = c ("General" , "Academic" , "Vocational" ))
hsa$ math.cen <- hsa$ math - mean (hsa$ math)
model_pois <- glm (num_awards ~ math.cen + prog, data = hsa, family = poisson (link = "log" ))
print (summary (model_pois))
Call:
glm(formula = num_awards ~ math.cen + prog, family = poisson(link = "log"),
data = hsa)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.55395 0.33348 -4.660 3.16e-06 ***
math.cen 0.07015 0.01060 6.619 3.63e-11 ***
progAcademic 1.08386 0.35825 3.025 0.00248 **
progVocational 0.36981 0.44107 0.838 0.40179
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 287.67 on 199 degrees of freedom
Residual deviance: 189.45 on 196 degrees of freedom
AIC: 373.5
Number of Fisher Scoring iterations: 6
print (exp (coef (model_pois))) # incident rate ratios
(Intercept) math.cen progAcademic progVocational
0.2114109 1.0726716 2.9560655 1.4474585
# Observed vs. Predicted Plot
hsa$ predicted <- predict (model_pois, type = "response" )
ggplot (hsa, aes (x = predicted, y = num_awards)) +
geom_jitter (alpha = 0.3 ) +
geom_smooth (method = "lm" , se = FALSE , color = "darkgreen" ) +
labs (title = "Observed vs. Predicted Number of Awards" ,
x = "Predicted Awards" , y = "Observed Awards" )
`geom_smooth()` using formula = 'y ~ x'
# ANOVA Model Comparison
model_simple <- glm (num_awards ~ math.cen, data = hsa, family = poisson (link = "log" ))
anova_result <- anova (model_simple, model_pois, test = "Chisq" )
print (anova_result)
Analysis of Deviance Table
Model 1: num_awards ~ math.cen
Model 2: num_awards ~ math.cen + prog
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 198 204.02
2 196 189.45 2 14.572 0.0006852 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1