Module 15b

Author

Sean Lawler

# Load required libraries
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)

# Print working directory and files
print(getwd())
[1] "/Users/seanlawler/Downloads/Intro to R/Module 15"
print(list.files())
 [1] "~$dule 15.docx"            "15a_files"                
 [3] "15a.html"                  "15a.qmd"                  
 [5] "15b_files"                 "15b.html"                 
 [7] "15b.qmd"                   "15b.rmarkdown"            
 [9] "ca_places.zip"             "gapminderData5.csv"       
[11] "GEOG_5680_15a.html"        "GEOG_5680_15b.html"       
[13] "GEOG_5680_15c.html"        "GEOG_5680_15d.html"       
[15] "hsa.csv"                   "irished.csv"              
[17] "LightRail_UTA"             "LightRail_UTA.zip"        
[19] "LightRailStations_UTA"     "LightRailStations_UTA.zip"
[21] "Module 15.docx"            "Module 15.pdf"            
[23] "Module15_abc.qmd"          "Module15a_files"          
[25] "Module15a-b_files"         "Module15a-b.html"         
[27] "Module15a.html"            "orthodont.csv"            
[29] "rs.zip"                    "rsconnect"                
[31] "slc_tract"                 "slc_tract.zip"            
[33] "WNAclimate.csv"           
# Binomial Logistic Regression
irished <- read.csv("irished.csv")
irished$sex <- factor(irished$sex, labels = c("male", "female"))
irished$lvcert <- factor(irished$lvcert, labels = c("not taken", "taken"))
irished$DVRT.cen <- irished$DVRT - mean(irished$DVRT)

model_binom <- glm(lvcert ~ DVRT.cen, data = irished, family = binomial(link = "logit"))
print(summary(model_binom))

Call:
glm(formula = lvcert ~ DVRT.cen, family = binomial(link = "logit"), 
    data = irished)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) -0.278422   0.099665  -2.794  0.00521 ** 
DVRT.cen     0.064369   0.007576   8.496  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 686.86  on 499  degrees of freedom
Residual deviance: 593.77  on 498  degrees of freedom
AIC: 597.77

Number of Fisher Scoring iterations: 3
print(exp(coef(model_binom)))  # odds ratios
(Intercept)    DVRT.cen 
  0.7569771   1.0664856 
# Plot predicted probabilities
irished$predicted <- predict(model_binom, type = "response")
ggplot(irished, aes(x = DVRT.cen + mean(irished$DVRT), y = predicted)) +
  geom_point(alpha = 0.3) +
  geom_smooth(method = "loess", color = "blue") +
  labs(title = "Predicted Probability of Taking Leaving Cert",
       x = "DVRT Score", y = "Probability")
Warning: Use of `irished$DVRT` is discouraged.
ℹ Use `DVRT` instead.
Use of `irished$DVRT` is discouraged.
ℹ Use `DVRT` instead.
`geom_smooth()` using formula = 'y ~ x'

# Poisson Regression
hsa <- read.csv("hsa.csv")
hsa$prog <- factor(hsa$prog, labels = c("General", "Academic", "Vocational"))
hsa$math.cen <- hsa$math - mean(hsa$math)

model_pois <- glm(num_awards ~ math.cen + prog, data = hsa, family = poisson(link = "log"))
print(summary(model_pois))

Call:
glm(formula = num_awards ~ math.cen + prog, family = poisson(link = "log"), 
    data = hsa)

Coefficients:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)    -1.55395    0.33348  -4.660 3.16e-06 ***
math.cen        0.07015    0.01060   6.619 3.63e-11 ***
progAcademic    1.08386    0.35825   3.025  0.00248 ** 
progVocational  0.36981    0.44107   0.838  0.40179    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 287.67  on 199  degrees of freedom
Residual deviance: 189.45  on 196  degrees of freedom
AIC: 373.5

Number of Fisher Scoring iterations: 6
print(exp(coef(model_pois)))  # incident rate ratios
   (Intercept)       math.cen   progAcademic progVocational 
     0.2114109      1.0726716      2.9560655      1.4474585 
# Observed vs. Predicted Plot
hsa$predicted <- predict(model_pois, type = "response")
ggplot(hsa, aes(x = predicted, y = num_awards)) +
  geom_jitter(alpha = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = "darkgreen") +
  labs(title = "Observed vs. Predicted Number of Awards",
       x = "Predicted Awards", y = "Observed Awards")
`geom_smooth()` using formula = 'y ~ x'

# ANOVA Model Comparison
model_simple <- glm(num_awards ~ math.cen, data = hsa, family = poisson(link = "log"))
anova_result <- anova(model_simple, model_pois, test = "Chisq")
print(anova_result)
Analysis of Deviance Table

Model 1: num_awards ~ math.cen
Model 2: num_awards ~ math.cen + prog
  Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
1       198     204.02                          
2       196     189.45  2   14.572 0.0006852 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1