Module 15

Author

K M

Module 15

irished<- read.csv("~/Desktop/GEOG 5680/Data/irished.csv")
hsa <- read.csv("~/Desktop/GEOG 5680/Data/hsa.csv")

Generalized Linear Models (GLM)

OLS = y = BX + e

GLM = E(y) = g(u) = BX

GLM used for non-normal data

Binomial Models

irished$sex = factor(irished$sex, levels = c(1,2),
                     labels = c("male", "female"))
irished$lvcert = factor(irished$lvcert, levels = c(0,1),
                        labels = c("not taken", "taken"))

irished$DVRT.cen = irished$DVRT - mean(irished$DVRT)

boxplot(DVRT.cen ~ lvcert, irished)

irished.glm1 = glm(lvcert ~ DVRT.cen, data = irished, family = binomial(link = 'logit'))
summary(irished.glm1)

Call:
glm(formula = lvcert ~ DVRT.cen, family = binomial(link = "logit"), 
    data = irished)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) -0.278422   0.099665  -2.794  0.00521 ** 
DVRT.cen     0.064369   0.007576   8.496  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 686.86  on 499  degrees of freedom
Residual deviance: 593.77  on 498  degrees of freedom
AIC: 597.77

Number of Fisher Scoring iterations: 3
exp(coef(irished.glm1))
(Intercept)    DVRT.cen 
  0.7569771   1.0664856 
newDVRT = data.frame(DVRT.cen=120-mean(irished$DVRT))
predict(irished.glm1, newdata = newDVRT)
        1 
0.9991683 
predict(irished.glm1, newdata = newDVRT, type = 'response', se.fit = T)
$fit
       1 
0.730895 

$se.fit
         1 
0.03350827 

$residual.scale
[1] 1
newDVRT = data.frame(DVRT.cen=seq(60,160)-mean(irished$DVRT))
lvcert.pred = predict(irished.glm1,
                      newdata = newDVRT, type = "response")
plot(newDVRT$DVRT.cen+mean(irished$DVRT),
     lvcert.pred, type = 'l', col = 2, lwd = 2,
     xlab = "DVRT", ylab = "Pr(lvcert)")

anova(irished.glm1, test = "Chisq")
Analysis of Deviance Table

Model: binomial, link: logit

Response: lvcert

Terms added sequentially (first to last)

         Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
NULL                       499     686.86              
DVRT.cen  1   93.095       498     593.77 < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Poisson Models

hsa$prog = factor(hsa$prog, levels = c(1,2,3),
                  labels = c("General", "Academic", "Vocational"))
hsa$math.cen = hsa$math - mean(hsa$math)

boxplot(math ~ num_awards, data = hsa)

boxplot(math ~ prog, data = hsa)

hsa.glm = glm(num_awards ~ math.cen + prog,
              data = hsa, family = poisson(link = "log"))
summary(hsa.glm)

Call:
glm(formula = num_awards ~ math.cen + prog, family = poisson(link = "log"), 
    data = hsa)

Coefficients:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)    -1.55395    0.33348  -4.660 3.16e-06 ***
math.cen        0.07015    0.01060   6.619 3.63e-11 ***
progAcademic    1.08386    0.35825   3.025  0.00248 ** 
progVocational  0.36981    0.44107   0.838  0.40179    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 287.67  on 199  degrees of freedom
Residual deviance: 189.45  on 196  degrees of freedom
AIC: 373.5

Number of Fisher Scoring iterations: 6
exp(coef(hsa.glm))
   (Intercept)       math.cen   progAcademic progVocational 
     0.2114109      1.0726716      2.9560655      1.4474585 
levels(hsa$prog)
[1] "General"    "Academic"   "Vocational"
newStudent = data.frame(math.cen = 70 - mean(hsa$math),
                        prog = "Academic")
predict(hsa.glm, newdata = newStudent,
        type = "response", se.fit = T)
$fit
       1 
2.111508 

$se.fit
       1 
0.275062 

$residual.scale
[1] 1
hsa.glm2 = glm(num_awards ~ math.cen,
               data = hsa, family = poisson(link = "log"))
summary(hsa.glm2)

Call:
glm(formula = num_awards ~ math.cen, family = poisson(link = "log"), 
    data = hsa)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) -0.797344   0.116424  -6.849 7.46e-12 ***
math.cen     0.086166   0.009679   8.902  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 287.67  on 199  degrees of freedom
Residual deviance: 204.02  on 198  degrees of freedom
AIC: 384.08

Number of Fisher Scoring iterations: 6
exp(coef(hsa.glm2))
(Intercept)    math.cen 
  0.4505239   1.0899868 
anova(hsa.glm2, hsa.glm, test = "Chisq")
Analysis of Deviance Table

Model 1: num_awards ~ math.cen
Model 2: num_awards ~ math.cen + prog
  Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
1       198     204.02                          
2       196     189.45  2   14.572 0.0006852 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1