irished<- read.csv("~/Desktop/GEOG 5680/Data/irished.csv")
hsa <- read.csv("~/Desktop/GEOG 5680/Data/hsa.csv")Module 15
Module 15
Generalized Linear Models (GLM)
OLS = y = BX + e
GLM = E(y) = g(u) = BX
GLM used for non-normal data
Binomial Models
irished$sex = factor(irished$sex, levels = c(1,2),
labels = c("male", "female"))
irished$lvcert = factor(irished$lvcert, levels = c(0,1),
labels = c("not taken", "taken"))
irished$DVRT.cen = irished$DVRT - mean(irished$DVRT)
boxplot(DVRT.cen ~ lvcert, irished)irished.glm1 = glm(lvcert ~ DVRT.cen, data = irished, family = binomial(link = 'logit'))
summary(irished.glm1)
Call:
glm(formula = lvcert ~ DVRT.cen, family = binomial(link = "logit"),
data = irished)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.278422 0.099665 -2.794 0.00521 **
DVRT.cen 0.064369 0.007576 8.496 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 686.86 on 499 degrees of freedom
Residual deviance: 593.77 on 498 degrees of freedom
AIC: 597.77
Number of Fisher Scoring iterations: 3
exp(coef(irished.glm1))(Intercept) DVRT.cen
0.7569771 1.0664856
newDVRT = data.frame(DVRT.cen=120-mean(irished$DVRT))
predict(irished.glm1, newdata = newDVRT) 1
0.9991683
predict(irished.glm1, newdata = newDVRT, type = 'response', se.fit = T)$fit
1
0.730895
$se.fit
1
0.03350827
$residual.scale
[1] 1
newDVRT = data.frame(DVRT.cen=seq(60,160)-mean(irished$DVRT))
lvcert.pred = predict(irished.glm1,
newdata = newDVRT, type = "response")
plot(newDVRT$DVRT.cen+mean(irished$DVRT),
lvcert.pred, type = 'l', col = 2, lwd = 2,
xlab = "DVRT", ylab = "Pr(lvcert)")anova(irished.glm1, test = "Chisq")Analysis of Deviance Table
Model: binomial, link: logit
Response: lvcert
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 499 686.86
DVRT.cen 1 93.095 498 593.77 < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Poisson Models
hsa$prog = factor(hsa$prog, levels = c(1,2,3),
labels = c("General", "Academic", "Vocational"))
hsa$math.cen = hsa$math - mean(hsa$math)
boxplot(math ~ num_awards, data = hsa)boxplot(math ~ prog, data = hsa)hsa.glm = glm(num_awards ~ math.cen + prog,
data = hsa, family = poisson(link = "log"))
summary(hsa.glm)
Call:
glm(formula = num_awards ~ math.cen + prog, family = poisson(link = "log"),
data = hsa)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.55395 0.33348 -4.660 3.16e-06 ***
math.cen 0.07015 0.01060 6.619 3.63e-11 ***
progAcademic 1.08386 0.35825 3.025 0.00248 **
progVocational 0.36981 0.44107 0.838 0.40179
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 287.67 on 199 degrees of freedom
Residual deviance: 189.45 on 196 degrees of freedom
AIC: 373.5
Number of Fisher Scoring iterations: 6
exp(coef(hsa.glm)) (Intercept) math.cen progAcademic progVocational
0.2114109 1.0726716 2.9560655 1.4474585
levels(hsa$prog)[1] "General" "Academic" "Vocational"
newStudent = data.frame(math.cen = 70 - mean(hsa$math),
prog = "Academic")
predict(hsa.glm, newdata = newStudent,
type = "response", se.fit = T)$fit
1
2.111508
$se.fit
1
0.275062
$residual.scale
[1] 1
hsa.glm2 = glm(num_awards ~ math.cen,
data = hsa, family = poisson(link = "log"))
summary(hsa.glm2)
Call:
glm(formula = num_awards ~ math.cen, family = poisson(link = "log"),
data = hsa)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.797344 0.116424 -6.849 7.46e-12 ***
math.cen 0.086166 0.009679 8.902 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 287.67 on 199 degrees of freedom
Residual deviance: 204.02 on 198 degrees of freedom
AIC: 384.08
Number of Fisher Scoring iterations: 6
exp(coef(hsa.glm2))(Intercept) math.cen
0.4505239 1.0899868
anova(hsa.glm2, hsa.glm, test = "Chisq")Analysis of Deviance Table
Model 1: num_awards ~ math.cen
Model 2: num_awards ~ math.cen + prog
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 198 204.02
2 196 189.45 2 14.572 0.0006852 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1