R Data Analysis Examples: Poisson Regression: http://www.ats.ucla.edu/stat/r/dae/poissonreg.htm
library(ggplot2)
dat <- read.csv("http://www.ats.ucla.edu/stat/data/poisson_sim.csv")
dat <- within(dat, {
prog <- factor(prog, levels=1:3, labels=c("General", "Academic", "Vocational"))
id <- factor(id)
})
summary(dat)
## id num_awards prog math
## 1 : 1 Min. :0.00 General : 45 Min. :33.00
## 2 : 1 1st Qu.:0.00 Academic :105 1st Qu.:45.00
## 3 : 1 Median :0.00 Vocational: 50 Median :52.00
## 4 : 1 Mean :0.63 Mean :52.65
## 5 : 1 3rd Qu.:1.00 3rd Qu.:59.00
## 6 : 1 Max. :6.00 Max. :75.00
## (Other):194
The rate ratio for award reception is 1.07 for each unit increment of math score. Here the observation time, say 5 years, is assumed to be the same for all individuals (thus, no need for offset).
model1 <- glm(formula = num_awards ~ prog + math,
family = poisson(link = "log"),
data = dat)
summary(model1)
##
## Call:
## glm(formula = num_awards ~ prog + math, family = poisson(link = "log"),
## data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2043 -0.8436 -0.5106 0.2558 2.6796
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.24712 0.65845 -7.969 1.60e-15 ***
## progAcademic 1.08386 0.35825 3.025 0.00248 **
## progVocational 0.36981 0.44107 0.838 0.40179
## math 0.07015 0.01060 6.619 3.63e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 287.67 on 199 degrees of freedom
## Residual deviance: 189.45 on 196 degrees of freedom
## AIC: 373.5
##
## Number of Fisher Scoring iterations: 6
tableone::ShowRegTable(model1)
## exp(beta) [confint] p
## (Intercept) 0.01 [0.00, 0.02] <0.001
## progAcademic 2.96 [1.55, 6.40] 0.002
## progVocational 1.45 [0.61, 3.55] 0.402
## math 1.07 [1.05, 1.10] <0.001
## Estimate of coefficient of math
(coefMath <- coef(model1)["math"])
## math
## 0.0701524
## F unction to estimate the point estimate of log RR
estFunc <- function(math) {
coefMath * math
}
## Var(estimated_coef_of_math)
(varCoefMath <- vcov(model1)["math","math"])
## [1] 0.0001123431
## Function to estimate the variance of log RR estimate at each math score level
varFunc <- function(math) {
varCoefMath * (math^2)
}
## X axis value is delta in math score
newdat <- data.frame(delta_math = seq(from = -10, to = +10, by = 0.1))
## log RR associated with that delta
newdat$logRR <- estFunc(newdat$delta_math)
## Variance of log RR
newdat$varLogRR <- varFunc(newdat$delta_math)
## 95% CI on log RR scale
newdat$lower <- newdat$logRR - qnorm(0.975) * sqrt(newdat$varLogRR)
newdat$upper <- newdat$logRR + qnorm(0.975) * sqrt(newdat$varLogRR)
ggplot(data = newdat, mapping = aes(x = delta_math, y = logRR)) +
layer(geom = "line") +
layer(geom = "ribbon", mapping = aes(ymin = lower, ymax = upper),
color = "gray", alpha = 0.3) +
theme_bw() + theme(legend.key = element_blank())
ggplot(data = newdat, mapping = aes(x = delta_math, y = exp(logRR))) +
layer(geom = "line") +
layer(geom = "ribbon", mapping = aes(ymin = exp(lower), ymax = exp(upper)),
color = "gray", alpha = 0.3) +
theme_bw() + theme(legend.key = element_blank())
## Fix program variable at the baseline General
newdat2 <- data.frame(prog = "General",
math = seq(min(dat$math), max(dat$math), by = 0.1))
## log rate estimate
newdat2$logRate <- predict(model1, newdata = newdat2)
## standard error for estiamte
newdat2$seLogRate <- predict(model1, newdata = newdat2, se.fit = TRUE)$se.fit
## 95% CI on log rate scale
newdat2$lower <- newdat2$logRate - qnorm(0.975) * newdat2$seLogRate
newdat2$upper <- newdat2$logRate + qnorm(0.975) * newdat2$seLogRate
ggplot(data = newdat2, mapping = aes(x = math, y = logRate)) +
layer(geom = "line") +
layer(geom = "ribbon", mapping = aes(ymin = lower, ymax = upper),
color = "gray", alpha = 0.3) +
theme_bw() + theme(legend.key = element_blank())
ggplot(data = newdat2, mapping = aes(x = math, y = exp(logRate))) +
layer(geom = "line") +
layer(geom = "ribbon", mapping = aes(ymin = exp(lower), ymax = exp(upper)),
color = "gray", alpha = 0.3) +
theme_bw() + theme(legend.key = element_blank())