Poisson regression rate ratio plot

References

R Data Analysis Examples: Poisson Regression: http://www.ats.ucla.edu/stat/r/dae/poissonreg.htm

Load packages

library(ggplot2)

Load data

dat <- read.csv("http://www.ats.ucla.edu/stat/data/poisson_sim.csv")
dat <- within(dat, {
    prog <- factor(prog, levels=1:3, labels=c("General", "Academic", "Vocational"))
    id <- factor(id)
})
summary(dat)
##        id        num_awards           prog          math      
##  1      :  1   Min.   :0.00   General   : 45   Min.   :33.00  
##  2      :  1   1st Qu.:0.00   Academic  :105   1st Qu.:45.00  
##  3      :  1   Median :0.00   Vocational: 50   Median :52.00  
##  4      :  1   Mean   :0.63                    Mean   :52.65  
##  5      :  1   3rd Qu.:1.00                    3rd Qu.:59.00  
##  6      :  1   Max.   :6.00                    Max.   :75.00  
##  (Other):194

Poisson regression model fitting

The rate ratio for award reception is 1.07 for each unit increment of math score. Here the observation time, say 5 years, is assumed to be the same for all individuals (thus, no need for offset).

model1 <- glm(formula = num_awards ~ prog + math,
              family  = poisson(link = "log"),
              data    = dat)

summary(model1)
## 
## Call:
## glm(formula = num_awards ~ prog + math, family = poisson(link = "log"), 
##     data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2043  -0.8436  -0.5106   0.2558   2.6796  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -5.24712    0.65845  -7.969 1.60e-15 ***
## progAcademic    1.08386    0.35825   3.025  0.00248 ** 
## progVocational  0.36981    0.44107   0.838  0.40179    
## math            0.07015    0.01060   6.619 3.63e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 287.67  on 199  degrees of freedom
## Residual deviance: 189.45  on 196  degrees of freedom
## AIC: 373.5
## 
## Number of Fisher Scoring iterations: 6
tableone::ShowRegTable(model1)
##                exp(beta) [confint] p     
## (Intercept)    0.01 [0.00, 0.02]   <0.001
## progAcademic   2.96 [1.55, 6.40]    0.002
## progVocational 1.45 [0.61, 3.55]    0.402
## math           1.07 [1.05, 1.10]   <0.001

Create functions to calculate RR estimate and variance

## Estimate of coefficient of math
(coefMath <- coef(model1)["math"])
##      math 
## 0.0701524
## F unction to estimate the point estimate of log RR
estFunc <- function(math) {
    coefMath * math
}

## Var(estimated_coef_of_math)
(varCoefMath <- vcov(model1)["math","math"])
## [1] 0.0001123431
## Function to estimate the variance of log RR estimate at each math score level
varFunc <- function(math) {
    varCoefMath * (math^2)
}

Create a data set to plot

## X axis value is delta in math score
newdat <- data.frame(delta_math = seq(from = -10, to = +10, by = 0.1))
## log RR associated with that delta
newdat$logRR <- estFunc(newdat$delta_math)
## Variance of log RR
newdat$varLogRR <- varFunc(newdat$delta_math)
## 95% CI on log RR scale
newdat$lower <- newdat$logRR - qnorm(0.975) * sqrt(newdat$varLogRR)
newdat$upper <- newdat$logRR + qnorm(0.975) * sqrt(newdat$varLogRR)

Plot on log RR scale

ggplot(data = newdat, mapping = aes(x = delta_math, y = logRR)) +
    layer(geom = "line") +
    layer(geom = "ribbon", mapping = aes(ymin = lower, ymax = upper),
          color = "gray", alpha = 0.3) +
    theme_bw() + theme(legend.key = element_blank())

plot of chunk unnamed-chunk-7

Plot on RR scale

ggplot(data = newdat, mapping = aes(x = delta_math, y = exp(logRR))) +
    layer(geom = "line") +
    layer(geom = "ribbon", mapping = aes(ymin = exp(lower), ymax = exp(upper)),
          color = "gray", alpha = 0.3) +
    theme_bw() + theme(legend.key = element_blank())

plot of chunk unnamed-chunk-8

Plot on log rate (NOT rate ratio) scale

## Fix program variable at the baseline General
newdat2 <- data.frame(prog = "General",
                      math = seq(min(dat$math), max(dat$math), by = 0.1))
## log rate estimate
newdat2$logRate <- predict(model1, newdata = newdat2)
## standard error for estiamte
newdat2$seLogRate <- predict(model1, newdata = newdat2, se.fit = TRUE)$se.fit
## 95% CI on log rate scale
newdat2$lower <- newdat2$logRate - qnorm(0.975) * newdat2$seLogRate
newdat2$upper <- newdat2$logRate + qnorm(0.975) * newdat2$seLogRate

ggplot(data = newdat2, mapping = aes(x = math, y = logRate)) +
    layer(geom = "line") +
    layer(geom = "ribbon", mapping = aes(ymin = lower, ymax = upper),
          color = "gray", alpha = 0.3) +
    theme_bw() + theme(legend.key = element_blank())

plot of chunk unnamed-chunk-9

ggplot(data = newdat2, mapping = aes(x = math, y = exp(logRate))) +
    layer(geom = "line") +
    layer(geom = "ribbon", mapping = aes(ymin = exp(lower), ymax = exp(upper)),
          color = "gray", alpha = 0.3) +
    theme_bw() + theme(legend.key = element_blank())

plot of chunk unnamed-chunk-9