EPI204 Lab 5 in R

References

Prepare data

## Load sas7bdat package
library(sas7bdat)

## Load three datasets as a list of data frames
cities <- lapply(c("lafinal","nycfinal","chicagofinal"),
                 function(X){
                     read.sas7bdat(paste0(X,".sas7bdat"))
                 })
## add city names
cities2 <- lapply(1:3, function(i) {

    cities[[i]]$city <- c("LA","NYC","Chicago")[i]
    cities[[i]]
})

## Merge as a data frame
cities2 <- do.call(rbind, cities2)

cities3 <- within(cities2, {

    newrace <- race
    newrace[is.nan(race)] <- NA
    newrace[race == 0] <- NA
    newrace[race >= 4] <- 3
    newrace <- factor(newrace)

    mitype[mitype == 0] <- NA

    male <- as.numeric(sex == 1)

    days_cc[is.na(days_cc)] <- 0
    days_ic[is.na(days_ic)] <- 0

    age65 <- age - 65

 })

cities3compl <- cities3[complete.cases(cities3[,c("newrace","mitype")]),]

III Poisson regression

  1. describe data
## Continuous
library(psych)
describe(cities3compl[,c("age","days_cc","days_ic","fuptime")])
        var      n  mean   sd median trimmed  mad min max range  skew kurtosis   se
age       1 182149 76.94 7.70     76   76.54 8.90  65 117    52  0.41    -0.53 0.02
days_cc   2 182149  1.59 3.56      0    0.80 0.00   0 126   126  6.66   108.71 0.01
days_ic   3 182149  1.88 4.45      0    0.97 0.00   0 338   338 12.00   456.76 0.01
fuptime   4 182149  7.27 5.58      6    6.83 7.41   1  19    18  0.44    -1.20 0.01

## Tabling
lapply(cities3compl[,c("readmcount","mitype","race","sex","yearadm1")], table)
$readmcount

     0      1      2      3      4      5      6      7      8      9     10     11     12     14     16 
139991  32471   6876   1892    562    208     80     37     15      6      6      2      1      1      1 

$mitype

    1     2     3     4     5     6     7     8     9    10 
 8221 32445  5048  3789 32366  3909  1461 78227  4610 12073 

$race

     1      2      3      4      5      6 
149843  21584   5869   1682   3126     45 

$sex

    1     2 
90454 91695 

$yearadm1

 1985  1986  1987  1988  1989  1990  1991  1992  1993  1994  1995  1996  1997  1998  1999  2000  2001  2002  2003 
 8396 11267 11484 10920 10147 10470 10136 10490 10562 10633 10376 10409  9971  9483  8973  8812  8896  9066  1658 

## Histogram of the outcome
hist(cities3compl$readmcount)

plot of chunk unnamed-chunk-5


## 4.a Poisson
res4 <- glm(formula = readmcount ~ age65 + newrace + male + offset(log(fuptime)),
            family  = poisson(link = "log"),
            data    = cities3compl)
summary(res4)

Call:
glm(formula = readmcount ~ age65 + newrace + male + offset(log(fuptime)), 
    family = poisson(link = "log"), data = cities3compl)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-3.776  -0.871  -0.519  -0.237   9.550  

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) -3.938049   0.010717  -367.5   <2e-16 ***
age65        0.053816   0.000552    97.5   <2e-16 ***
newrace2     0.184770   0.013303    13.9   <2e-16 ***
newrace3     0.517993   0.016905    30.6   <2e-16 ***
male         0.146657   0.008653    16.9   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 228183  on 182148  degrees of freedom
Residual deviance: 218482  on 182144  degrees of freedom
AIC: 310189

Number of Fisher Scoring iterations: 6

## 4.b Quasi-Poisson
res4q <- glm(formula = readmcount ~ age65 + newrace + male + offset(log(fuptime)),
             family  = quasipoisson(link = "log"),
             data    = cities3compl)
summary(res4q)

Call:
glm(formula = readmcount ~ age65 + newrace + male + offset(log(fuptime)), 
    family = quasipoisson(link = "log"), data = cities3compl)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-3.776  -0.871  -0.519  -0.237   9.550  

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.93805    0.02015 -195.44  < 2e-16 ***
age65        0.05382    0.00104   51.83  < 2e-16 ***
newrace2     0.18477    0.02501    7.39  1.5e-13 ***
newrace3     0.51799    0.03179   16.30  < 2e-16 ***
male         0.14666    0.01627    9.01  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasipoisson family taken to be 3.535)

    Null deviance: 228183  on 182148  degrees of freedom
Residual deviance: 218482  on 182144  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 6

## 4.c More predictors
res4q2 <- glm(formula = readmcount ~ age65 + days_cc + days_ic + factor(mitype) + newrace + male + factor(yearadm1) + city + offset(log(fuptime)),
             family  = quasipoisson(link = "log"),
             data    = cities3compl)
summary(res4q2)

Call:
glm(formula = readmcount ~ age65 + days_cc + days_ic + factor(mitype) + 
    newrace + male + factor(yearadm1) + city + offset(log(fuptime)), 
    family = quasipoisson(link = "log"), data = cities3compl)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-8.089  -0.850  -0.645  -0.233   9.554  

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)          -4.427685   0.053284  -83.10  < 2e-16 ***
age65                 0.042095   0.000966   43.59  < 2e-16 ***
days_cc               0.028011   0.001395   20.08  < 2e-16 ***
days_ic               0.017917   0.000706   25.39  < 2e-16 ***
factor(mitype)2       0.022467   0.043032    0.52  0.60160    
factor(mitype)3      -0.093984   0.064118   -1.47  0.14270    
factor(mitype)4      -0.212153   0.073241   -2.90  0.00377 ** 
factor(mitype)5      -0.074237   0.043485   -1.71  0.08779 .  
factor(mitype)6       0.073618   0.065957    1.12  0.26436    
factor(mitype)7      -0.232435   0.107205   -2.17  0.03015 *  
factor(mitype)8       0.476444   0.040054   11.90  < 2e-16 ***
factor(mitype)9       0.540657   0.055738    9.70  < 2e-16 ***
factor(mitype)10      0.566374   0.046179   12.26  < 2e-16 ***
newrace2              0.088319   0.022933    3.85  0.00012 ***
newrace3              0.295298   0.029545    9.99  < 2e-16 ***
male                  0.148237   0.014841    9.99  < 2e-16 ***
factor(yearadm1)1986 -0.097981   0.042713   -2.29  0.02179 *  
factor(yearadm1)1987  0.016009   0.042336    0.38  0.70532    
factor(yearadm1)1988  0.032769   0.043208    0.76  0.44821    
factor(yearadm1)1989  0.122264   0.043944    2.78  0.00540 ** 
factor(yearadm1)1990  0.188354   0.043264    4.35  1.3e-05 ***
factor(yearadm1)1991  0.140379   0.044044    3.19  0.00144 ** 
factor(yearadm1)1992  0.270947   0.043570    6.22  5.0e-10 ***
factor(yearadm1)1993  0.315588   0.043949    7.18  7.0e-13 ***
factor(yearadm1)1994  0.421165   0.043593    9.66  < 2e-16 ***
factor(yearadm1)1995  0.479647   0.044093   10.88  < 2e-16 ***
factor(yearadm1)1996  0.592984   0.044112   13.44  < 2e-16 ***
factor(yearadm1)1997  0.582094   0.045274   12.86  < 2e-16 ***
factor(yearadm1)1998  0.652852   0.046106   14.16  < 2e-16 ***
factor(yearadm1)1999  0.851148   0.047334   17.98  < 2e-16 ***
factor(yearadm1)2000  1.072305   0.047448   22.60  < 2e-16 ***
factor(yearadm1)2001  1.218718   0.048755   25.00  < 2e-16 ***
factor(yearadm1)2002  1.497612   0.051486   29.09  < 2e-16 ***
factor(yearadm1)2003  0.975960   0.113537    8.60  < 2e-16 ***
cityLA               -0.051384   0.019316   -2.66  0.00781 ** 
cityNYC               0.093646   0.017701    5.29  1.2e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasipoisson family taken to be 2.94)

    Null deviance: 228183  on 182148  degrees of freedom
Residual deviance: 202828  on 182113  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 7

5. Effect measure modification

res5 <- glm(formula = readmcount ~ age65 + days_cc + days_ic + factor(mitype) + newrace + male + factor(yearadm1) + city + age65:male + offset(log(fuptime)),
            family  = quasipoisson(link = "log"),
            data    = cities3compl)
summary(res5)

Call:
glm(formula = readmcount ~ age65 + days_cc + days_ic + factor(mitype) + 
    newrace + male + factor(yearadm1) + city + age65:male + offset(log(fuptime)), 
    family = quasipoisson(link = "log"), data = cities3compl)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-7.981  -0.850  -0.644  -0.234   9.582  

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)          -4.331737   0.054801  -79.05  < 2e-16 ***
age65                 0.035843   0.001312   27.32  < 2e-16 ***
days_cc               0.027749   0.001395   19.89  < 2e-16 ***
days_ic               0.017946   0.000711   25.24  < 2e-16 ***
factor(mitype)2       0.022181   0.042974    0.52  0.60575    
factor(mitype)3      -0.094205   0.064032   -1.47  0.14123    
factor(mitype)4      -0.210787   0.073142   -2.88  0.00395 ** 
factor(mitype)5      -0.075026   0.043426   -1.73  0.08405 .  
factor(mitype)6       0.072724   0.065869    1.10  0.26956    
factor(mitype)7      -0.231830   0.107060   -2.17  0.03036 *  
factor(mitype)8       0.473830   0.040001   11.85  < 2e-16 ***
factor(mitype)9       0.539750   0.055663    9.70  < 2e-16 ***
factor(mitype)10      0.563866   0.046117   12.23  < 2e-16 ***
newrace2              0.086580   0.022909    3.78  0.00016 ***
newrace3              0.293634   0.029505    9.95  < 2e-16 ***
male                 -0.031435   0.029453   -1.07  0.28583    
factor(yearadm1)1986 -0.099036   0.042656   -2.32  0.02025 *  
factor(yearadm1)1987  0.015590   0.042279    0.37  0.71232    
factor(yearadm1)1988  0.033287   0.043150    0.77  0.44045    
factor(yearadm1)1989  0.121485   0.043885    2.77  0.00564 ** 
factor(yearadm1)1990  0.189438   0.043205    4.38  1.2e-05 ***
factor(yearadm1)1991  0.141269   0.043986    3.21  0.00132 ** 
factor(yearadm1)1992  0.272902   0.043512    6.27  3.6e-10 ***
factor(yearadm1)1993  0.316737   0.043887    7.22  5.3e-13 ***
factor(yearadm1)1994  0.422529   0.043535    9.71  < 2e-16 ***
factor(yearadm1)1995  0.481522   0.044033   10.94  < 2e-16 ***
factor(yearadm1)1996  0.594964   0.044053   13.51  < 2e-16 ***
factor(yearadm1)1997  0.583271   0.045213   12.90  < 2e-16 ***
factor(yearadm1)1998  0.654958   0.046044   14.22  < 2e-16 ***
factor(yearadm1)1999  0.852374   0.047270   18.03  < 2e-16 ***
factor(yearadm1)2000  1.074197   0.047388   22.67  < 2e-16 ***
factor(yearadm1)2001  1.220374   0.048689   25.06  < 2e-16 ***
factor(yearadm1)2002  1.499895   0.051417   29.17  < 2e-16 ***
factor(yearadm1)2003  0.977267   0.113385    8.62  < 2e-16 ***
cityLA               -0.050705   0.019290   -2.63  0.00858 ** 
cityNYC               0.093631   0.017677    5.30  1.2e-07 ***
age65:male            0.013207   0.001874    7.05  1.8e-12 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasipoisson family taken to be 2.932)

    Null deviance: 228183  on 182148  degrees of freedom
Residual deviance: 202682  on 182112  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 7

## Quadrantic age
res5q <- glm(formula = readmcount ~ age65 + I(age65^2) + days_cc + days_ic + factor(mitype) + newrace + male + factor(yearadm1) + city + (age65 + I(age65^2)):male + offset(log(fuptime)),
            family  = quasipoisson(link = "log"),
            data    = cities3compl)
summary(res5q)

Call:
glm(formula = readmcount ~ age65 + I(age65^2) + days_cc + days_ic + 
    factor(mitype) + newrace + male + factor(yearadm1) + city + 
    (age65 + I(age65^2)):male + offset(log(fuptime)), family = quasipoisson(link = "log"), 
    data = cities3compl)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-7.921  -0.851  -0.637  -0.232   9.534  

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)          -4.388486   0.060667  -72.34  < 2e-16 ***
age65                 0.045161   0.004601    9.82  < 2e-16 ***
I(age65^2)           -0.000303   0.000144   -2.11   0.0347 *  
days_cc               0.027671   0.001400   19.77  < 2e-16 ***
days_ic               0.017868   0.000715   25.01  < 2e-16 ***
factor(mitype)2       0.023576   0.042979    0.55   0.5833    
factor(mitype)3      -0.093248   0.064038   -1.46   0.1454    
factor(mitype)4      -0.208130   0.073151   -2.85   0.0044 ** 
factor(mitype)5      -0.073027   0.043432   -1.68   0.0927 .  
factor(mitype)6       0.073257   0.065875    1.11   0.2661    
factor(mitype)7      -0.230832   0.107071   -2.16   0.0311 *  
factor(mitype)8       0.474685   0.040003   11.87  < 2e-16 ***
factor(mitype)9       0.541286   0.055668    9.72  < 2e-16 ***
factor(mitype)10      0.567450   0.046123   12.30  < 2e-16 ***
newrace2              0.090299   0.022928    3.94  8.2e-05 ***
newrace3              0.294915   0.029512    9.99  < 2e-16 ***
male                 -0.072026   0.044200   -1.63   0.1032    
factor(yearadm1)1986 -0.098264   0.042661   -2.30   0.0213 *  
factor(yearadm1)1987  0.016998   0.042284    0.40   0.6877    
factor(yearadm1)1988  0.034672   0.043154    0.80   0.4217    
factor(yearadm1)1989  0.121916   0.043889    2.78   0.0055 ** 
factor(yearadm1)1990  0.190213   0.043210    4.40  1.1e-05 ***
factor(yearadm1)1991  0.142650   0.043991    3.24   0.0012 ** 
factor(yearadm1)1992  0.273836   0.043518    6.29  3.1e-10 ***
factor(yearadm1)1993  0.318616   0.043893    7.26  3.9e-13 ***
factor(yearadm1)1994  0.423975   0.043543    9.74  < 2e-16 ***
factor(yearadm1)1995  0.482227   0.044039   10.95  < 2e-16 ***
factor(yearadm1)1996  0.595581   0.044058   13.52  < 2e-16 ***
factor(yearadm1)1997  0.584621   0.045220   12.93  < 2e-16 ***
factor(yearadm1)1998  0.656603   0.046051   14.26  < 2e-16 ***
factor(yearadm1)1999  0.854361   0.047278   18.07  < 2e-16 ***
factor(yearadm1)2000  1.078205   0.047393   22.75  < 2e-16 ***
factor(yearadm1)2001  1.224644   0.048702   25.15  < 2e-16 ***
factor(yearadm1)2002  1.506453   0.051438   29.29  < 2e-16 ***
factor(yearadm1)2003  0.991942   0.113437    8.74  < 2e-16 ***
cityLA               -0.049075   0.019296   -2.54   0.0110 *  
cityNYC               0.094289   0.017680    5.33  9.7e-08 ***
age65:male            0.023407   0.006473    3.62   0.0003 ***
I(age65^2):male      -0.000408   0.000214   -1.90   0.0570 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasipoisson family taken to be 2.932)

    Null deviance: 228183  on 182148  degrees of freedom
Residual deviance: 202609  on 182110  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 7

## LRT
drop1(res5q, scope =  ~ I(age65^2), test = "Chisq")
Single term deletions

Model:
readmcount ~ age65 + I(age65^2) + days_cc + days_ic + factor(mitype) + 
    newrace + male + factor(yearadm1) + city + (age65 + I(age65^2)):male + 
    offset(log(fuptime))
           Df Deviance scaled dev. Pr(>Chi)  
<none>          202609                       
I(age65^2)  1   202622        4.52    0.033 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

IV. Overdispersed binomial data

mi <- read.table(header = T, text = "
id n r city  hospital
  1 16     8      0       0
  2 51    26      0       0
  3 45    23      0       0
  4 39    10      0       0
  5 36     9      0       0
  6 81    23      1       0
  7 30    10      1       0
  8 39    17      1       0
  9 28     8      1       0
 10 62    23      1       0
 11 51    32      0       1
 12 72    55      0       1
 13 41    22      0       1
 14 12     3      0       1
 15 13    10      0       1
 16 79    46      1       1
 17 30    15      1       1
 18 51    32      1       1
 19 74    53      1       1
 20 56    12      1       1
")

## Summary
library(plyr)
ddply(mi,
      c("city","hospital"),
      summarize,
      prop = sum(r)/sum(n))
  city hospital   prop
1    0        0 0.4064
2    0        1 0.6455
3    1        0 0.3375
4    1        1 0.5448

## Logistic regression cbind(success, failure) format
iv1 <- glm(formula = cbind(r, n-r) ~ city + hospital + city:hospital,
           family  = binomial(link = "logit"),
           data    = mi)
summary(iv1)

Call:
glm(formula = cbind(r, n - r) ~ city + hospital + city:hospital, 
    family = binomial(link = "logit"), data = mi)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-5.064  -1.133   0.252   1.214   3.025  

Coefficients:
              Estimate Std. Error z value  Pr(>|z|)    
(Intercept)     -0.379      0.149   -2.54     0.011 *  
city            -0.296      0.202   -1.46     0.143    
hospital         0.978      0.213    4.60 0.0000043 ***
city:hospital   -0.124      0.279   -0.44     0.657    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 118.195  on 19  degrees of freedom
Residual deviance:  68.346  on 16  degrees of freedom
AIC: 156.5

Number of Fisher Scoring iterations: 4
deviance(iv1)/df.residual(iv1)
[1] 4.272

## Estimate scale parameter (The squared scale parameter is shown.)
iv2 <- glm(formula = cbind(r, n-r) ~ city + hospital + city:hospital,
           family  = quasibinomial(link = "logit"),
           data    = mi)
summary(iv2)

Call:
glm(formula = cbind(r, n - r) ~ city + hospital + city:hospital, 
    family = quasibinomial(link = "logit"), data = mi)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-5.064  -1.133   0.252   1.214   3.025  

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept)     -0.379      0.304   -1.25    0.231  
city            -0.296      0.413   -0.72    0.484  
hospital         0.978      0.435    2.25    0.039 *
city:hospital   -0.124      0.570   -0.22    0.831  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for quasibinomial family taken to be 4.173)

    Null deviance: 118.195  on 19  degrees of freedom
Residual deviance:  68.346  on 16  degrees of freedom
AIC: NA

Number of Fisher Scoring iterations: 4

V. Poisson survival

It did not converge for the year variables.

## Extract Chicago only
chicago <- subset(cities3compl, city == "Chicago")

## Add ID
chicago$id <- as.numeric(rownames(chicago))

## Create multiple rows for each subject
chicago_long <- chicago[rep(seq_along(chicago$id), chicago$fuptime + 1),]

## Extract yearadm1 and split by id
years <- split(chicago_long$yearadm1, chicago_long$id)

## Add 0:nrow
years <- lapply(years,
                function(VEC) {
                    VEC + seq_along(VEC) - 1
                })
## Put back
chicago_long$cal_yr <- unlist(years)

## event 1 if cal_yr == yeard
chicago_long$event <- 0
## indexes for event 1
event.index <- chicago_long$yeard == chicago_long$cal_yr
## Remove NA and NaN
event.index[is.na(event.index)|is.nan(event.index)] <- FALSE
## Enter 1
chicago_long[event.index, "event"] <- 1

## categorical year 2005 as reference
chicago_long$cal_yr <- factor(chicago_long$cal_yr)
chicago_long$cal_yr <- relevel(chicago_long$cal_yr, ref = "1985")

## Check
head(chicago_long)
         yearadm1 dateadmi age sex race mip resp mitype datedead days_ic days_cc diabpc copdpc yeard yearadm2
128864       2002    15649  87   1    1   0    0      8    16131       6       0      0      0  2004     2002
128864.1     2002    15649  87   1    1   0    0      8    16131       6       0      0      0  2004     2002
128864.2     2002    15649  87   1    1   0    0      8    16131       6       0      0      0  2004     2002
128865       1988    10522  66   2    1   0    0      7      NaN       0       0      1      0   NaN     1988
128865.1     1988    10522  66   2    1   0    0      7      NaN       0       0      1      0   NaN     1988
128865.2     1988    10522  66   2    1   0    0      7      NaN       0       0      1      0   NaN     1988
         yearlast fuptime readmcount newreadm    city age65 male newrace     id cal_yr event
128864       2004       2          0        0 Chicago    22    1       1 128864   2002     0
128864.1     2004       2          0        0 Chicago    22    1       1 128864   2003     0
128864.2     2004       2          0        0 Chicago    22    1       1 128864   2004     1
128865       2003      15          0        0 Chicago     1    0       1 128865   1988     0
128865.1     2003      15          0        0 Chicago     1    0       1 128865   1989     0
128865.2     2003      15          0        0 Chicago     1    0       1 128865   1990     0

## Poisson (It did not really converge)
res.v <- glm(formula = event ~ age65 + newrace + male + cal_yr,
             family  = poisson(link = "log"),
             data    = chicago_long)
summary(res.v)

Call:
glm(formula = event ~ age65 + newrace + male + cal_yr, family = poisson(link = "log"), 
    data = chicago_long)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-2.246  -0.307  -0.249  -0.206   2.747  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept) -17.155828  45.084742   -0.38     0.70    
age65         0.064695   0.000959   67.49  < 2e-16 ***
newrace2      0.305916   0.020100   15.22  < 2e-16 ***
newrace3      0.303154   0.050709    5.98  2.3e-09 ***
male          0.109223   0.015084    7.24  4.5e-13 ***
cal_yr1986   13.558609  45.084771    0.30     0.76    
cal_yr1987   13.831278  45.084755    0.31     0.76    
cal_yr1988   13.641359  45.084754    0.30     0.76    
cal_yr1989   13.490020  45.084754    0.30     0.76    
cal_yr1990   13.356974  45.084753    0.30     0.77    
cal_yr1991   13.281846  45.084752    0.29     0.77    
cal_yr1992   13.237651  45.084751    0.29     0.77    
cal_yr1993   12.878211  45.084754    0.29     0.78    
cal_yr1994   13.053587  45.084751    0.29     0.77    
cal_yr1995   13.185591  45.084748    0.29     0.77    
cal_yr1996   13.046484  45.084749    0.29     0.77    
cal_yr1997   12.972161  45.084749    0.29     0.77    
cal_yr1998   12.971685  45.084748    0.29     0.77    
cal_yr1999   12.939012  45.084748    0.29     0.77    
cal_yr2000   12.391757  45.084754    0.27     0.78    
cal_yr2001   12.754786  45.084749    0.28     0.78    
cal_yr2002   12.805058  45.084748    0.28     0.78    
cal_yr2003   12.899740  45.084747    0.29     0.77    
cal_yr2004   15.705196  45.084749    0.35     0.73    
cal_yr2005   15.858065  45.084798    0.35     0.73    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 117068  on 433395  degrees of freedom
Residual deviance: 105131  on 433371  degrees of freedom
AIC: 142355

Number of Fisher Scoring iterations: 14

## Check for overdispersion
deviance(res.v)/df.residual(res.v)
[1] 0.2426

By creating dummy variables for the calendar years

## Extract dummy variables for year
yr_dummy_vars <- data.frame(model.matrix(res.v)[,grep("yr", colnames(model.matrix(res.v)))])
names(yr_dummy_vars)
 [1] "cal_yr1986" "cal_yr1987" "cal_yr1988" "cal_yr1989" "cal_yr1990" "cal_yr1991" "cal_yr1992" "cal_yr1993"
 [9] "cal_yr1994" "cal_yr1995" "cal_yr1996" "cal_yr1997" "cal_yr1998" "cal_yr1999" "cal_yr2000" "cal_yr2001"
[17] "cal_yr2002" "cal_yr2003" "cal_yr2004" "cal_yr2005"

## Add to dataset
chicago_long <- cbind(chicago_long,
                      yr_dummy_vars)

## Poisson (It did not help)
res.v2 <- glm(formula = event ~ age65 + newrace + male + cal_yr1986 + cal_yr1987 + cal_yr1988 + cal_yr1989 + cal_yr1990 + cal_yr1991 + cal_yr1992 + cal_yr1993 + cal_yr1994 + cal_yr1995 + cal_yr1996 + cal_yr1997 + cal_yr1998 + cal_yr1999 + cal_yr2000 + cal_yr2001 + cal_yr2002 + cal_yr2003 + cal_yr2004 + cal_yr2005,
             family  = poisson(link = "log"),
             data    = chicago_long)
summary(res.v2)

Call:
glm(formula = event ~ age65 + newrace + male + cal_yr1986 + cal_yr1987 + 
    cal_yr1988 + cal_yr1989 + cal_yr1990 + cal_yr1991 + cal_yr1992 + 
    cal_yr1993 + cal_yr1994 + cal_yr1995 + cal_yr1996 + cal_yr1997 + 
    cal_yr1998 + cal_yr1999 + cal_yr2000 + cal_yr2001 + cal_yr2002 + 
    cal_yr2003 + cal_yr2004 + cal_yr2005, family = poisson(link = "log"), 
    data = chicago_long)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-2.246  -0.307  -0.249  -0.206   2.747  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept) -17.155828  45.084742   -0.38     0.70    
age65         0.064695   0.000959   67.49  < 2e-16 ***
newrace2      0.305916   0.020100   15.22  < 2e-16 ***
newrace3      0.303154   0.050709    5.98  2.3e-09 ***
male          0.109223   0.015084    7.24  4.5e-13 ***
cal_yr1986   13.558609  45.084771    0.30     0.76    
cal_yr1987   13.831278  45.084755    0.31     0.76    
cal_yr1988   13.641359  45.084754    0.30     0.76    
cal_yr1989   13.490020  45.084754    0.30     0.76    
cal_yr1990   13.356974  45.084753    0.30     0.77    
cal_yr1991   13.281846  45.084752    0.29     0.77    
cal_yr1992   13.237651  45.084751    0.29     0.77    
cal_yr1993   12.878211  45.084754    0.29     0.78    
cal_yr1994   13.053587  45.084751    0.29     0.77    
cal_yr1995   13.185591  45.084748    0.29     0.77    
cal_yr1996   13.046484  45.084749    0.29     0.77    
cal_yr1997   12.972161  45.084749    0.29     0.77    
cal_yr1998   12.971685  45.084748    0.29     0.77    
cal_yr1999   12.939012  45.084748    0.29     0.77    
cal_yr2000   12.391757  45.084754    0.27     0.78    
cal_yr2001   12.754786  45.084749    0.28     0.78    
cal_yr2002   12.805058  45.084748    0.28     0.78    
cal_yr2003   12.899740  45.084747    0.29     0.77    
cal_yr2004   15.705196  45.084749    0.35     0.73    
cal_yr2005   15.858065  45.084798    0.35     0.73    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 117068  on 433395  degrees of freedom
Residual deviance: 105131  on 433371  degrees of freedom
AIC: 142355

Number of Fisher Scoring iterations: 14