## Load sas7bdat package
library(sas7bdat)
## Load three datasets as a list of data frames
cities <- lapply(c("lafinal","nycfinal","chicagofinal"),
function(X){
read.sas7bdat(paste0(X,".sas7bdat"))
})
## add city names
cities2 <- lapply(1:3, function(i) {
cities[[i]]$city <- c("LA","NYC","Chicago")[i]
cities[[i]]
})
## Merge as a data frame
cities2 <- do.call(rbind, cities2)
cities3 <- within(cities2, {
newrace <- race
newrace[is.nan(race)] <- NA
newrace[race == 0] <- NA
newrace[race >= 4] <- 3
newrace <- factor(newrace)
mitype[mitype == 0] <- NA
male <- as.numeric(sex == 1)
days_cc[is.na(days_cc)] <- 0
days_ic[is.na(days_ic)] <- 0
age65 <- age - 65
})
cities3compl <- cities3[complete.cases(cities3[,c("newrace","mitype")]),]
## Continuous
library(psych)
describe(cities3compl[,c("age","days_cc","days_ic","fuptime")])
var n mean sd median trimmed mad min max range skew kurtosis se
age 1 182149 76.94 7.70 76 76.54 8.90 65 117 52 0.41 -0.53 0.02
days_cc 2 182149 1.59 3.56 0 0.80 0.00 0 126 126 6.66 108.71 0.01
days_ic 3 182149 1.88 4.45 0 0.97 0.00 0 338 338 12.00 456.76 0.01
fuptime 4 182149 7.27 5.58 6 6.83 7.41 1 19 18 0.44 -1.20 0.01
## Tabling
lapply(cities3compl[,c("readmcount","mitype","race","sex","yearadm1")], table)
$readmcount
0 1 2 3 4 5 6 7 8 9 10 11 12 14 16
139991 32471 6876 1892 562 208 80 37 15 6 6 2 1 1 1
$mitype
1 2 3 4 5 6 7 8 9 10
8221 32445 5048 3789 32366 3909 1461 78227 4610 12073
$race
1 2 3 4 5 6
149843 21584 5869 1682 3126 45
$sex
1 2
90454 91695
$yearadm1
1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
8396 11267 11484 10920 10147 10470 10136 10490 10562 10633 10376 10409 9971 9483 8973 8812 8896 9066 1658
## Histogram of the outcome
hist(cities3compl$readmcount)
## 4.a Poisson
res4 <- glm(formula = readmcount ~ age65 + newrace + male + offset(log(fuptime)),
family = poisson(link = "log"),
data = cities3compl)
summary(res4)
Call:
glm(formula = readmcount ~ age65 + newrace + male + offset(log(fuptime)),
family = poisson(link = "log"), data = cities3compl)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.776 -0.871 -0.519 -0.237 9.550
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.938049 0.010717 -367.5 <2e-16 ***
age65 0.053816 0.000552 97.5 <2e-16 ***
newrace2 0.184770 0.013303 13.9 <2e-16 ***
newrace3 0.517993 0.016905 30.6 <2e-16 ***
male 0.146657 0.008653 16.9 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 228183 on 182148 degrees of freedom
Residual deviance: 218482 on 182144 degrees of freedom
AIC: 310189
Number of Fisher Scoring iterations: 6
## 4.b Quasi-Poisson
res4q <- glm(formula = readmcount ~ age65 + newrace + male + offset(log(fuptime)),
family = quasipoisson(link = "log"),
data = cities3compl)
summary(res4q)
Call:
glm(formula = readmcount ~ age65 + newrace + male + offset(log(fuptime)),
family = quasipoisson(link = "log"), data = cities3compl)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.776 -0.871 -0.519 -0.237 9.550
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.93805 0.02015 -195.44 < 2e-16 ***
age65 0.05382 0.00104 51.83 < 2e-16 ***
newrace2 0.18477 0.02501 7.39 1.5e-13 ***
newrace3 0.51799 0.03179 16.30 < 2e-16 ***
male 0.14666 0.01627 9.01 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for quasipoisson family taken to be 3.535)
Null deviance: 228183 on 182148 degrees of freedom
Residual deviance: 218482 on 182144 degrees of freedom
AIC: NA
Number of Fisher Scoring iterations: 6
## 4.c More predictors
res4q2 <- glm(formula = readmcount ~ age65 + days_cc + days_ic + factor(mitype) + newrace + male + factor(yearadm1) + city + offset(log(fuptime)),
family = quasipoisson(link = "log"),
data = cities3compl)
summary(res4q2)
Call:
glm(formula = readmcount ~ age65 + days_cc + days_ic + factor(mitype) +
newrace + male + factor(yearadm1) + city + offset(log(fuptime)),
family = quasipoisson(link = "log"), data = cities3compl)
Deviance Residuals:
Min 1Q Median 3Q Max
-8.089 -0.850 -0.645 -0.233 9.554
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.427685 0.053284 -83.10 < 2e-16 ***
age65 0.042095 0.000966 43.59 < 2e-16 ***
days_cc 0.028011 0.001395 20.08 < 2e-16 ***
days_ic 0.017917 0.000706 25.39 < 2e-16 ***
factor(mitype)2 0.022467 0.043032 0.52 0.60160
factor(mitype)3 -0.093984 0.064118 -1.47 0.14270
factor(mitype)4 -0.212153 0.073241 -2.90 0.00377 **
factor(mitype)5 -0.074237 0.043485 -1.71 0.08779 .
factor(mitype)6 0.073618 0.065957 1.12 0.26436
factor(mitype)7 -0.232435 0.107205 -2.17 0.03015 *
factor(mitype)8 0.476444 0.040054 11.90 < 2e-16 ***
factor(mitype)9 0.540657 0.055738 9.70 < 2e-16 ***
factor(mitype)10 0.566374 0.046179 12.26 < 2e-16 ***
newrace2 0.088319 0.022933 3.85 0.00012 ***
newrace3 0.295298 0.029545 9.99 < 2e-16 ***
male 0.148237 0.014841 9.99 < 2e-16 ***
factor(yearadm1)1986 -0.097981 0.042713 -2.29 0.02179 *
factor(yearadm1)1987 0.016009 0.042336 0.38 0.70532
factor(yearadm1)1988 0.032769 0.043208 0.76 0.44821
factor(yearadm1)1989 0.122264 0.043944 2.78 0.00540 **
factor(yearadm1)1990 0.188354 0.043264 4.35 1.3e-05 ***
factor(yearadm1)1991 0.140379 0.044044 3.19 0.00144 **
factor(yearadm1)1992 0.270947 0.043570 6.22 5.0e-10 ***
factor(yearadm1)1993 0.315588 0.043949 7.18 7.0e-13 ***
factor(yearadm1)1994 0.421165 0.043593 9.66 < 2e-16 ***
factor(yearadm1)1995 0.479647 0.044093 10.88 < 2e-16 ***
factor(yearadm1)1996 0.592984 0.044112 13.44 < 2e-16 ***
factor(yearadm1)1997 0.582094 0.045274 12.86 < 2e-16 ***
factor(yearadm1)1998 0.652852 0.046106 14.16 < 2e-16 ***
factor(yearadm1)1999 0.851148 0.047334 17.98 < 2e-16 ***
factor(yearadm1)2000 1.072305 0.047448 22.60 < 2e-16 ***
factor(yearadm1)2001 1.218718 0.048755 25.00 < 2e-16 ***
factor(yearadm1)2002 1.497612 0.051486 29.09 < 2e-16 ***
factor(yearadm1)2003 0.975960 0.113537 8.60 < 2e-16 ***
cityLA -0.051384 0.019316 -2.66 0.00781 **
cityNYC 0.093646 0.017701 5.29 1.2e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for quasipoisson family taken to be 2.94)
Null deviance: 228183 on 182148 degrees of freedom
Residual deviance: 202828 on 182113 degrees of freedom
AIC: NA
Number of Fisher Scoring iterations: 7
res5 <- glm(formula = readmcount ~ age65 + days_cc + days_ic + factor(mitype) + newrace + male + factor(yearadm1) + city + age65:male + offset(log(fuptime)),
family = quasipoisson(link = "log"),
data = cities3compl)
summary(res5)
Call:
glm(formula = readmcount ~ age65 + days_cc + days_ic + factor(mitype) +
newrace + male + factor(yearadm1) + city + age65:male + offset(log(fuptime)),
family = quasipoisson(link = "log"), data = cities3compl)
Deviance Residuals:
Min 1Q Median 3Q Max
-7.981 -0.850 -0.644 -0.234 9.582
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.331737 0.054801 -79.05 < 2e-16 ***
age65 0.035843 0.001312 27.32 < 2e-16 ***
days_cc 0.027749 0.001395 19.89 < 2e-16 ***
days_ic 0.017946 0.000711 25.24 < 2e-16 ***
factor(mitype)2 0.022181 0.042974 0.52 0.60575
factor(mitype)3 -0.094205 0.064032 -1.47 0.14123
factor(mitype)4 -0.210787 0.073142 -2.88 0.00395 **
factor(mitype)5 -0.075026 0.043426 -1.73 0.08405 .
factor(mitype)6 0.072724 0.065869 1.10 0.26956
factor(mitype)7 -0.231830 0.107060 -2.17 0.03036 *
factor(mitype)8 0.473830 0.040001 11.85 < 2e-16 ***
factor(mitype)9 0.539750 0.055663 9.70 < 2e-16 ***
factor(mitype)10 0.563866 0.046117 12.23 < 2e-16 ***
newrace2 0.086580 0.022909 3.78 0.00016 ***
newrace3 0.293634 0.029505 9.95 < 2e-16 ***
male -0.031435 0.029453 -1.07 0.28583
factor(yearadm1)1986 -0.099036 0.042656 -2.32 0.02025 *
factor(yearadm1)1987 0.015590 0.042279 0.37 0.71232
factor(yearadm1)1988 0.033287 0.043150 0.77 0.44045
factor(yearadm1)1989 0.121485 0.043885 2.77 0.00564 **
factor(yearadm1)1990 0.189438 0.043205 4.38 1.2e-05 ***
factor(yearadm1)1991 0.141269 0.043986 3.21 0.00132 **
factor(yearadm1)1992 0.272902 0.043512 6.27 3.6e-10 ***
factor(yearadm1)1993 0.316737 0.043887 7.22 5.3e-13 ***
factor(yearadm1)1994 0.422529 0.043535 9.71 < 2e-16 ***
factor(yearadm1)1995 0.481522 0.044033 10.94 < 2e-16 ***
factor(yearadm1)1996 0.594964 0.044053 13.51 < 2e-16 ***
factor(yearadm1)1997 0.583271 0.045213 12.90 < 2e-16 ***
factor(yearadm1)1998 0.654958 0.046044 14.22 < 2e-16 ***
factor(yearadm1)1999 0.852374 0.047270 18.03 < 2e-16 ***
factor(yearadm1)2000 1.074197 0.047388 22.67 < 2e-16 ***
factor(yearadm1)2001 1.220374 0.048689 25.06 < 2e-16 ***
factor(yearadm1)2002 1.499895 0.051417 29.17 < 2e-16 ***
factor(yearadm1)2003 0.977267 0.113385 8.62 < 2e-16 ***
cityLA -0.050705 0.019290 -2.63 0.00858 **
cityNYC 0.093631 0.017677 5.30 1.2e-07 ***
age65:male 0.013207 0.001874 7.05 1.8e-12 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for quasipoisson family taken to be 2.932)
Null deviance: 228183 on 182148 degrees of freedom
Residual deviance: 202682 on 182112 degrees of freedom
AIC: NA
Number of Fisher Scoring iterations: 7
## Quadrantic age
res5q <- glm(formula = readmcount ~ age65 + I(age65^2) + days_cc + days_ic + factor(mitype) + newrace + male + factor(yearadm1) + city + (age65 + I(age65^2)):male + offset(log(fuptime)),
family = quasipoisson(link = "log"),
data = cities3compl)
summary(res5q)
Call:
glm(formula = readmcount ~ age65 + I(age65^2) + days_cc + days_ic +
factor(mitype) + newrace + male + factor(yearadm1) + city +
(age65 + I(age65^2)):male + offset(log(fuptime)), family = quasipoisson(link = "log"),
data = cities3compl)
Deviance Residuals:
Min 1Q Median 3Q Max
-7.921 -0.851 -0.637 -0.232 9.534
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.388486 0.060667 -72.34 < 2e-16 ***
age65 0.045161 0.004601 9.82 < 2e-16 ***
I(age65^2) -0.000303 0.000144 -2.11 0.0347 *
days_cc 0.027671 0.001400 19.77 < 2e-16 ***
days_ic 0.017868 0.000715 25.01 < 2e-16 ***
factor(mitype)2 0.023576 0.042979 0.55 0.5833
factor(mitype)3 -0.093248 0.064038 -1.46 0.1454
factor(mitype)4 -0.208130 0.073151 -2.85 0.0044 **
factor(mitype)5 -0.073027 0.043432 -1.68 0.0927 .
factor(mitype)6 0.073257 0.065875 1.11 0.2661
factor(mitype)7 -0.230832 0.107071 -2.16 0.0311 *
factor(mitype)8 0.474685 0.040003 11.87 < 2e-16 ***
factor(mitype)9 0.541286 0.055668 9.72 < 2e-16 ***
factor(mitype)10 0.567450 0.046123 12.30 < 2e-16 ***
newrace2 0.090299 0.022928 3.94 8.2e-05 ***
newrace3 0.294915 0.029512 9.99 < 2e-16 ***
male -0.072026 0.044200 -1.63 0.1032
factor(yearadm1)1986 -0.098264 0.042661 -2.30 0.0213 *
factor(yearadm1)1987 0.016998 0.042284 0.40 0.6877
factor(yearadm1)1988 0.034672 0.043154 0.80 0.4217
factor(yearadm1)1989 0.121916 0.043889 2.78 0.0055 **
factor(yearadm1)1990 0.190213 0.043210 4.40 1.1e-05 ***
factor(yearadm1)1991 0.142650 0.043991 3.24 0.0012 **
factor(yearadm1)1992 0.273836 0.043518 6.29 3.1e-10 ***
factor(yearadm1)1993 0.318616 0.043893 7.26 3.9e-13 ***
factor(yearadm1)1994 0.423975 0.043543 9.74 < 2e-16 ***
factor(yearadm1)1995 0.482227 0.044039 10.95 < 2e-16 ***
factor(yearadm1)1996 0.595581 0.044058 13.52 < 2e-16 ***
factor(yearadm1)1997 0.584621 0.045220 12.93 < 2e-16 ***
factor(yearadm1)1998 0.656603 0.046051 14.26 < 2e-16 ***
factor(yearadm1)1999 0.854361 0.047278 18.07 < 2e-16 ***
factor(yearadm1)2000 1.078205 0.047393 22.75 < 2e-16 ***
factor(yearadm1)2001 1.224644 0.048702 25.15 < 2e-16 ***
factor(yearadm1)2002 1.506453 0.051438 29.29 < 2e-16 ***
factor(yearadm1)2003 0.991942 0.113437 8.74 < 2e-16 ***
cityLA -0.049075 0.019296 -2.54 0.0110 *
cityNYC 0.094289 0.017680 5.33 9.7e-08 ***
age65:male 0.023407 0.006473 3.62 0.0003 ***
I(age65^2):male -0.000408 0.000214 -1.90 0.0570 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for quasipoisson family taken to be 2.932)
Null deviance: 228183 on 182148 degrees of freedom
Residual deviance: 202609 on 182110 degrees of freedom
AIC: NA
Number of Fisher Scoring iterations: 7
## LRT
drop1(res5q, scope = ~ I(age65^2), test = "Chisq")
Single term deletions
Model:
readmcount ~ age65 + I(age65^2) + days_cc + days_ic + factor(mitype) +
newrace + male + factor(yearadm1) + city + (age65 + I(age65^2)):male +
offset(log(fuptime))
Df Deviance scaled dev. Pr(>Chi)
<none> 202609
I(age65^2) 1 202622 4.52 0.033 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mi <- read.table(header = T, text = "
id n r city hospital
1 16 8 0 0
2 51 26 0 0
3 45 23 0 0
4 39 10 0 0
5 36 9 0 0
6 81 23 1 0
7 30 10 1 0
8 39 17 1 0
9 28 8 1 0
10 62 23 1 0
11 51 32 0 1
12 72 55 0 1
13 41 22 0 1
14 12 3 0 1
15 13 10 0 1
16 79 46 1 1
17 30 15 1 1
18 51 32 1 1
19 74 53 1 1
20 56 12 1 1
")
## Summary
library(plyr)
ddply(mi,
c("city","hospital"),
summarize,
prop = sum(r)/sum(n))
city hospital prop
1 0 0 0.4064
2 0 1 0.6455
3 1 0 0.3375
4 1 1 0.5448
## Logistic regression cbind(success, failure) format
iv1 <- glm(formula = cbind(r, n-r) ~ city + hospital + city:hospital,
family = binomial(link = "logit"),
data = mi)
summary(iv1)
Call:
glm(formula = cbind(r, n - r) ~ city + hospital + city:hospital,
family = binomial(link = "logit"), data = mi)
Deviance Residuals:
Min 1Q Median 3Q Max
-5.064 -1.133 0.252 1.214 3.025
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.379 0.149 -2.54 0.011 *
city -0.296 0.202 -1.46 0.143
hospital 0.978 0.213 4.60 0.0000043 ***
city:hospital -0.124 0.279 -0.44 0.657
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 118.195 on 19 degrees of freedom
Residual deviance: 68.346 on 16 degrees of freedom
AIC: 156.5
Number of Fisher Scoring iterations: 4
deviance(iv1)/df.residual(iv1)
[1] 4.272
## Estimate scale parameter (The squared scale parameter is shown.)
iv2 <- glm(formula = cbind(r, n-r) ~ city + hospital + city:hospital,
family = quasibinomial(link = "logit"),
data = mi)
summary(iv2)
Call:
glm(formula = cbind(r, n - r) ~ city + hospital + city:hospital,
family = quasibinomial(link = "logit"), data = mi)
Deviance Residuals:
Min 1Q Median 3Q Max
-5.064 -1.133 0.252 1.214 3.025
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.379 0.304 -1.25 0.231
city -0.296 0.413 -0.72 0.484
hospital 0.978 0.435 2.25 0.039 *
city:hospital -0.124 0.570 -0.22 0.831
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for quasibinomial family taken to be 4.173)
Null deviance: 118.195 on 19 degrees of freedom
Residual deviance: 68.346 on 16 degrees of freedom
AIC: NA
Number of Fisher Scoring iterations: 4
It did not converge for the year variables.
## Extract Chicago only
chicago <- subset(cities3compl, city == "Chicago")
## Add ID
chicago$id <- as.numeric(rownames(chicago))
## Create multiple rows for each subject
chicago_long <- chicago[rep(seq_along(chicago$id), chicago$fuptime + 1),]
## Extract yearadm1 and split by id
years <- split(chicago_long$yearadm1, chicago_long$id)
## Add 0:nrow
years <- lapply(years,
function(VEC) {
VEC + seq_along(VEC) - 1
})
## Put back
chicago_long$cal_yr <- unlist(years)
## event 1 if cal_yr == yeard
chicago_long$event <- 0
## indexes for event 1
event.index <- chicago_long$yeard == chicago_long$cal_yr
## Remove NA and NaN
event.index[is.na(event.index)|is.nan(event.index)] <- FALSE
## Enter 1
chicago_long[event.index, "event"] <- 1
## categorical year 2005 as reference
chicago_long$cal_yr <- factor(chicago_long$cal_yr)
chicago_long$cal_yr <- relevel(chicago_long$cal_yr, ref = "1985")
## Check
head(chicago_long)
yearadm1 dateadmi age sex race mip resp mitype datedead days_ic days_cc diabpc copdpc yeard yearadm2
128864 2002 15649 87 1 1 0 0 8 16131 6 0 0 0 2004 2002
128864.1 2002 15649 87 1 1 0 0 8 16131 6 0 0 0 2004 2002
128864.2 2002 15649 87 1 1 0 0 8 16131 6 0 0 0 2004 2002
128865 1988 10522 66 2 1 0 0 7 NaN 0 0 1 0 NaN 1988
128865.1 1988 10522 66 2 1 0 0 7 NaN 0 0 1 0 NaN 1988
128865.2 1988 10522 66 2 1 0 0 7 NaN 0 0 1 0 NaN 1988
yearlast fuptime readmcount newreadm city age65 male newrace id cal_yr event
128864 2004 2 0 0 Chicago 22 1 1 128864 2002 0
128864.1 2004 2 0 0 Chicago 22 1 1 128864 2003 0
128864.2 2004 2 0 0 Chicago 22 1 1 128864 2004 1
128865 2003 15 0 0 Chicago 1 0 1 128865 1988 0
128865.1 2003 15 0 0 Chicago 1 0 1 128865 1989 0
128865.2 2003 15 0 0 Chicago 1 0 1 128865 1990 0
## Poisson (It did not really converge)
res.v <- glm(formula = event ~ age65 + newrace + male + cal_yr,
family = poisson(link = "log"),
data = chicago_long)
summary(res.v)
Call:
glm(formula = event ~ age65 + newrace + male + cal_yr, family = poisson(link = "log"),
data = chicago_long)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.246 -0.307 -0.249 -0.206 2.747
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -17.155828 45.084742 -0.38 0.70
age65 0.064695 0.000959 67.49 < 2e-16 ***
newrace2 0.305916 0.020100 15.22 < 2e-16 ***
newrace3 0.303154 0.050709 5.98 2.3e-09 ***
male 0.109223 0.015084 7.24 4.5e-13 ***
cal_yr1986 13.558609 45.084771 0.30 0.76
cal_yr1987 13.831278 45.084755 0.31 0.76
cal_yr1988 13.641359 45.084754 0.30 0.76
cal_yr1989 13.490020 45.084754 0.30 0.76
cal_yr1990 13.356974 45.084753 0.30 0.77
cal_yr1991 13.281846 45.084752 0.29 0.77
cal_yr1992 13.237651 45.084751 0.29 0.77
cal_yr1993 12.878211 45.084754 0.29 0.78
cal_yr1994 13.053587 45.084751 0.29 0.77
cal_yr1995 13.185591 45.084748 0.29 0.77
cal_yr1996 13.046484 45.084749 0.29 0.77
cal_yr1997 12.972161 45.084749 0.29 0.77
cal_yr1998 12.971685 45.084748 0.29 0.77
cal_yr1999 12.939012 45.084748 0.29 0.77
cal_yr2000 12.391757 45.084754 0.27 0.78
cal_yr2001 12.754786 45.084749 0.28 0.78
cal_yr2002 12.805058 45.084748 0.28 0.78
cal_yr2003 12.899740 45.084747 0.29 0.77
cal_yr2004 15.705196 45.084749 0.35 0.73
cal_yr2005 15.858065 45.084798 0.35 0.73
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 117068 on 433395 degrees of freedom
Residual deviance: 105131 on 433371 degrees of freedom
AIC: 142355
Number of Fisher Scoring iterations: 14
## Check for overdispersion
deviance(res.v)/df.residual(res.v)
[1] 0.2426
By creating dummy variables for the calendar years
## Extract dummy variables for year
yr_dummy_vars <- data.frame(model.matrix(res.v)[,grep("yr", colnames(model.matrix(res.v)))])
names(yr_dummy_vars)
[1] "cal_yr1986" "cal_yr1987" "cal_yr1988" "cal_yr1989" "cal_yr1990" "cal_yr1991" "cal_yr1992" "cal_yr1993"
[9] "cal_yr1994" "cal_yr1995" "cal_yr1996" "cal_yr1997" "cal_yr1998" "cal_yr1999" "cal_yr2000" "cal_yr2001"
[17] "cal_yr2002" "cal_yr2003" "cal_yr2004" "cal_yr2005"
## Add to dataset
chicago_long <- cbind(chicago_long,
yr_dummy_vars)
## Poisson (It did not help)
res.v2 <- glm(formula = event ~ age65 + newrace + male + cal_yr1986 + cal_yr1987 + cal_yr1988 + cal_yr1989 + cal_yr1990 + cal_yr1991 + cal_yr1992 + cal_yr1993 + cal_yr1994 + cal_yr1995 + cal_yr1996 + cal_yr1997 + cal_yr1998 + cal_yr1999 + cal_yr2000 + cal_yr2001 + cal_yr2002 + cal_yr2003 + cal_yr2004 + cal_yr2005,
family = poisson(link = "log"),
data = chicago_long)
summary(res.v2)
Call:
glm(formula = event ~ age65 + newrace + male + cal_yr1986 + cal_yr1987 +
cal_yr1988 + cal_yr1989 + cal_yr1990 + cal_yr1991 + cal_yr1992 +
cal_yr1993 + cal_yr1994 + cal_yr1995 + cal_yr1996 + cal_yr1997 +
cal_yr1998 + cal_yr1999 + cal_yr2000 + cal_yr2001 + cal_yr2002 +
cal_yr2003 + cal_yr2004 + cal_yr2005, family = poisson(link = "log"),
data = chicago_long)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.246 -0.307 -0.249 -0.206 2.747
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -17.155828 45.084742 -0.38 0.70
age65 0.064695 0.000959 67.49 < 2e-16 ***
newrace2 0.305916 0.020100 15.22 < 2e-16 ***
newrace3 0.303154 0.050709 5.98 2.3e-09 ***
male 0.109223 0.015084 7.24 4.5e-13 ***
cal_yr1986 13.558609 45.084771 0.30 0.76
cal_yr1987 13.831278 45.084755 0.31 0.76
cal_yr1988 13.641359 45.084754 0.30 0.76
cal_yr1989 13.490020 45.084754 0.30 0.76
cal_yr1990 13.356974 45.084753 0.30 0.77
cal_yr1991 13.281846 45.084752 0.29 0.77
cal_yr1992 13.237651 45.084751 0.29 0.77
cal_yr1993 12.878211 45.084754 0.29 0.78
cal_yr1994 13.053587 45.084751 0.29 0.77
cal_yr1995 13.185591 45.084748 0.29 0.77
cal_yr1996 13.046484 45.084749 0.29 0.77
cal_yr1997 12.972161 45.084749 0.29 0.77
cal_yr1998 12.971685 45.084748 0.29 0.77
cal_yr1999 12.939012 45.084748 0.29 0.77
cal_yr2000 12.391757 45.084754 0.27 0.78
cal_yr2001 12.754786 45.084749 0.28 0.78
cal_yr2002 12.805058 45.084748 0.28 0.78
cal_yr2003 12.899740 45.084747 0.29 0.77
cal_yr2004 15.705196 45.084749 0.35 0.73
cal_yr2005 15.858065 45.084798 0.35 0.73
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 117068 on 433395 degrees of freedom
Residual deviance: 105131 on 433371 degrees of freedom
AIC: 142355
Number of Fisher Scoring iterations: 14