# Explore the data using descriptive statistics for the 641 time bins
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
framData = read_csv("FraminghamPS4bin.csv")
## Rows: 641 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (13): gender, cursmoke, diabetes, bpmeds, bmicat, agecat, tbin, D, Y, Ra...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary(framData)
## gender cursmoke diabetes bpmeds
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.5757 Mean :0.4867 Mean :0.2902 Mean :0.2777
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
##
## bmicat agecat tbin D
## Min. :1.000 Min. :1.000 Min. : 0 Min. : 0.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1825 1st Qu.: 0.000
## Median :3.000 Median :3.000 Median :3650 Median : 1.000
## Mean :2.789 Mean :2.643 Mean :3425 Mean : 2.348
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:5475 3rd Qu.: 2.000
## Max. :4.000 Max. :4.000 Max. :7300 Max. :24.000
##
## Y Rate Lower Upper
## Min. : 47 Min. :0.0000000 Min. :0e+00 Min. :0.00002
## 1st Qu.: 1825 1st Qu.:0.0000000 1st Qu.:1e-05 1st Qu.:0.00010
## Median : 7300 Median :0.0000176 Median :3e-05 Median :0.00040
## Mean : 51123 Mean :0.0002589 Mean :9e-05 Mean :0.00283
## 3rd Qu.: 56869 3rd Qu.:0.0001333 3rd Qu.:8e-05 3rd Qu.:0.00162
## Max. :528539 Max. :0.0212766 Max. :3e-03 Max. :0.15104
## NA's :284 NA's :284
## L
## Min. :1825
## 1st Qu.:1825
## Median :1825
## Mean :1825
## 3rd Qu.:1825
## Max. :1825
##
view(framData)
## explore the data for potential prediction variable
## tbin
table(framData$tbin, useNA="always")
##
## 0 1825 3650 5475 7300 <NA>
## 144 135 129 122 111 0
prop.table(table(framData$tbin))
##
## 0 1825 3650 5475 7300
## 0.2246490 0.2106084 0.2012480 0.1903276 0.1731669
summary(framData$tbin)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 1825 3650 3425 5475 7300
framData %>% count(tbin)
## # A tibble: 5 × 2
## tbin n
## <dbl> <int>
## 1 0 144
## 2 1825 135
## 3 3650 129
## 4 5475 122
## 5 7300 111
## list the data showing the population size and number of deaths in each of the groups
## stratified by gender
framData %>%
dplyr::select(tbin, gender) %>%
group_by(tbin) %>%
count(framData$gender) %>%
mutate(prop = n / sum(n)) %>%
print (n=Inf)
## # A tibble: 10 × 4
## # Groups: tbin [5]
## tbin `framData$gender` n prop
## <dbl> <dbl> <int> <dbl>
## 1 0 0 63 0.438
## 2 0 1 81 0.562
## 3 1825 0 60 0.444
## 4 1825 1 75 0.556
## 5 3650 0 55 0.426
## 6 3650 1 74 0.574
## 7 5475 0 51 0.418
## 8 5475 1 71 0.582
## 9 7300 0 43 0.387
## 10 7300 1 68 0.613
table(framData$geander, useNA="always")
## Warning: Unknown or uninitialised column: `geander`.
##
## <NA>
## 0
prop.table(table(framData$gender))
##
## 0 1
## 0.424337 0.575663
summary(framData$gender)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 1.0000 0.5757 1.0000 1.0000
## stratified by cursmoke
framData %>%
dplyr::select(tbin, cursmoke) %>%
group_by(tbin) %>%
count(framData$cursmoke) %>%
mutate(prop = n / sum(n)) %>%
print (n=Inf)
## # A tibble: 10 × 4
## # Groups: tbin [5]
## tbin `framData$cursmoke` n prop
## <dbl> <dbl> <int> <dbl>
## 1 0 0 72 0.5
## 2 0 1 72 0.5
## 3 1825 0 70 0.519
## 4 1825 1 65 0.481
## 5 3650 0 66 0.512
## 6 3650 1 63 0.488
## 7 5475 0 63 0.516
## 8 5475 1 59 0.484
## 9 7300 0 58 0.523
## 10 7300 1 53 0.477
table(framData$cursmoke, useNA="always")
##
## 0 1 <NA>
## 329 312 0
prop.table(table(framData$cursmoke))
##
## 0 1
## 0.5132605 0.4867395
summary(framData$cursmoke)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.4867 1.0000 1.0000
## stratified by diabetes
framData %>%
dplyr::select(tbin, diabetes) %>%
group_by(tbin) %>%
count(framData$diabetes) %>%
mutate(prop = n / sum(n)) %>%
print (n=Inf)
## # A tibble: 10 × 4
## # Groups: tbin [5]
## tbin `framData$diabetes` n prop
## <dbl> <dbl> <int> <dbl>
## 1 0 0 97 0.674
## 2 0 1 47 0.326
## 3 1825 0 94 0.696
## 4 1825 1 41 0.304
## 5 3650 0 92 0.713
## 6 3650 1 37 0.287
## 7 5475 0 88 0.721
## 8 5475 1 34 0.279
## 9 7300 0 84 0.757
## 10 7300 1 27 0.243
table(framData$diabetes, useNA="always")
##
## 0 1 <NA>
## 455 186 0
prop.table(table(framData$diabetes))
##
## 0 1
## 0.7098284 0.2901716
summary(framData$diabetes)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2902 1.0000 1.0000
## stratified by bpmeds
framData %>%
dplyr::select(tbin, bpmeds) %>%
group_by(tbin) %>%
count(framData$bpmeds) %>%
mutate(prop = n / sum(n)) %>%
print (n=Inf)
## # A tibble: 10 × 4
## # Groups: tbin [5]
## tbin `framData$bpmeds` n prop
## <dbl> <dbl> <int> <dbl>
## 1 0 0 99 0.688
## 2 0 1 45 0.312
## 3 1825 0 97 0.719
## 4 1825 1 38 0.281
## 5 3650 0 94 0.729
## 6 3650 1 35 0.271
## 7 5475 0 91 0.746
## 8 5475 1 31 0.254
## 9 7300 0 82 0.739
## 10 7300 1 29 0.261
table(framData$bpmeds, useNA="always")
##
## 0 1 <NA>
## 463 178 0
prop.table(table(framData$bpmeds))
##
## 0 1
## 0.7223089 0.2776911
summary(framData$bpmeds)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2777 1.0000 1.0000
## stratified by bmicat
framData %>%
dplyr::select(tbin, bmicat) %>%
group_by(tbin) %>%
count(framData$bmicat) %>%
mutate(prop = n / sum(n)) %>%
print (n=Inf)
## # A tibble: 20 × 4
## # Groups: tbin [5]
## tbin `framData$bmicat` n prop
## <dbl> <dbl> <int> <dbl>
## 1 0 1 15 0.104
## 2 0 2 43 0.299
## 3 0 3 45 0.312
## 4 0 4 41 0.285
## 5 1825 1 15 0.111
## 6 1825 2 40 0.296
## 7 1825 3 41 0.304
## 8 1825 4 39 0.289
## 9 3650 1 14 0.109
## 10 3650 2 37 0.287
## 11 3650 3 39 0.302
## 12 3650 4 39 0.302
## 13 5475 1 13 0.107
## 14 5475 2 33 0.270
## 15 5475 3 37 0.303
## 16 5475 4 39 0.320
## 17 7300 1 12 0.108
## 18 7300 2 33 0.297
## 19 7300 3 35 0.315
## 20 7300 4 31 0.279
table(framData$bmicat, useNA="always")
##
## 1 2 3 4 <NA>
## 69 186 197 189 0
prop.table(table(framData$bmicat))
##
## 1 2 3 4
## 0.1076443 0.2901716 0.3073323 0.2948518
summary(framData$bmicat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 2.789 4.000 4.000
## stratified by agecat
framData %>%
dplyr::select(tbin, agecat) %>%
group_by(tbin) %>%
count(framData$agecat) %>%
mutate(prop = n / sum(n)) %>%
print (n=Inf)
## # A tibble: 20 × 4
## # Groups: tbin [5]
## tbin `framData$agecat` n prop
## <dbl> <dbl> <int> <dbl>
## 1 0 1 22 0.153
## 2 0 2 38 0.264
## 3 0 3 43 0.299
## 4 0 4 41 0.285
## 5 1825 1 21 0.156
## 6 1825 2 37 0.274
## 7 1825 3 42 0.311
## 8 1825 4 35 0.259
## 9 3650 1 21 0.163
## 10 3650 2 37 0.287
## 11 3650 3 39 0.302
## 12 3650 4 32 0.248
## 13 5475 1 19 0.156
## 14 5475 2 37 0.303
## 15 5475 3 36 0.295
## 16 5475 4 30 0.246
## 17 7300 1 19 0.171
## 18 7300 2 37 0.333
## 19 7300 3 32 0.288
## 20 7300 4 23 0.207
table(framData$agecat, useNA="always")
##
## 1 2 3 4 <NA>
## 102 186 192 161 0
prop.table(table(framData$agecat))
##
## 1 2 3 4
## 0.1591264 0.2901716 0.2995320 0.2511700
summary(framData$agecat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 2.643 4.000 4.000
between models
### explore several Poisson regression models
# model A - incl. every var
modelA = glm(D ~ gender + cursmoke + diabetes + bpmeds + bmicat + agecat, offset=log(Y), data=framData, family=poisson(link="log"))
summary(modelA)
##
## Call:
## glm(formula = D ~ gender + cursmoke + diabetes + bpmeds + bmicat +
## agecat, family = poisson(link = "log"), data = framData,
## offset = log(Y))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.25845 0.14827 -82.675 < 2e-16 ***
## gender -0.50352 0.05348 -9.415 < 2e-16 ***
## cursmoke 0.35391 0.05514 6.419 1.38e-10 ***
## diabetes 0.79385 0.11012 7.209 5.63e-13 ***
## bpmeds 0.64452 0.10893 5.917 3.28e-09 ***
## bmicat 0.12847 0.03718 3.455 0.00055 ***
## agecat 0.73529 0.03015 24.388 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 1985.3 on 640 degrees of freedom
## Residual deviance: 1095.5 on 634 degrees of freedom
## AIC: 2132.5
##
## Number of Fisher Scoring iterations: 6
modelA$coefficients; confint.default(modelA)
## (Intercept) gender cursmoke diabetes bpmeds bmicat
## -12.2584464 -0.5035228 0.3539143 0.7938520 0.6445170 0.1284729
## agecat
## 0.7352874
## 2.5 % 97.5 %
## (Intercept) -12.54905656 -11.9678363
## gender -0.60834449 -0.3987012
## cursmoke 0.24584330 0.4619852
## diabetes 0.57802904 1.0096749
## bpmeds 0.43102419 0.8580098
## bmicat 0.05559909 0.2013467
## agecat 0.67619536 0.7943795
exp(modelA$coefficients); exp(confint.default(modelA))
## (Intercept) gender cursmoke diabetes bpmeds bmicat
## 4.744870e-06 6.043977e-01 1.424633e+00 2.211900e+00 1.905067e+00 1.137091e+00
## agecat
## 2.086082e+00
## 2.5 % 97.5 %
## (Intercept) 3.548248e-06 6.345045e-06
## gender 5.442511e-01 6.711912e-01
## cursmoke 1.278699e+00 1.587222e+00
## diabetes 1.782522e+00 2.744709e+00
## bpmeds 1.538833e+00 2.358462e+00
## bmicat 1.057174e+00 1.223049e+00
## agecat 1.966382e+00 2.213067e+00
# model B - excl. not significant var in univariable poisson model
modelB = glm(D ~ gender + diabetes + bpmeds, offset=log(Y), data=framData, family=poisson(link="log"))
summary(modelB)
##
## Call:
## glm(formula = D ~ gender + diabetes + bpmeds, family = poisson(link = "log"),
## data = framData, offset = log(Y))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.79580 0.03571 -274.29 <2e-16 ***
## gender -0.53429 0.05208 -10.26 <2e-16 ***
## diabetes 1.14243 0.10875 10.51 <2e-16 ***
## bpmeds 0.98672 0.10713 9.21 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 1985.3 on 640 degrees of freedom
## Residual deviance: 1740.8 on 637 degrees of freedom
## AIC: 2771.8
##
## Number of Fisher Scoring iterations: 6
modelB$coefficients; confint.default(modelB)
## (Intercept) gender diabetes bpmeds
## -9.7958002 -0.5342919 1.1424253 0.9867199
## 2.5 % 97.5 %
## (Intercept) -9.8657965 -9.7258039
## gender -0.6363734 -0.4322105
## diabetes 0.9292810 1.3555695
## bpmeds 0.7767488 1.1966911
exp(modelB$coefficients); exp(confint.default(modelB))
## (Intercept) gender diabetes bpmeds
## 5.568497e-05 5.860841e-01 3.134361e+00 2.682422e+00
## 2.5 % 97.5 %
## (Intercept) 5.192052e-05 5.972237e-05
## gender 5.292082e-01 6.490728e-01
## diabetes 2.532688e+00 3.878970e+00
## bpmeds 2.174391e+00 3.309149e+00
# model C - excl. not significant var in univariable poisson model
modelC = glm(D ~ cursmoke + bmicat + agecat, offset=log(Y), data=framData, family=poisson(link="log"))
summary(modelC)
##
## Call:
## glm(formula = D ~ cursmoke + bmicat + agecat, family = poisson(link = "log"),
## data = framData, offset = log(Y))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.71997 0.14151 -89.887 < 2e-16 ***
## cursmoke 0.44601 0.05368 8.309 < 2e-16 ***
## bmicat 0.17716 0.03639 4.868 1.13e-06 ***
## agecat 0.76534 0.03008 25.440 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 1985.3 on 640 degrees of freedom
## Residual deviance: 1249.0 on 637 degrees of freedom
## AIC: 2280
##
## Number of Fisher Scoring iterations: 5
modelC$coefficients; confint.default(modelC)
## (Intercept) cursmoke bmicat agecat
## -12.7199665 0.4460132 0.1771617 0.7653370
## 2.5 % 97.5 %
## (Intercept) -12.9973228 -12.4426102
## cursmoke 0.3408061 0.5512203
## bmicat 0.1058362 0.2484871
## agecat 0.7063744 0.8242997
exp(modelC$coefficients); exp(confint.default(modelC))
## (Intercept) cursmoke bmicat agecat
## 2.990809e-06 1.562072e+00 1.193824e+00 2.149719e+00
## 2.5 % 97.5 %
## (Intercept) 2.266389e-06 3.946781e-06
## cursmoke 1.406081e+00 1.735369e+00
## bmicat 1.111640e+00 1.282084e+00
## agecat 2.026630e+00 2.280283e+00
## compare AIC
AIC(modelA)
## [1] 2132.49
AIC(modelB)
## [1] 2771.786
AIC(modelC)
## [1] 2279.996
# Pearson chi-square goodness-of-fit test (like poisgof in Stata)
X2 = sum(residuals(modelA, type = "pearson")^2); X2
## [1] 1821.248
df = modelA$df.residual; df
## [1] 634
pval = 1-pchisq(X2, df); pval
## [1] 0
# method 2: Negative binomial regression
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
modelD = glm.nb(D ~ gender + cursmoke + diabetes + bpmeds + bmicat + agecat + offset(log(Y)), data=framData)
summary(modelD)
##
## Call:
## glm.nb(formula = D ~ gender + cursmoke + diabetes + bpmeds +
## bmicat + agecat + offset(log(Y)), data = framData, init.theta = 3.249813397,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.24777 0.20865 -58.701 < 2e-16 ***
## gender -0.53592 0.08467 -6.330 2.46e-10 ***
## cursmoke 0.41137 0.08579 4.795 1.62e-06 ***
## diabetes 0.77647 0.12628 6.149 7.82e-10 ***
## bpmeds 0.64337 0.12706 5.063 4.12e-07 ***
## bmicat 0.12382 0.05230 2.367 0.0179 *
## agecat 0.76950 0.04464 17.238 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(3.2498) family taken to be 1)
##
## Null deviance: 1093.92 on 640 degrees of freedom
## Residual deviance: 667.42 on 634 degrees of freedom
## AIC: 1959.9
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 3.250
## Std. Err.: 0.509
##
## 2 x log-likelihood: -1943.928
modelD$coefficients; confint.default(modelD)
## (Intercept) gender cursmoke diabetes bpmeds bmicat
## -12.2477690 -0.5359185 0.4113733 0.7764684 0.6433744 0.1238180
## agecat
## 0.7694988
## 2.5 % 97.5 %
## (Intercept) -12.65671147 -11.8388264
## gender -0.70186288 -0.3699741
## cursmoke 0.24323765 0.5795090
## diabetes 0.52895544 1.0239813
## bpmeds 0.39433436 0.8924144
## bmicat 0.02130334 0.2263327
## agecat 0.68200484 0.8569927
exp(modelD$coefficients); exp(confint.default(modelD))
## (Intercept) gender cursmoke diabetes bpmeds bmicat
## 4.795805e-06 5.851316e-01 1.508889e+00 2.173782e+00 1.902891e+00 1.131810e+00
## agecat
## 2.158684e+00
## 2.5 % 97.5 %
## (Intercept) 3.186105e-06 7.218767e-06
## gender 4.956611e-01 6.907522e-01
## cursmoke 1.275372e+00 1.785162e+00
## diabetes 1.697159e+00 2.784258e+00
## bpmeds 1.483396e+00 2.441016e+00
## bmicat 1.021532e+00 1.253993e+00
## agecat 1.977839e+00 2.356065e+00
modelE = glm.nb(D ~ cursmoke + bmicat + agecat + offset(log(Y)), data=framData)
summary(modelE)
##
## Call:
## glm.nb(formula = D ~ cursmoke + bmicat + agecat + offset(log(Y)),
## data = framData, init.theta = 2.412077272, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.59378 0.21763 -57.869 < 2e-16 ***
## cursmoke 0.39591 0.09350 4.234 2.29e-05 ***
## bmicat 0.16881 0.05564 3.034 0.00241 **
## agecat 0.82482 0.04691 17.585 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(2.4121) family taken to be 1)
##
## Null deviance: 988.29 on 640 degrees of freedom
## Residual deviance: 689.89 on 637 degrees of freedom
## AIC: 2031
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 2.412
## Std. Err.: 0.324
##
## 2 x log-likelihood: -2021.022
modelE$coefficients; confint.default(modelE)
## (Intercept) cursmoke bmicat agecat
## -12.5937782 0.3959137 0.1688065 0.8248238
## 2.5 % 97.5 %
## (Intercept) -13.0203154 -12.1672410
## cursmoke 0.2126561 0.5791713
## bmicat 0.0597505 0.2778624
## agecat 0.7328895 0.9167581
exp(modelE$coefficients); exp(confint.default(modelE))
## (Intercept) cursmoke bmicat agecat
## 3.393061e-06 1.485741e+00 1.183891e+00 2.281479e+00
## 2.5 % 97.5 %
## (Intercept) 2.214873e-06 5.197977e-06
## cursmoke 1.236959e+00 1.784559e+00
## bmicat 1.061572e+00 1.320305e+00
## agecat 2.081085e+00 2.501169e+00
AIC(modelA)
## [1] 2132.49
AIC(modelD)
## [1] 1959.928
AIC(modelE)
## [1] 2031.022
## model 1 - gender - sig
model1 = glm.nb(D ~ gender + offset(log(Y)), data=framData)
summary(model1)
##
## Call:
## glm.nb(formula = D ~ gender + offset(log(Y)), data = framData,
## init.theta = 0.9854264366, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.14216 0.08451 -108.177 < 2e-16 ***
## gender -0.48496 0.11733 -4.133 3.58e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.9854) family taken to be 1)
##
## Null deviance: 694.34 on 640 degrees of freedom
## Residual deviance: 680.23 on 639 degrees of freedom
## AIC: 2234.9
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.9854
## Std. Err.: 0.0975
##
## 2 x log-likelihood: -2228.9260
model1$coefficients; confint.default(model1)
## (Intercept) gender
## -9.1421603 -0.4849586
## 2.5 % 97.5 %
## (Intercept) -9.3077998 -8.9765209
## gender -0.7149207 -0.2549965
exp(model1$coefficients); exp(confint.default(model1))
## (Intercept) gender
## 0.0001070558 0.6157226841
## 2.5 % 97.5 %
## (Intercept) 9.071392e-05 0.0001263416
## gender 4.892309e-01 0.7749191909
AIC(model1)
## [1] 2234.926
# model 2 - smoking - not sig
model2 = glm.nb(D ~ cursmoke + offset(log(Y)), data=framData)
summary(model2)
##
## Call:
## glm.nb(formula = D ~ cursmoke + offset(log(Y)), data = framData,
## init.theta = 0.9427530219, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.41755 0.08131 -115.829 <2e-16 ***
## cursmoke 0.08163 0.11843 0.689 0.491
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.9428) family taken to be 1)
##
## Null deviance: 681.08 on 640 degrees of freedom
## Residual deviance: 680.69 on 639 degrees of freedom
## AIC: 2248.4
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.9428
## Std. Err.: 0.0923
##
## 2 x log-likelihood: -2242.4470
model2$coefficients; confint.default(model2)
## (Intercept) cursmoke
## -9.41755119 0.08163472
## 2.5 % 97.5 %
## (Intercept) -9.5769078 -9.2581946
## cursmoke -0.1504912 0.3137607
exp(model2$coefficients); exp(confint.default(model2))
## (Intercept) cursmoke
## 8.128483e-05 1.085059e+00
## 2.5 % 97.5 %
## (Intercept) 6.931094e-05 9.532727e-05
## cursmoke 8.602853e-01 1.368562e+00
AIC(model2)
## [1] 2248.447
# model 3 - diabetes - sig
model3 = glm.nb(D ~ diabetes + offset(log(Y)), data=framData)
summary(model3)
##
## Call:
## glm.nb(formula = D ~ diabetes + offset(log(Y)), data = framData,
## init.theta = 1.016985054, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.54911 0.06296 -151.672 < 2e-16 ***
## diabetes 0.83352 0.14428 5.777 7.59e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.017) family taken to be 1)
##
## Null deviance: 703.87 on 640 degrees of freedom
## Residual deviance: 675.59 on 639 degrees of freedom
## AIC: 2221.1
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.017
## Std. Err.: 0.103
##
## 2 x log-likelihood: -2215.136
model3$coefficients; confint.default(model3)
## (Intercept) diabetes
## -9.549105 0.833520
## 2.5 % 97.5 %
## (Intercept) -9.6725026 -9.425708
## diabetes 0.5507427 1.116297
exp(model3$coefficients); exp(confint.default(model3))
## (Intercept) diabetes
## 7.126499e-05 2.301405e+00
## 2.5 % 97.5 %
## (Intercept) 6.299202e-05 8.062448e-05
## diabetes 1.734541e+00 3.053527e+00
AIC(model3)
## [1] 2221.136
# model 4 - bpmeds - sig
model4 = glm.nb(D ~ bpmeds + offset(log(Y)), data=framData)
summary(model4)
##
## Call:
## glm.nb(formula = D ~ bpmeds + offset(log(Y)), data = framData,
## init.theta = 0.9827439317, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.49980 0.06415 -148.090 < 2e-16 ***
## bpmeds 0.57886 0.14608 3.962 7.42e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.9827) family taken to be 1)
##
## Null deviance: 693.52 on 640 degrees of freedom
## Residual deviance: 680.17 on 639 degrees of freedom
## AIC: 2235.7
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.9827
## Std. Err.: 0.0986
##
## 2 x log-likelihood: -2229.6720
model4$coefficients; confint.default(model4)
## (Intercept) bpmeds
## -9.4997982 0.5788609
## 2.5 % 97.5 %
## (Intercept) -9.6255276 -9.3740688
## bpmeds 0.2925398 0.8651821
exp(model4$coefficients); exp(confint.default(model4))
## (Intercept) bpmeds
## 7.486694e-05 1.784005e+00
## 2.5 % 97.5 %
## (Intercept) 6.602167e-05 8.489726e-05
## bpmeds 1.339826e+00 2.375439e+00
AIC(model4)
## [1] 2235.672
# model 5 - bmicat - not sig
model5 = glm.nb(D ~ bmicat + offset(log(Y)), data=framData)
summary(model5)
##
## Call:
## glm.nb(formula = D ~ bmicat + offset(log(Y)), data = framData,
## init.theta = 0.9502081538, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.93550 0.19851 -50.050 < 2e-16 ***
## bmicat 0.19428 0.06598 2.945 0.00323 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.9502) family taken to be 1)
##
## Null deviance: 683.43 on 640 degrees of freedom
## Residual deviance: 676.16 on 639 degrees of freedom
## AIC: 2241.6
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.9502
## Std. Err.: 0.0929
##
## 2 x log-likelihood: -2235.5780
model5$coefficients; confint.default(model5)
## (Intercept) bmicat
## -9.9355048 0.1942836
## 2.5 % 97.5 %
## (Intercept) -10.32457668 -9.5464330
## bmicat 0.06496495 0.3236023
exp(model5$coefficients); exp(confint.default(model5))
## (Intercept) bmicat
## 4.842449e-05 1.214441e+00
## 2.5 % 97.5 %
## (Intercept) 3.281658e-05 7.145569e-05
## bmicat 1.067122e+00 1.382098e+00
AIC(model5)
## [1] 2241.578
# model 6 - agecat - not sig
model6 = glm.nb(D ~ agecat + offset(log(Y)), data=framData)
summary(model6)
##
## Call:
## glm.nb(formula = D ~ agecat + offset(log(Y)), data = framData,
## init.theta = 2.154780183, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.83177 0.14257 -82.99 <2e-16 ***
## agecat 0.80035 0.04724 16.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(2.1548) family taken to be 1)
##
## Null deviance: 948.99 on 640 degrees of freedom
## Residual deviance: 689.06 on 639 degrees of freedom
## AIC: 2049
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 2.155
## Std. Err.: 0.277
##
## 2 x log-likelihood: -2043.024
model6$coefficients; confint.default(model6)
## (Intercept) agecat
## -11.8317697 0.8003453
## 2.5 % 97.5 %
## (Intercept) -12.111210 -11.5523297
## agecat 0.707753 0.8929376
exp(model6$coefficients); exp(confint.default(model6))
## (Intercept) agecat
## 7.269887e-06 2.226310e+00
## 2.5 % 97.5 %
## (Intercept) 5.497541e-06 9.613620e-06
## agecat 2.029426e+00 2.442294e+00
AIC(model6)
## [1] 2049.024