1. Verify that the calculations of total time of exposure and
person-days experienced appears to be correct by reviewing the contents
of the dataset.
# setting the environment
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
binData = read_csv("binlymph.csv")
## Rows: 20 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (11): stage, bin, D, P_Days, I_Rate, L, mid_days, P, Survival, Stage3, S...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(binData)
## # A tibble: 20 × 11
## stage bin D P_Days I_Rate L mid_days P Survival Stage3 Stage4
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0 1 132 0.00758 7 3.5 0.947 0.947 1 NA
## 2 0 7 0 144 0 8 11 1 0.947 0.947 NA
## 3 0 15 1 259 0.00386 15 22.5 0.942 0.892 0.947 NA
## 4 0 30 3 429 0.00699 30 45 0.790 0.705 0.892 NA
## 5 0 60 0 390 0 30 75 1 0.705 0.705 NA
## 6 0 90 1 364 0.00275 30 105 0.918 0.647 0.705 NA
## 7 0 120 1 336 0.00298 30 135 0.911 0.589 0.647 NA
## 8 0 150 0 319 0 30 165 1 0.589 0.589 NA
## 9 0 180 2 703 0.00284 90 225 0.744 0.438 0.589 NA
## 10 0 270 0 227 0 NA 315 1 0.438 0.438 NA
## 11 1 0 1 109 0.00917 7 3.5 0.936 0.936 NA 1
## 12 1 7 3 109 0.0275 8 11 0.780 0.730 NA 0.936
## 13 1 15 0 180 0 15 22.5 1 0.730 NA 0.730
## 14 1 30 4 297 0.0135 30 45 0.596 0.435 NA 0.730
## 15 1 60 3 205 0.0146 30 75 0.561 0.244 NA 0.435
## 16 1 90 1 123 0.00813 30 105 0.756 0.184 NA 0.244
## 17 1 120 0 120 0 30 135 1 0.184 NA 0.184
## 18 1 150 1 115 0.00870 30 165 0.739 0.136 NA 0.184
## 19 1 180 0 247 0 90 225 1 0.136 NA 0.136
## 20 1 270 0 96 0 NA 315 1 0.136 NA 0.136
summary(binData)
## stage bin D P_Days
## Min. :0.0 Min. : 0.0 Min. :0.00 Min. : 96.0
## 1st Qu.:0.0 1st Qu.: 15.0 1st Qu.:0.00 1st Qu.:122.2
## Median :0.5 Median : 75.0 Median :1.00 Median :216.0
## Mean :0.5 Mean : 92.2 Mean :1.10 Mean :245.2
## 3rd Qu.:1.0 3rd Qu.:150.0 3rd Qu.:1.25 3rd Qu.:323.2
## Max. :1.0 Max. :270.0 Max. :4.00 Max. :703.0
##
## I_Rate L mid_days P
## Min. :0.000000 Min. : 7 Min. : 3.5 Min. :0.5610
## 1st Qu.:0.000000 1st Qu.:15 1st Qu.: 22.5 1st Qu.:0.7739
## Median :0.002911 Median :30 Median : 90.0 Median :0.9389
## Mean :0.005431 Mean :30 Mean :110.2 Mean :0.8810
## 3rd Qu.:0.008271 3rd Qu.:30 3rd Qu.:165.0 3rd Qu.:1.0000
## Max. :0.027523 Max. :90 Max. :315.0 Max. :1.0000
## NA's :2
## Survival Stage3 Stage4
## Min. :0.1363 Min. :0.4383 Min. :0.1363
## 1st Qu.:0.2291 1st Qu.:0.6035 1st Qu.:0.1845
## Median :0.5891 Median :0.7050 Median :0.3394
## Mean :0.5375 Mean :0.7459 Mean :0.4716
## 3rd Qu.:0.7297 3rd Qu.:0.9333 3rd Qu.:0.7297
## Max. :0.9470 Max. :1.0000 Max. :1.0000
## NA's :10 NA's :10
# calculation of total time of exposure (=total person time, person-days)
total_pt <- sum(binData$P_Days)
total_pt
## [1] 4904
#plot of S(t) –vs.- mid_days for each group
qplot(x=mid_days, y=Survival,
col=factor(stage, labels=c("Stage 3", "Stage 4")),
data=binData) + labs(col="Cancer Stage")
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

3. Fit several log-linear Poisson regression models to the grouped
survival data - Generate time terms, centered and spline, interaction
terms
# Generate time terms, centered and spline:
binData = binData %>%
mutate(t60 = t-60) %>%
mutate(t60sp = ifelse(t > 60, t-60, 0))
# Model A: stage
modelA = glm(D ~ stage, offset=log(N), family=poisson(link="log"), data=binData)
summary(modelA)
##
## Call:
## glm(formula = D ~ stage, family = poisson(link = "log"), data = binData,
## offset = log(N))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.9054 0.3333 -17.716 <2e-16 ***
## stage 1.0919 0.4336 2.518 0.0118 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 30.505 on 19 degrees of freedom
## Residual deviance: 24.053 on 18 degrees of freedom
## AIC: 56.908
##
## Number of Fisher Scoring iterations: 5
modelA$coefficients; confint.default(modelA) ## coefficients
## (Intercept) stage
## -5.905362 1.091927
## 2.5 % 97.5 %
## (Intercept) -6.5586828 -5.252041
## stage 0.2420319 1.941823
exp(modelA$coefficients); exp(confint.default(modelA)) ## IRR
## (Intercept) stage
## 0.002724796 2.980012492
## 2.5 % 97.5 %
## (Intercept) 0.001417752 0.00523682
## stage 1.273834818 6.97144899
# Model B: stage + t-60
modelB = glm(D ~ stage + t60, offset=log(N),
family=poisson(link="log"), data=binData)
summary(modelB)
##
## Call:
## glm(formula = D ~ stage + t60, family = poisson(link = "log"),
## data = binData, offset = log(N))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.603146 0.340600 -16.451 <2e-16 ***
## stage 0.942699 0.437674 2.154 0.0312 *
## t60 -0.006977 0.003166 -2.204 0.0275 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 30.505 on 19 degrees of freedom
## Residual deviance: 18.019 on 17 degrees of freedom
## AIC: 52.874
##
## Number of Fisher Scoring iterations: 5
modelB$coefficients; confint.default(modelB) ## coefficients
## (Intercept) stage t60
## -5.603146449 0.942699007 -0.006976953
## 2.5 % 97.5 %
## (Intercept) -6.27070994 -4.9355829625
## stage 0.08487278 1.8005252297
## t60 -0.01318209 -0.0007718107
exp(modelB$coefficients); exp(confint.default(modelB)) ## IRR
## (Intercept) stage t60
## 0.003686247 2.566900159 0.993047330
## 2.5 % 97.5 %
## (Intercept) 0.001890886 0.00718627
## stage 1.088578574 6.05282575
## t60 0.986904409 0.99922849
# Model C: stage + t-60 + (t-60)^+
modelC = glm(D ~ stage + t60 + t60sp, offset=log(N),
family=poisson(link="log"), data=binData)
summary(modelC)
##
## Call:
## glm(formula = D ~ stage + t60 + t60sp, family = poisson(link = "log"),
## data = binData, offset = log(N))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.4322926 0.4621789 -11.754 <2e-16 ***
## stage 0.9514927 0.4378395 2.173 0.0298 *
## t60 -0.0006996 0.0123224 -0.057 0.9547
## t60sp -0.0081450 0.0154147 -0.528 0.5972
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 30.505 on 19 degrees of freedom
## Residual deviance: 17.736 on 16 degrees of freedom
## AIC: 54.591
##
## Number of Fisher Scoring iterations: 5
modelC$coefficients; confint.default(modelC) ## coefficients
## (Intercept) stage t60 t60sp
## -5.4322926309 0.9514926947 -0.0006995638 -0.0081450320
## 2.5 % 97.5 %
## (Intercept) -6.33814667 -4.52643860
## stage 0.09334297 1.80964242
## t60 -0.02485107 0.02345194
## t60sp -0.03835733 0.02206726
exp(modelC$coefficients); exp(confint.default(modelC)) ## IRR
## (Intercept) stage t60 t60sp
## 0.004373058 2.589572216 0.999300681 0.991888049
## 2.5 % 97.5 %
## (Intercept) 0.001767575 0.01081914
## stage 1.097838197 6.10826284
## t60 0.975455177 1.02372910
## t60sp 0.962368999 1.02231255
# Model D: stage + t-60 + (t-60)^+ + stage*(t-60) + stage*(t-60)^+
modelD = glm(D ~ stage + t60 + t60sp + stage:t60 + stage:t60sp, offset=log(N),
family=poisson(link="log"), data=binData)
summary(modelD)
##
## Call:
## glm(formula = D ~ stage + t60 + t60sp + stage:t60 + stage:t60sp,
## family = poisson(link = "log"), data = binData, offset = log(N))
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.792139 0.632165 -9.162 <2e-16 ***
## stage 1.614121 0.804147 2.007 0.0447 *
## t60 -0.009640 0.019928 -0.484 0.6286
## t60sp 0.005976 0.023829 0.251 0.8020
## stage:t60 0.016566 0.025548 0.648 0.5167
## stage:t60sp -0.029494 0.032583 -0.905 0.3654
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 30.505 on 19 degrees of freedom
## Residual deviance: 16.233 on 14 degrees of freedom
## AIC: 57.088
##
## Number of Fisher Scoring iterations: 5
modelD$coefficients; confint.default(modelD) ## coefficients
## (Intercept) stage t60 t60sp stage:t60 stage:t60sp
## -5.792139416 1.614121315 -0.009639959 0.005975502 0.016565567 -0.029494184
## 2.5 % 97.5 %
## (Intercept) -7.03115989 -4.55311894
## stage 0.03802288 3.19021975
## t60 -0.04869908 0.02941916
## t60sp -0.04072942 0.05268042
## stage:t60 -0.03350662 0.06663775
## stage:t60sp -0.09335523 0.03436687
exp(modelD$coefficients); exp(confint.default(modelD)) ## IRR
## (Intercept) stage t60 t60sp stage:t60 stage:t60sp
## 0.003051447 5.023471935 0.990406356 1.005993391 1.016703537 0.970936525
## 2.5 % 97.5 %
## (Intercept) 0.0008839059 0.0105343
## stage 1.0387549969 24.2937655
## t60 0.9524677004 1.0298562
## t60sp 0.9600888770 1.0540927
## stage:t60 0.9670485101 1.0689082
## stage:t60sp 0.9108698702 1.0349642