```
library(magrittr)
library(gnm)
require(survival)
## Raw data
dat <- read.table("http://statistics.open.ac.uk/sccs/R/ox.txt", header = TRUE)
dat
## indiv eventday start end exday
## 1 1 398 365 730 458
## 2 2 413 365 730 392
## 3 3 449 365 730 429
## 4 4 455 365 730 433
## 5 5 472 365 730 432
## 6 6 474 365 730 395
## 7 7 485 365 730 470
## 8 8 524 365 730 496
## 9 9 700 365 730 428
## 10 10 399 365 730 716
ex1 <- dat$exday + 14
ex2 <- dat$exday + 35
expo <- cbind(ex1, ex2)
age <- c(547)
expolev <- c(1, 0)
ncuts <- ncol(expo) + length(age) + 2
nevents <- nrow(dat)
#create an ordered list of individual events and
#cut points for start, end, exposure and age groups
ind <- rep(1:nevents, times = ncuts)
cutp <- c(as.matrix(dat$start), as.matrix(dat$end), expo, rep(age, each = nevents))
o <- order(ind, cutp)
ind = as.factor(ind[o])
cutp = cutp[o]
#calculate interval lengths, set to 0 if before start or after end
interval <- c(0, cutp[2:length(ind)]-cutp[1:length(ind)-1])
interval <- ifelse(cutp<=dat$start[ind], 0, interval)
interval <- ifelse(cutp>dat$end[ind], 0, interval)
#event = 1 if event occurred in interval, otherwise 0
event <- ifelse(dat$eventday[ind]>cutp-interval, 1, 0)
event <- ifelse(dat$eventday[ind]<=cutp, event, 0)
#age groups
agegr <- cut(cutp, breaks = c(min(dat$start), age, max(dat$end)), labels = FALSE)
agegr <- as.factor(agegr)
#exposure groups
exgr <- rep(0, nevents*ncuts)
for(i in 1:ncol(expo)){
exgr <- ifelse(cutp > expo[,i][ind], expolev[i], exgr)
}
exgr <- as.factor(exgr)
#put all data in a data frame, take out data with 0 interval lengths
chopdat <- data.frame(indiv = ind[interval!=0], event = event[interval!=0], interval = interval[interval!=0], agegr = agegr[interval!=0], exgr = exgr[interval!=0], loginterval = log(interval[interval!=0]))
## Show long format data
chopdat
## indiv event interval agegr exgr loginterval
## 1 1 1 107 1 0 4.672829
## 2 1 0 21 1 1 3.044522
## 3 1 0 54 1 0 3.988984
## 4 1 0 183 2 0 5.209486
## 5 2 0 41 1 0 3.713572
## 6 2 1 21 1 1 3.044522
## 7 2 0 120 1 0 4.787492
## 8 2 0 183 2 0 5.209486
## 9 3 0 78 1 0 4.356709
## 10 3 1 21 1 1 3.044522
## 11 3 0 83 1 0 4.418841
## 12 3 0 183 2 0 5.209486
## 13 4 0 82 1 0 4.406719
## 14 4 1 21 1 1 3.044522
## 15 4 0 79 1 0 4.369448
## 16 4 0 183 2 0 5.209486
## 17 5 0 81 1 0 4.394449
## 18 5 0 21 1 1 3.044522
## 19 5 1 80 1 0 4.382027
## 20 5 0 183 2 0 5.209486
## 21 6 0 44 1 0 3.784190
## 22 6 0 21 1 1 3.044522
## 23 6 1 117 1 0 4.762174
## 24 6 0 183 2 0 5.209486
## 25 7 0 119 1 0 4.779123
## 26 7 1 21 1 1 3.044522
## 27 7 0 42 1 0 3.737670
## 28 7 0 183 2 0 5.209486
## 29 8 0 145 1 0 4.976734
## 30 8 1 21 1 1 3.044522
## 31 8 0 16 1 0 2.772589
## 32 8 0 183 2 0 5.209486
## 33 9 0 77 1 0 4.343805
## 34 9 0 21 1 1 3.044522
## 35 9 0 84 1 0 4.430817
## 36 9 1 183 2 0 5.209486
## 37 10 1 182 1 0 5.204007
## 38 10 0 183 2 0 5.209486
## Using gnm
gnm1 <- gnm(event ~ exgr + agegr + offset(loginterval), data = chopdat,
family = poisson(link = "log"),
eliminate = indiv) # corresponds to strata(indiv)
summary(gnm1)
##
## Call:
## gnm(formula = event ~ exgr + agegr + offset(loginterval), eliminate = indiv,
## family = poisson(link = "log"), data = chopdat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.05409 -0.60311 -0.42566 0.08177 1.72741
##
## Coefficients of interest:
## Estimate Std. Error z value Pr(>|z|)
## exgr1 2.4880 0.7085 3.512 0.000445 ***
## agegr2 -1.4906 1.1182 -1.333 0.182543
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Residual deviance: 20.177 on 26 degrees of freedom
## AIC: 64.177
##
## Number of iterations: 5
## Using clogit
clogit1 <- clogit(event ~ exgr + agegr + strata(indiv) + offset(loginterval), data = chopdat)
summary(clogit1)
## Call:
## coxph(formula = Surv(rep(1, 38L), event) ~ exgr + agegr + strata(indiv) +
## offset(loginterval), data = chopdat, method = "exact")
##
## n= 38, number of events= 10
##
## coef exp(coef) se(coef) z Pr(>|z|)
## exgr1 2.4880 12.0369 0.7085 3.512 0.000445 ***
## agegr2 -1.4906 0.2252 1.1182 -1.333 0.182543
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## exgr1 12.0369 0.08308 3.00226 48.259
## agegr2 0.2252 4.43965 0.02517 2.016
##
## Rsquare= 0.392 (max possible= 0.643 )
## Likelihood ratio test= 18.92 on 2 df, p=0.0000781
## Wald test = 19.1 on 2 df, p=0.0000713
## Score (logrank) test = 42.29 on 2 df, p=6.557e-10
## Raw data
dat <- read.table("http://statistics.open.ac.uk/sccs/R/ox.txt", header = TRUE)
dat
## indiv eventday start end exday
## 1 1 398 365 730 458
## 2 2 413 365 730 392
## 3 3 449 365 730 429
## 4 4 455 365 730 433
## 5 5 472 365 730 432
## 6 6 474 365 730 395
## 7 7 485 365 730 470
## 8 8 524 365 730 496
## 9 9 700 365 730 428
## 10 10 399 365 730 716
## Start of exposure period
dat$start1 <- dat$exday + 14
## End of exposure period
dat$end1 <- dat$exday + 35
## Age period split time point
dat$age1 <- c(547)
## Create long format data
datLong <- plyr::ddply(.data = dat,
.variables = "indiv",
.fun = function(df) {
## Cut points
cuts <- with(df, sort(c(start, end, start1, end1, age1)))
## Split observation times
out <- data.frame(indiv = df$indiv,
exday = df$exday,
eventday = df$eventday,
start = head(cuts, -1),
end = cuts[-1])
## Event status for each interval
## 1 if event day is within the interval
out$event <- as.numeric(out$start <= df$eventday & df$eventday <= out$end)
## Exposure status for each interval
## 1 if interval is within the window
out$exgr <- as.numeric(df$start1 <= out$start & out$end <= df$end1)
## Age group status for each interval
## 1 if start is greater than the age cutoff
out$agegr <- as.numeric(out$start >= df$age1)
## Interval length
out$interval <- out$end - out$start
out$loginterval <- log(out$interval)
out
})
## Drop time period after observation period
datLong <- subset(datLong, start < 730)
## Show long format data
datLong
## indiv exday eventday start end event exgr agegr interval loginterval
## 1 1 458 398 365 472 1 0 0 107 4.672829
## 2 1 458 398 472 493 0 1 0 21 3.044522
## 3 1 458 398 493 547 0 0 0 54 3.988984
## 4 1 458 398 547 730 0 0 1 183 5.209486
## 5 2 392 413 365 406 0 0 0 41 3.713572
## 6 2 392 413 406 427 1 1 0 21 3.044522
## 7 2 392 413 427 547 0 0 0 120 4.787492
## 8 2 392 413 547 730 0 0 1 183 5.209486
## 9 3 429 449 365 443 0 0 0 78 4.356709
## 10 3 429 449 443 464 1 1 0 21 3.044522
## 11 3 429 449 464 547 0 0 0 83 4.418841
## 12 3 429 449 547 730 0 0 1 183 5.209486
## 13 4 433 455 365 447 0 0 0 82 4.406719
## 14 4 433 455 447 468 1 1 0 21 3.044522
## 15 4 433 455 468 547 0 0 0 79 4.369448
## 16 4 433 455 547 730 0 0 1 183 5.209486
## 17 5 432 472 365 446 0 0 0 81 4.394449
## 18 5 432 472 446 467 0 1 0 21 3.044522
## 19 5 432 472 467 547 1 0 0 80 4.382027
## 20 5 432 472 547 730 0 0 1 183 5.209486
## 21 6 395 474 365 409 0 0 0 44 3.784190
## 22 6 395 474 409 430 0 1 0 21 3.044522
## 23 6 395 474 430 547 1 0 0 117 4.762174
## 24 6 395 474 547 730 0 0 1 183 5.209486
## 25 7 470 485 365 484 0 0 0 119 4.779123
## 26 7 470 485 484 505 1 1 0 21 3.044522
## 27 7 470 485 505 547 0 0 0 42 3.737670
## 28 7 470 485 547 730 0 0 1 183 5.209486
## 29 8 496 524 365 510 0 0 0 145 4.976734
## 30 8 496 524 510 531 1 1 0 21 3.044522
## 31 8 496 524 531 547 0 0 0 16 2.772589
## 32 8 496 524 547 730 0 0 1 183 5.209486
## 33 9 428 700 365 442 0 0 0 77 4.343805
## 34 9 428 700 442 463 0 1 0 21 3.044522
## 35 9 428 700 463 547 0 0 0 84 4.430817
## 36 9 428 700 547 730 1 0 1 183 5.209486
## 37 10 716 399 365 547 1 0 0 182 5.204007
## 38 10 716 399 547 730 0 0 1 183 5.209486
## Conditional Poisson regression using gnm
gnm1 <- gnm(event ~ exgr + agegr + offset(loginterval),
data = datLong,
family = poisson(link = "log"),
eliminate = factor(indiv)) # corresponds to strata(indiv)
summary(gnm1)
##
## Call:
## gnm(formula = event ~ exgr + agegr + offset(loginterval), eliminate = factor(indiv),
## family = poisson(link = "log"), data = datLong)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.05409 -0.60311 -0.42566 0.08177 1.72741
##
## Coefficients of interest:
## Estimate Std. Error z value Pr(>|z|)
## exgr 2.4880 0.7085 3.512 0.000445 ***
## agegr -1.4906 1.1182 -1.333 0.182543
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Residual deviance: 20.177 on 26 degrees of freedom
## AIC: 64.177
##
## Number of iterations: 5
## Conditional logistic regression using clogit
clogit1 <- clogit(event ~ exgr + agegr + strata(indiv) + offset(loginterval), data = datLong)
summary(clogit1)
## Call:
## coxph(formula = Surv(rep(1, 38L), event) ~ exgr + agegr + strata(indiv) +
## offset(loginterval), data = datLong, method = "exact")
##
## n= 38, number of events= 10
##
## coef exp(coef) se(coef) z Pr(>|z|)
## exgr 2.4880 12.0369 0.7085 3.512 0.000445 ***
## agegr -1.4906 0.2252 1.1182 -1.333 0.182543
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## exgr 12.0369 0.08308 3.00226 48.259
## agegr 0.2252 4.43965 0.02517 2.016
##
## Rsquare= 0.392 (max possible= 0.643 )
## Likelihood ratio test= 18.92 on 2 df, p=0.0000781
## Wald test = 19.1 on 2 df, p=0.0000713
## Score (logrank) test = 42.29 on 2 df, p=6.557e-10