TIME SERIES ANALYSIS: Distributed lag non-linear model

This is an R HTML document. Here a DLNM analysis demonstrates the association between total mortalities and environmental temperature

#If csv use read.csv
setwd("/Users/kennyorielolana/Documents/DATA ANALYSIS and Mapping/Training in Time series analysis")

## Error in setwd("/Users/kennyorielolana/Documents/DATA ANALYSIS and Mapping/Training in Time series analysis"): cannot change working directory

data<-read.csv("Sample.csv")
View(data)

## Error in check_for_XQuartz(file.path(R.home("modules"), "R_de.so")): X11 library is missing: install XQuartz from www.xquartz.org

#Distributed Lag Non-linear Model (DLNM)
library(dlnm)

## This is dlnm 2.4.7. For details: help(dlnm) and vignette('dlnmOverview').

library(splines)
library(mgcv)

## Loading required package: nlme

## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.

#build up basis function for exposure-response dimension
argvar <- list(fun="ns", knots=equalknots(data$temp, df=4))

#build up basis function for exposure-lag dimensions (df=3)
arglag=list(knots=logknots(21,3))

#build up the basis function for exposure-lag-response dimension ## 21 day lag was chosen
cb <- crossbasis(data$temp, lag=21, argvar=argvar, arglag=arglag)

#Check parameter settings for cross basis matrix
summary(cb)

## CROSSBASIS FUNCTIONS
## observations: 1461 
## range: -2.4 to 34 
## lag period: 0 21 
## total df:  20 
## 
## BASIS FOR VAR:
## fun: ns 
## knots: 6.7 15.8 24.9 
## intercept: FALSE 
## Boundary.knots: -2.4 34 
## 
## BASIS FOR LAG:
## fun: ns 
## knots: 1.011193 2.779473 7.639956 
## intercept: TRUE 
## Boundary.knots: 0 21

#Build up the time series model
formula <- total~cb+as.factor(dow)+ns(rh,3)+ns(day, 4*7)
model <- gam(formula, data, family=quasipoisson,
             na.action="na.exclude")

#Parsing and plotting
pred.temp<- crosspred(cb, model, by=0.1, cen=median(data$temp,
                                                    na.rm=TRUE))
#plot1 is shown below
plot(pred.temp, "overall", col=2, cex.axis=1.50, xlab="temperature (C)"
     , ylab="RR", main="Overall effects of the temperature of the sample",
     cex.main=1.5)

##Find the minimum mortality temp
which.min(pred.temp$allRRfit) #equal to MMT=26

## 25.9 
##  283

#Use 26 as the minimum temp and re-predict
predcum<- crosspred(cb, model, cumul=TRUE, cen=26)

#Re-plot, with reference temp (MMT) plot no.2
plot(predcum, "overall", col=2, cex.axis=1.50, xlab="temperature(C)",
     ylab="RR", main="Overall effect of the sample temperature",
     cex.main=1.5, cex.lab=1.5)

#Extract the value of risk of death corresponding to a temp, e.g. at extreme low and high
temp2.5<- round(quantile(data$temp, probs=c(0.025), na.rm=TRUE), 0)
temp97.5<- round(quantile(data$temp, probs=c(0.975), na.rm=TRUE),0)

#Cumulative effects of all lagged days (RR, 95% CI), 21 days effects is chosen. Here RR of temp 2.5 and 97.5, including 95%CI were extracted
round(cbind(predcum$allRRfit, predcum$allRRlow, predcum$allRRhigh)
      [c(as.character(temp2.5), as.character(temp97.5)),], digits=3)

##     [,1]  [,2]  [,3]
## 2  1.644 1.383 1.955
## 31 1.080 1.003 1.164

#same-day lagged effects of a specific temp on a specific lagged day
# Ex. effect of extremely low temp (temp2.5) and extremely high temp (temp97.5)- effect of lag
# on lag day 2 (lag2)
round(cbind(predcum$matRRfit[as.character(temp2.5),]["lag2"],
            predcum$matRRlow[as.character(temp2.5),]["lag2"],
            predcum$matRRhigh[as.character(temp2.5),]["lag2"]), digits=3)

##       [,1]  [,2]  [,3]
## lag2 1.138 1.102 1.175

#for extremely high temp
round(cbind(predcum$matRRfit[as.character(temp97.5),]["lag2"],
            predcum$matRRlow[as.character(temp97.5),]["lag2"],
            predcum$matRRhigh[as.character(temp97.5),]["lag2"]), digits=3)

##       [,1] [,2]  [,3]
## lag2 0.988 0.97 1.007

#Cumulative effects of a specific temperature on certain lagged days, page 19 - effects of both lag and the temperature
#Ex. extremely low and high temperatures lagged by 2 days (lag2)
round(cbind(predcum$cumRRfit[as.character(temp2.5),]["lag2"],
            predcum$cumRRlow[as.character(temp2.5),]["lag2"],
            predcum$cumRRhigh[as.character(temp2.5),]["lag2"]), digits=3)

##       [,1]  [,2]  [,3]
## lag2 0.884 0.826 0.945

round(cbind(predcum$cumRRfit[as.character(temp97.5),]["lag2"],
            predcum$cumRRlow[as.character(temp97.5),]["lag2"],
            predcum$cumRRhigh[as.character(temp97.5),]["lag2"]), digits=3)

##       [,1] [,2]  [,3]
## lag2 1.108 1.07 1.148

#Plot3
plot(predcum, "overall", col=2, cex.axis=1.50, xlab="temperature(oC)",
     ylab="RR", main ="overall temperature effects", cex.main=1.5,cex.lab=1.5)
### To add a reference line to the graph
abline(v=c(temp2.5, temp97.5), lty=2)
abline(v=25.9, lty=1)

#Plot the effect of a single day's lag for a spec. temperature. Plot 4
plot(predcum, "slices", var=temp2.5, col=2, cex.axis=1.50, xlab="lag day",
     ylab="RR", main="Lag effect at 2.5th percentile temperature", cex.main=1.5)

#Plot the cumulative lag effect for a specific temperature, Plot 5
plot(predcum, "slices", var= temp2.5, cumul=TRUE, col=2, cex.axis=1.50, xlab="lag days", ylab="RR",main="Cumulative lagged effect of temperatures at the 2.5th percentile of the sample",cex.main=1.5)

### To add a reference line to the graph
abline(v=c(temp2.5, temp97.5), lty=2)
abline(v=25.9, lty=1)

#Calculate attributable fraction
### attrdl function
attrdl <- function(x,basis,cases,model=NULL,coef=NULL,vcov=NULL,type="af",
dir="back",tot=TRUE,cen,range=NULL,sim=FALSE,nsim=5000) {
#
  # CHECK VERSION OF THE DLNM PACKAGE
if(packageVersion("dlnm")<"2.2.0")
stop("update dlnm package to version >= 2.2.0")

## EXTRACT NAME AND CHECK type AND dir
name <- deparse(substitute(basis))
type <- match.arg(type,c("an","af"))
dir <- match.arg(dir,c("back","forw"))
#
# DEFINE CENTERING
if(missing(cen) && is.null(cen <- attr(basis,"argvar")$cen))
stop("'cen' must be provided")
if(!is.numeric(cen) && length(cen)>1L) stop("'cen' must be a numeric scalar")
attributes(basis)$argvar$cen <- NULL
## SELECT RANGE (FORCE TO CENTERING VALUE OTHERWISE, MEANING NULL RISK)
if(!is.null(range)) x[x<range[1]|x>range[2]] <- cen
## COMPUTE THE MATRIX OF
# - LAGGED EXPOSURES IF dir="back"
# - CONSTANT EXPOSURES ALONG LAGS IF dir="forw"
lag <- attr(basis,"lag")
if(NCOL(x)==1L) {
at <- if(dir=="back") tsModel:::Lag(x,seq(lag[1],lag[2])) else
matrix(rep(x,diff(lag)+1),length(x))
} else {
if(dir=="forw") stop("'x' must be a vector when dir='forw'")
if(ncol(at <- x)!=diff(lag)+1)
stop("dimension of 'x' not compatible with 'basis'")
}
#
# NUMBER USED FOR THE CONTRIBUTION AT EACH TIME IN FORWARD TYPE
# - IF cases PROVIDED AS A MATRIX, TAKE THE ROW AVERAGE
# - IF PROVIDED AS A TIME SERIES, COMPUTE THE FORWARD MOVING AVERAGE
# - THIS EXCLUDES MISSING ACCORDINGLY
# ALSO COMPUTE THE DENOMINATOR TO BE USED BELOW
if(NROW(cases)!=NROW(at)) stop("'x' and 'cases' not consistent")
if(NCOL(cases)>1L) {
if(dir=="back") stop("'cases' must be a vector if dir='back'")
if(ncol(cases)!=diff(lag)+1) stop("dimension of 'cases' not compatible")
den <- sum(rowMeans(cases,na.rm=TRUE),na.rm=TRUE)
cases <- rowMeans(cases)
} else {
den <- sum(cases,na.rm=TRUE)
if(dir=="forw")
cases <- rowMeans(as.matrix(tsModel:::Lag(cases,-seq(lag[1],lag[2]))))
}
#
# EXTRACT COEF AND VCOV IF MODEL IS PROVIDED
if(!is.null(model)) {
cond <- paste0(name,"[[:print:]]*v[0-9]{1,2}\\.l[0-9]{1,2}")
if(ncol(basis)==1L) cond <- name
model.class <- class(model)
coef <- dlnm:::getcoef(model,model.class)
ind <- grep(cond,names(coef))
coef <- coef[ind]
vcov <- dlnm:::getvcov(model,model.class)[ind,ind,drop=FALSE]
model.link <- dlnm:::getlink(model,model.class)
if(model.link!="log") stop("'model' must have a log link function")
}
#
# IF REDUCED ESTIMATES ARE PROVIDED
typebasis <- ifelse(length(coef)!=ncol(basis),"one","cb")
#
# PREPARE THE ARGUMENTS FOR TH BASIS TRANSFORMATION
predvar <- if(typebasis=="one") x else seq(NROW(at))
predlag <- if(typebasis=="one") 0 else dlnm:::seqlag(lag)
#
# CREATE THE MATRIX OF TRANSFORMED CENTRED VARIABLES (DEPENDENT ON typebasis)
if(typebasis=="cb") {
Xpred <- dlnm:::mkXpred(typebasis,basis,at,predvar,predlag,cen)
Xpredall <- 0
for (i in seq(length(predlag))) {
ind <- seq(length(predvar))+length(predvar)*(i-1)
Xpredall <- Xpredall + Xpred[ind,,drop=FALSE]
}
} else {
basis <- do.call(onebasis,c(list(x=x),attr(basis,"argvar")))
Xpredall <- dlnm:::mkXpred(typebasis,basis,x,predvar,predlag,cen)
}
#
# CHECK DIMENSIONS
if(length(coef)!=ncol(Xpredall))
stop("arguments 'basis' do not match 'model' or 'coef'-'vcov'")
if(any(dim(vcov)!=c(length(coef),length(coef))))
stop("arguments 'coef' and 'vcov' do no match")
if(typebasis=="one" && dir=="back")
stop("only dir='forw' allowed for reduced estimates")
#
# COMPUTE AF AND AN
af <- 1-exp(-drop(as.matrix(Xpredall%*%coef)))
an <- af*cases
#
# TOTAL
# - SELECT NON-MISSING OBS CONTRIBUTING TO COMPUTATION
# - DERIVE TOTAL AF
# - COMPUTE TOTAL AN WITH ADJUSTED DENOMINATOR (OBSERVED TOTAL NUMBER)
if(tot) {
isna <- is.na(an)
af <- sum(an[!isna])/sum(cases[!isna])
an <- af*den
}
#
# EMPIRICAL CONFIDENCE INTERVALS
if(!tot && sim) {
sim <- FALSE
warning("simulation samples only returned for tot=T")
}
if(sim) {
# SAMPLE COEF
k <- length(coef)
eigen <- eigen(vcov)
X <- matrix(rnorm(length(coef)*nsim),nsim)
coefsim <- coef + eigen$vectors %*% diag(sqrt(eigen$values),k) %*% t(X)
# RUN THE LOOP
# pre_afsim <- (1 - exp(- Xpredall %*% coefsim)) * cases # a matrix
# afsim <- colSums(pre_afsim,na.rm=TRUE) / sum(cases[!isna],na.rm=TRUE)
afsim <- apply(coefsim,2, function(coefi) {
ani <- (1-exp(-drop(Xpredall%*%coefi)))*cases
sum(ani[!is.na(ani)])/sum(cases[!is.na(ani)])
})
ansim <- afsim*den
}
#
res <- if(sim) {
if(type=="an") ansim else afsim
} else {
if(type=="an") an else af
}
#
return(res)
}

##Build up the matrix and calculate the AF.
matsim <- matrix(NA,1,3,dimnames=list("shanghai",c("glob","cold","heat")))
nsim <- 1000
matsim[1,"glob"] <- attrdl(data$temp,cb,data$total,coef=predcum$coefficients, vcov=predcum$vcov,type="an",dir="forw",cen=28)
matsim[1,"cold"] <- attrdl(data$temp,cb,data$total,coef=predcum$coefficients, vcov=predcum$vcov,type="an",dir="forw",cen=28, range=c(-100,28))
matsim[1,"heat"] <- attrdl(data$temp,cb,data$total,coef=predcum$coefficients, vcov=predcum$vcov,type="an",dir="forw",cen=28, range=c(28,100))

arraysim <- array(NA, dim = c(1, 3, nsim), dimnames = list("shanghai", c("glob", "cold", "heat")))
arraysim[1,"glob",] <- attrdl(data$temp,cb,data$total,coef=predcum$coefficients, vcov=predcum$vcov,type="an",dir="forw",cen=28,sim=T,nsim=nsim)
arraysim[1,"cold",] <- attrdl(data$temp,cb,data$total,coef=predcum$coefficients, vcov=predcum$vcov,type="an",dir="forw",cen=28,
range=c(-100,28),sim=T,nsim=nsim)
arraysim[1,"heat",] <- attrdl(data$temp,cb,data$total,coef=predcum$coefficients, vcov=predcum$vcov,type="an",dir="forw",cen=28,
range=c(28,100),sim=T,nsim=nsim)

## Calculating total deaths
tottotal <- sum(data$total,na.rm=T)
ancity <- matsim
ancitylow <- apply(arraysim,c(1,2),quantile,0.025)
ancityhigh <- apply(arraysim,c(1,2),quantile,0.975)
rownames(ancity) <- rownames(ancitylow) <- rownames(ancityhigh) <- "shanghai"

# Calculating AF
afcity <- ancity/tottotal*100
afcitylow <- ancitylow/tottotal*100
afcityhigh <- ancityhigh/tottotal*100

print(afcity)

##              glob     cold     heat
## shanghai 14.99528 14.45167 0.543611

afcitylow

##              glob    cold      heat
## shanghai 7.692746 6.46287 0.1182538

afcityhigh

##            glob     cold      heat
## shanghai 21.862 21.33376 0.9521218

Overall AF of temperature to death is 14.995% (95% CI: 7.261%, 21.142%) AF of low temperature to death is 14.452% (95% CI:6.853%, 21.198%); and high temperature 0.544% (95% CI: 0.108%, 0.958%).