This is an R HTML document. Here a DLNM analysis demonstrates the association between total mortalities and environmental temperature
#If csv use read.csv setwd("/Users/kennyorielolana/Documents/DATA ANALYSIS and Mapping/Training in Time series analysis")
## Error in setwd("/Users/kennyorielolana/Documents/DATA ANALYSIS and Mapping/Training in Time series analysis"): cannot change working directory
data<-read.csv("Sample.csv") View(data)
## Error in check_for_XQuartz(file.path(R.home("modules"), "R_de.so")): X11 library is missing: install XQuartz from www.xquartz.org
#Distributed Lag Non-linear Model (DLNM) library(dlnm)
library(splines) library(mgcv)
#build up basis function for exposure-response dimension argvar <- list(fun="ns", knots=equalknots(data$temp, df=4)) #build up basis function for exposure-lag dimensions (df=3) arglag=list(knots=logknots(21,3)) #build up the basis function for exposure-lag-response dimension ## 21 day lag was chosen cb <- crossbasis(data$temp, lag=21, argvar=argvar, arglag=arglag) #Check parameter settings for cross basis matrix summary(cb)
## CROSSBASIS FUNCTIONS ## observations: 1461 ## range: -2.4 to 34 ## lag period: 0 21 ## total df: 20 ## ## BASIS FOR VAR: ## fun: ns ## knots: 6.7 15.8 24.9 ## intercept: FALSE ## Boundary.knots: -2.4 34 ## ## BASIS FOR LAG: ## fun: ns ## knots: 1.011193 2.779473 7.639956 ## intercept: TRUE ## Boundary.knots: 0 21
#Build up the time series model formula <- total~cb+as.factor(dow)+ns(rh,3)+ns(day, 4*7) model <- gam(formula, data, family=quasipoisson, na.action="na.exclude") #Parsing and plotting pred.temp<- crosspred(cb, model, by=0.1, cen=median(data$temp, na.rm=TRUE)) #plot1 is shown below plot(pred.temp, "overall", col=2, cex.axis=1.50, xlab="temperature (C)" , ylab="RR", main="Overall effects of the temperature of the sample", cex.main=1.5)
##Find the minimum mortality temp which.min(pred.temp$allRRfit) #equal to MMT=26
## 25.9 ## 283
#Use 26 as the minimum temp and re-predict predcum<- crosspred(cb, model, cumul=TRUE, cen=26) #Re-plot, with reference temp (MMT) plot no.2 plot(predcum, "overall", col=2, cex.axis=1.50, xlab="temperature(C)", ylab="RR", main="Overall effect of the sample temperature", cex.main=1.5, cex.lab=1.5)
#Extract the value of risk of death corresponding to a temp, e.g. at extreme low and high temp2.5<- round(quantile(data$temp, probs=c(0.025), na.rm=TRUE), 0) temp97.5<- round(quantile(data$temp, probs=c(0.975), na.rm=TRUE),0) #Cumulative effects of all lagged days (RR, 95% CI), 21 days effects is chosen. Here RR of temp 2.5 and 97.5, including 95%CI were extracted round(cbind(predcum$allRRfit, predcum$allRRlow, predcum$allRRhigh) [c(as.character(temp2.5), as.character(temp97.5)),], digits=3)
## [,1] [,2] [,3] ## 2 1.644 1.383 1.955 ## 31 1.080 1.003 1.164
#same-day lagged effects of a specific temp on a specific lagged day # Ex. effect of extremely low temp (temp2.5) and extremely high temp (temp97.5)- effect of lag # on lag day 2 (lag2) round(cbind(predcum$matRRfit[as.character(temp2.5),]["lag2"], predcum$matRRlow[as.character(temp2.5),]["lag2"], predcum$matRRhigh[as.character(temp2.5),]["lag2"]), digits=3)
## [,1] [,2] [,3] ## lag2 1.138 1.102 1.175
#for extremely high temp round(cbind(predcum$matRRfit[as.character(temp97.5),]["lag2"], predcum$matRRlow[as.character(temp97.5),]["lag2"], predcum$matRRhigh[as.character(temp97.5),]["lag2"]), digits=3)
## [,1] [,2] [,3] ## lag2 0.988 0.97 1.007
#Cumulative effects of a specific temperature on certain lagged days, page 19 - effects of both lag and the temperature #Ex. extremely low and high temperatures lagged by 2 days (lag2) round(cbind(predcum$cumRRfit[as.character(temp2.5),]["lag2"], predcum$cumRRlow[as.character(temp2.5),]["lag2"], predcum$cumRRhigh[as.character(temp2.5),]["lag2"]), digits=3)
## [,1] [,2] [,3] ## lag2 0.884 0.826 0.945
round(cbind(predcum$cumRRfit[as.character(temp97.5),]["lag2"], predcum$cumRRlow[as.character(temp97.5),]["lag2"], predcum$cumRRhigh[as.character(temp97.5),]["lag2"]), digits=3)
## [,1] [,2] [,3] ## lag2 1.108 1.07 1.148
#Plot3 plot(predcum, "overall", col=2, cex.axis=1.50, xlab="temperature(oC)", ylab="RR", main ="overall temperature effects", cex.main=1.5,cex.lab=1.5) ### To add a reference line to the graph abline(v=c(temp2.5, temp97.5), lty=2) abline(v=25.9, lty=1)
#Plot the effect of a single day's lag for a spec. temperature. Plot 4 plot(predcum, "slices", var=temp2.5, col=2, cex.axis=1.50, xlab="lag day", ylab="RR", main="Lag effect at 2.5th percentile temperature", cex.main=1.5)
#Plot the cumulative lag effect for a specific temperature, Plot 5 plot(predcum, "slices", var= temp2.5, cumul=TRUE, col=2, cex.axis=1.50, xlab="lag days", ylab="RR",main="Cumulative lagged effect of temperatures at the 2.5th percentile of the sample",cex.main=1.5) ### To add a reference line to the graph abline(v=c(temp2.5, temp97.5), lty=2) abline(v=25.9, lty=1)
#Calculate attributable fraction ### attrdl function attrdl <- function(x,basis,cases,model=NULL,coef=NULL,vcov=NULL,type="af", dir="back",tot=TRUE,cen,range=NULL,sim=FALSE,nsim=5000) { # # CHECK VERSION OF THE DLNM PACKAGE if(packageVersion("dlnm")<"2.2.0") stop("update dlnm package to version >= 2.2.0") ## EXTRACT NAME AND CHECK type AND dir name <- deparse(substitute(basis)) type <- match.arg(type,c("an","af")) dir <- match.arg(dir,c("back","forw")) # # DEFINE CENTERING if(missing(cen) && is.null(cen <- attr(basis,"argvar")$cen)) stop("'cen' must be provided") if(!is.numeric(cen) && length(cen)>1L) stop("'cen' must be a numeric scalar") attributes(basis)$argvar$cen <- NULL ## SELECT RANGE (FORCE TO CENTERING VALUE OTHERWISE, MEANING NULL RISK) if(!is.null(range)) x[x<range[1]|x>range[2]] <- cen ## COMPUTE THE MATRIX OF # - LAGGED EXPOSURES IF dir="back" # - CONSTANT EXPOSURES ALONG LAGS IF dir="forw" lag <- attr(basis,"lag") if(NCOL(x)==1L) { at <- if(dir=="back") tsModel:::Lag(x,seq(lag[1],lag[2])) else matrix(rep(x,diff(lag)+1),length(x)) } else { if(dir=="forw") stop("'x' must be a vector when dir='forw'") if(ncol(at <- x)!=diff(lag)+1) stop("dimension of 'x' not compatible with 'basis'") } # # NUMBER USED FOR THE CONTRIBUTION AT EACH TIME IN FORWARD TYPE # - IF cases PROVIDED AS A MATRIX, TAKE THE ROW AVERAGE # - IF PROVIDED AS A TIME SERIES, COMPUTE THE FORWARD MOVING AVERAGE # - THIS EXCLUDES MISSING ACCORDINGLY # ALSO COMPUTE THE DENOMINATOR TO BE USED BELOW if(NROW(cases)!=NROW(at)) stop("'x' and 'cases' not consistent") if(NCOL(cases)>1L) { if(dir=="back") stop("'cases' must be a vector if dir='back'") if(ncol(cases)!=diff(lag)+1) stop("dimension of 'cases' not compatible") den <- sum(rowMeans(cases,na.rm=TRUE),na.rm=TRUE) cases <- rowMeans(cases) } else { den <- sum(cases,na.rm=TRUE) if(dir=="forw") cases <- rowMeans(as.matrix(tsModel:::Lag(cases,-seq(lag[1],lag[2])))) } # # EXTRACT COEF AND VCOV IF MODEL IS PROVIDED if(!is.null(model)) { cond <- paste0(name,"[[:print:]]*v[0-9]{1,2}\\.l[0-9]{1,2}") if(ncol(basis)==1L) cond <- name model.class <- class(model) coef <- dlnm:::getcoef(model,model.class) ind <- grep(cond,names(coef)) coef <- coef[ind] vcov <- dlnm:::getvcov(model,model.class)[ind,ind,drop=FALSE] model.link <- dlnm:::getlink(model,model.class) if(model.link!="log") stop("'model' must have a log link function") } # # IF REDUCED ESTIMATES ARE PROVIDED typebasis <- ifelse(length(coef)!=ncol(basis),"one","cb") # # PREPARE THE ARGUMENTS FOR TH BASIS TRANSFORMATION predvar <- if(typebasis=="one") x else seq(NROW(at)) predlag <- if(typebasis=="one") 0 else dlnm:::seqlag(lag) # # CREATE THE MATRIX OF TRANSFORMED CENTRED VARIABLES (DEPENDENT ON typebasis) if(typebasis=="cb") { Xpred <- dlnm:::mkXpred(typebasis,basis,at,predvar,predlag,cen) Xpredall <- 0 for (i in seq(length(predlag))) { ind <- seq(length(predvar))+length(predvar)*(i-1) Xpredall <- Xpredall + Xpred[ind,,drop=FALSE] } } else { basis <- do.call(onebasis,c(list(x=x),attr(basis,"argvar"))) Xpredall <- dlnm:::mkXpred(typebasis,basis,x,predvar,predlag,cen) } # # CHECK DIMENSIONS if(length(coef)!=ncol(Xpredall)) stop("arguments 'basis' do not match 'model' or 'coef'-'vcov'") if(any(dim(vcov)!=c(length(coef),length(coef)))) stop("arguments 'coef' and 'vcov' do no match") if(typebasis=="one" && dir=="back") stop("only dir='forw' allowed for reduced estimates") # # COMPUTE AF AND AN af <- 1-exp(-drop(as.matrix(Xpredall%*%coef))) an <- af*cases # # TOTAL # - SELECT NON-MISSING OBS CONTRIBUTING TO COMPUTATION # - DERIVE TOTAL AF # - COMPUTE TOTAL AN WITH ADJUSTED DENOMINATOR (OBSERVED TOTAL NUMBER) if(tot) { isna <- is.na(an) af <- sum(an[!isna])/sum(cases[!isna]) an <- af*den } # # EMPIRICAL CONFIDENCE INTERVALS if(!tot && sim) { sim <- FALSE warning("simulation samples only returned for tot=T") } if(sim) { # SAMPLE COEF k <- length(coef) eigen <- eigen(vcov) X <- matrix(rnorm(length(coef)*nsim),nsim) coefsim <- coef + eigen$vectors %*% diag(sqrt(eigen$values),k) %*% t(X) # RUN THE LOOP # pre_afsim <- (1 - exp(- Xpredall %*% coefsim)) * cases # a matrix # afsim <- colSums(pre_afsim,na.rm=TRUE) / sum(cases[!isna],na.rm=TRUE) afsim <- apply(coefsim,2, function(coefi) { ani <- (1-exp(-drop(Xpredall%*%coefi)))*cases sum(ani[!is.na(ani)])/sum(cases[!is.na(ani)]) }) ansim <- afsim*den } # res <- if(sim) { if(type=="an") ansim else afsim } else { if(type=="an") an else af } # return(res) } ##Build up the matrix and calculate the AF. matsim <- matrix(NA,1,3,dimnames=list("shanghai",c("glob","cold","heat"))) nsim <- 1000 matsim[1,"glob"] <- attrdl(data$temp,cb,data$total,coef=predcum$coefficients, vcov=predcum$vcov,type="an",dir="forw",cen=28) matsim[1,"cold"] <- attrdl(data$temp,cb,data$total,coef=predcum$coefficients, vcov=predcum$vcov,type="an",dir="forw",cen=28, range=c(-100,28)) matsim[1,"heat"] <- attrdl(data$temp,cb,data$total,coef=predcum$coefficients, vcov=predcum$vcov,type="an",dir="forw",cen=28, range=c(28,100)) arraysim <- array(NA, dim = c(1, 3, nsim), dimnames = list("shanghai", c("glob", "cold", "heat"))) arraysim[1,"glob",] <- attrdl(data$temp,cb,data$total,coef=predcum$coefficients, vcov=predcum$vcov,type="an",dir="forw",cen=28,sim=T,nsim=nsim) arraysim[1,"cold",] <- attrdl(data$temp,cb,data$total,coef=predcum$coefficients, vcov=predcum$vcov,type="an",dir="forw",cen=28, range=c(-100,28),sim=T,nsim=nsim) arraysim[1,"heat",] <- attrdl(data$temp,cb,data$total,coef=predcum$coefficients, vcov=predcum$vcov,type="an",dir="forw",cen=28, range=c(28,100),sim=T,nsim=nsim) ## Calculating total deaths tottotal <- sum(data$total,na.rm=T) ancity <- matsim ancitylow <- apply(arraysim,c(1,2),quantile,0.025) ancityhigh <- apply(arraysim,c(1,2),quantile,0.975) rownames(ancity) <- rownames(ancitylow) <- rownames(ancityhigh) <- "shanghai" # Calculating AF afcity <- ancity/tottotal*100 afcitylow <- ancitylow/tottotal*100 afcityhigh <- ancityhigh/tottotal*100 print(afcity)
## glob cold heat ## shanghai 14.99528 14.45167 0.543611
afcitylow
## glob cold heat ## shanghai 7.692746 6.46287 0.1182538
afcityhigh
## glob cold heat ## shanghai 21.862 21.33376 0.9521218
Overall AF of temperature to death is 14.995% (95% CI: 7.261%, 21.142%) AF of low temperature to death is 14.452% (95% CI:6.853%, 21.198%); and high temperature 0.544% (95% CI: 0.108%, 0.958%).