Import data
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.5.3
library(survival)
## Warning: package 'survival' was built under R version 3.5.3
library(survminer)
## Warning: package 'survminer' was built under R version 3.5.3
## Loading required package: ggplot2
## Loading required package: ggpubr
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(timereg)
## Warning: package 'timereg' was built under R version 3.5.3
library(twang)
## Warning: package 'twang' was built under R version 3.5.3
## Loading required package: gbm
## Warning: package 'gbm' was built under R version 3.5.3
## Loaded gbm 2.1.5
## Loading required package: survey
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
## Loading required package: xtable
## Loading required package: lattice
## Loading required package: latticeExtra
## Warning: package 'latticeExtra' was built under R version 3.5.3
## Loading required package: RColorBrewer
##
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
##
## layer
library(survey)
library(readxl)
cancer <- read_excel("PREPSM_AllSites_2004Onwards_NEW2019.xlsx")
attach(cancer)
cancer_s=data.frame(cbind(age_youngold, AGE, SEX,RACE_RECODE,INSURANCE_STATUS,CDCC_TOTAL,MED_INC_QUAR_12, UR_new,
Primary_site_3grp,TNM_T_new,TNM_N_new,TUMORS_SIZE_NEW_CAT, HPVStatus, TreatmentGrp,
Primary_site_recode,FACILITY_TYPE_CD,
DX_LASTCONTACT_DEATH_MONTHS,PUF_VITAL_STATUS
))
summary(cancer_s)
## age_youngold AGE SEX RACE_RECODE
## Min. :0.000 Min. :18.0 Min. :1.000 Min. : 1.000
## 1st Qu.:2.000 1st Qu.:54.0 1st Qu.:1.000 1st Qu.: 1.000
## Median :3.000 Median :61.0 Median :1.000 Median : 1.000
## Mean :2.683 Mean :62.3 Mean :1.294 Mean : 1.741
## 3rd Qu.:4.000 3rd Qu.:70.0 3rd Qu.:2.000 3rd Qu.: 1.000
## Max. :4.000 Max. :90.0 Max. :2.000 Max. :98.000
## NA's :421
## INSURANCE_STATUS CDCC_TOTAL MED_INC_QUAR_12 UR_new
## Min. :0.00 Min. :0.0000 Min. :1.000 Min. :1.000
## 1st Qu.:1.00 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:1.000
## Median :5.00 Median :0.0000 Median :3.000 Median :1.000
## Mean :3.12 Mean :0.2552 Mean :2.765 Mean :1.197
## 3rd Qu.:5.00 3rd Qu.:0.0000 3rd Qu.:4.000 3rd Qu.:1.000
## Max. :9.00 Max. :2.0000 Max. :4.000 Max. :3.000
## NA's :221 NA's :1217
## Primary_site_3grp TNM_T_new TNM_N_new TUMORS_SIZE_NEW_CAT
## Min. :1.00 Min. :0.000 Min. :0.000 Min. : 0.0
## 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:992.0
## Median :1.00 Median :2.000 Median :0.000 Median :993.0
## Mean :1.42 Mean :1.748 Mean :0.494 Mean :991.7
## 3rd Qu.:2.00 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:994.0
## Max. :2.00 Max. :4.000 Max. :3.000 Max. :999.0
## NA's :10260 NA's :13934
## HPVStatus TreatmentGrp Primary_site_recode FACILITY_TYPE_CD
## Min. :0.000 Min. :1.000 Min. : 0.000 Min. :1.000
## 1st Qu.:0.000 1st Qu.:2.000 1st Qu.: 2.000 1st Qu.:2.000
## Median :2.000 Median :4.000 Median : 4.000 Median :3.000
## Mean :4.503 Mean :3.502 Mean : 4.359 Mean :2.655
## 3rd Qu.:9.000 3rd Qu.:5.000 3rd Qu.: 9.000 3rd Qu.:3.000
## Max. :9.000 Max. :5.000 Max. :10.000 Max. :4.000
## NA's :1190
## DX_LASTCONTACT_DEATH_MONTHS PUF_VITAL_STATUS
## Min. : 0.00 Min. :0.000
## 1st Qu.:16.39 1st Qu.:0.000
## Median :27.14 Median :1.000
## Mean :29.04 Mean :0.729
## 3rd Qu.:40.90 3rd Qu.:1.000
## Max. :71.33 Max. :1.000
## NA's :11714 NA's :11707
detach(cancer)
# Remove all cases with missing values
cancer_s1=cancer_s[complete.cases(cancer_s),]
# Make variables factor
cancer_s1$dead[cancer_s1$PUF_VITAL_STATUS==1]=0
cancer_s1$dead[cancer_s1$PUF_VITAL_STATUS==0]=1
cancer_s1$age_youngold=factor(cancer_s1$age_youngold)
cancer_s1$SEX=factor(cancer_s1$SEX)
cancer_s1$RACE_RECODE=factor(cancer_s1$RACE_RECODE)
cancer_s1$INSURANCE_STATUS=factor(cancer_s1$INSURANCE_STATUS)
cancer_s1$CDCC_TOTAL=factor(cancer_s1$CDCC_TOTAL)
cancer_s1$MED_INC_QUAR_12=factor(cancer_s1$MED_INC_QUAR_12)
cancer_s1$UR_new=factor(cancer_s1$UR_new)
cancer_s1$Primary_site_3grp=factor(cancer_s1$Primary_site_3grp)
cancer_s1$TNM_T_new=factor(cancer_s1$TNM_T_new)
cancer_s1$TNM_N_new=factor(cancer_s1$TNM_N_new)
cancer_s1$TUMORS_SIZE_NEW_CAT=factor(cancer_s1$TUMORS_SIZE_NEW_CAT)
cancer_s1$HPVStatus=factor(cancer_s1$HPVStatus)
cancer_s1$TreatmentGrp=factor(cancer_s1$TreatmentGrp)
cancer_s1$Primary_site_recode=factor(cancer_s1$Primary_site_recode)
cancer_s1$FACILITY_TYPE_CD=factor(cancer_s1$FACILITY_TYPE_CD)
summary(cancer_s1)
## age_youngold AGE SEX RACE_RECODE INSURANCE_STATUS
## 1:2231 Min. :40.00 1:14303 1 :19618 0: 953
## 2:6151 1st Qu.:55.00 2: 7025 2 : 1151 1: 8975
## 3:6311 Median :63.00 3 : 43 5:11072
## 4:6635 Mean :63.89 4 : 402 9: 328
## 3rd Qu.:72.00 98: 114
## Max. :90.00
##
## CDCC_TOTAL MED_INC_QUAR_12 UR_new Primary_site_3grp TNM_T_new
## 0:16665 1:3408 1:17317 1:14549 0: 84
## 1: 3598 2:5185 2: 3591 2: 6779 1:10368
## 2: 1065 3:5869 3: 420 2: 7999
## 4:6866 3: 2498
## 4: 379
##
##
## TNM_N_new TUMORS_SIZE_NEW_CAT HPVStatus TreatmentGrp Primary_site_recode
## 0:14706 992 :6170 0: 5729 1: 1245 2 :4864
## 1: 3604 993 :4666 2: 4290 2: 3745 9 :3708
## 2: 2491 991 :4222 3: 95 3: 2815 4 :2616
## 3: 527 994 :2860 9:11214 4: 3106 1 :2473
## 999 :1326 5:10417 6 :2285
## 995 :1235 0 :2029
## (Other): 849 (Other):3353
## FACILITY_TYPE_CD DX_LASTCONTACT_DEATH_MONTHS PUF_VITAL_STATUS
## 1: 1589 Min. : 0.00 Min. :0.0000
## 2: 7089 1st Qu.:17.48 1st Qu.:1.0000
## 3:10597 Median :28.25 Median :1.0000
## 4: 2053 Mean :29.90 Mean :0.7534
## 3rd Qu.:41.99 3rd Qu.:1.0000
## Max. :71.00 Max. :1.0000
##
## dead
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2466
## 3rd Qu.:0.0000
## Max. :1.0000
##
# Divide data into OP and OC subsets
# Recode variables
cancer_op=cancer_s1[cancer_s1$Primary_site_3grp==2, ]
cancer_op$Primary_site_recode=as.numeric(cancer_op$Primary_site_recode)
cancer_op$Primary_site_recode=as.factor(cancer_op$Primary_site_recode)
# 2 = base of tong, 8 = Tonsil, 9 = Orpharynx
cancer_op$HPVStatus=as.numeric(cancer_op$HPVStatus)
cancer_op$HPVStatus=as.factor(cancer_op$HPVStatus)
# 1 = HPV Neg, 2 = HPV Pos
cancer_oc=cancer_s1[cancer_s1$Primary_site_3grp==1, ]
cancer_oc$Primary_site_recode=as.numeric(cancer_oc$Primary_site_recode)
cancer_oc$Primary_site_recode=as.factor(cancer_oc$Primary_site_recode)
# 1 = lip, 3= other and unspecified parts of tongue, 4 = gum, 5 = floor of mouth
# 6 = palate, 7 = other and unspecified parts of mouth
KM curves before propensity score weighting
surv_object_op=Surv(time=cancer_op$DX_LASTCONTACT_DEATH_MONTHS,event=cancer_op$dead)
# K-M curve OP
fit_op=survfit(surv_object_op~age_youngold,data=cancer_op)
ggsurvplot(fit_op,data=cancer_op,pval=TRUE,
legend="bottom",legend.title="Age",legend.labs=c("40-49","50-59","60-69","70+"),
risk.table = TRUE, tables.theme = theme_cleantable())

surv_object_oc=Surv(time=cancer_oc$DX_LASTCONTACT_DEATH_MONTHS,event=cancer_oc$dead)
# K-M curve for OC
fit_oc=survfit(surv_object_oc~age_youngold,data=cancer_oc)
ggsurvplot(fit_oc,data=cancer_oc,pval=TRUE,
legend="bottom",legend.title="Age",legend.labs=c("40-49","50-59","60-69","70+"),
risk.table = TRUE, tables.theme = theme_cleantable())

Propensity score weighting and balanace checking
set.seed(1)
## OP
mnps.op <- mnps(age_youngold ~ SEX+RACE_RECODE+CDCC_TOTAL+MED_INC_QUAR_12+UR_new
+TNM_T_new+TNM_N_new+TUMORS_SIZE_NEW_CAT+TreatmentGrp+Primary_site_recode+HPVStatus,
data = cancer_op,
estimand = "ATE",
verbose = FALSE,
stop.method = c("es.mean"),
n.trees = 5000)
table_op=bal.table(mnps.op,digits=2)
write.csv(table_op, file = "table_op_new.csv",row.names=T, na="")
table_op_cov=bal.table(mnps.op,digits = 2,collapse.to = "covariate")
write.csv(table_op_cov, file = "table_op_cov_new.csv",row.names=T, na="")
## OC
mnps.oc <- mnps(age_youngold ~ SEX+RACE_RECODE+CDCC_TOTAL+MED_INC_QUAR_12+UR_new
+TNM_T_new+TNM_N_new+TUMORS_SIZE_NEW_CAT+TreatmentGrp+Primary_site_recode,
data = cancer_oc,
estimand = "ATE",
verbose = FALSE,
stop.method = c("es.mean"),
n.trees = 5000)
table_oc=bal.table(mnps.oc,digits=2)
write.csv(table_oc, file = "table_oc_new.csv",row.names=T, na="")
table_oc_cov=bal.table(mnps.oc,digits = 2,collapse.to = "covariate")
write.csv(table_op_cov, file = "table_op_cov_new.csv",row.names=T, na="")
#plot(mnps.oc,plots=1)
#plot(mnps.oc,plots=2)
#plot(mnps.oc,plots=3,pairwiseMax = F)
#plot(mnps.oc,plots=4)
Analysis on propensity score weighted data
# Cox regression for OP
cancer_op$weight=get.weights(mnps.op,stop.method="es.mean")
surv_op=Surv(cancer_op$DX_LASTCONTACT_DEATH_MONTHS,event=cancer_op$PUF_VITAL_STATUS)
fit_cox_op1=coxph(Surv(DX_LASTCONTACT_DEATH_MONTHS,PUF_VITAL_STATUS==1)~age_youngold,
data=cancer_op,weights=cancer_op$weight)
summary(fit_cox_op1)
## Call:
## coxph(formula = Surv(DX_LASTCONTACT_DEATH_MONTHS, PUF_VITAL_STATUS ==
## 1) ~ age_youngold, data = cancer_op, weights = cancer_op$weight)
##
## n= 6779, number of events= 5635
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age_youngold2 -0.01323 0.98686 0.01904 -0.695 0.4873
## age_youngold3 0.04102 1.04188 0.01915 2.142 0.0322 *
## age_youngold4 0.07800 1.08113 0.02005 3.890 0.0001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age_youngold2 0.9869 1.0133 0.9507 1.024
## age_youngold3 1.0419 0.9598 1.0035 1.082
## age_youngold4 1.0811 0.9250 1.0395 1.124
##
## Concordance= 0.509 (se = 0.005 )
## Likelihood ratio test= 25.67 on 3 df, p=1e-05
## Wald test = 25.8 on 3 df, p=1e-05
## Score (logrank) test = 25.81 on 3 df, p=1e-05
# Check proportional hazards assumption
temp_op1=cox.zph(fit_cox_op1,global=T)
print(temp_op1)
## rho chisq p
## age_youngold2 0.002191 0.006374 0.936
## age_youngold3 -0.006517 0.052790 0.818
## age_youngold4 -0.000678 0.000397 0.984
## GLOBAL NA 0.105392 0.991
# Assumption is satisfied
# Cox regression for OC
cancer_oc$weight=get.weights(mnps.oc,stop.method="es.mean")
surv_oc=Surv(cancer_oc$DX_LASTCONTACT_DEATH_MONTHS,event=cancer_oc$PUF_VITAL_STATUS)
fit_cox_oc=coxph(Surv(DX_LASTCONTACT_DEATH_MONTHS,PUF_VITAL_STATUS==1)~age_youngold,
data=cancer_oc,weights=cancer_oc$weight)
summary(fit_cox_oc)
## Call:
## coxph(formula = Surv(DX_LASTCONTACT_DEATH_MONTHS, PUF_VITAL_STATUS ==
## 1) ~ age_youngold, data = cancer_oc, weights = cancer_oc$weight)
##
## n= 14549, number of events= 10433
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age_youngold2 -0.02092 0.97929 0.01335 -1.567 0.117115
## age_youngold3 -0.04772 0.95340 0.01350 -3.535 0.000408 ***
## age_youngold4 -0.09694 0.90761 0.01429 -6.784 1.16e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age_youngold2 0.9793 1.021 0.9540 1.0053
## age_youngold3 0.9534 1.049 0.9285 0.9790
## age_youngold4 0.9076 1.102 0.8825 0.9334
##
## Concordance= 0.511 (se = 0.004 )
## Likelihood ratio test= 51.12 on 3 df, p=5e-11
## Wald test = 50.75 on 3 df, p=6e-11
## Score (logrank) test = 50.78 on 3 df, p=5e-11
# Check proportional hazards assumption
temp_oc1=cox.zph(fit_cox_oc,global=T)
print(temp_oc1)
## rho chisq p
## age_youngold2 0.004723 0.037888 0.846
## age_youngold3 0.001057 0.002058 0.964
## age_youngold4 -0.000363 0.000309 0.986
## GLOBAL NA 0.053655 0.997
# Assumption is satisfied