rm(list=ls())


# Load libraries ---------------------------

library(haven)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(geeM)
## Loading required package: Matrix
library(magrittr)


# Data location for reading datasets and storing data ---------------------------

data_loc <- "/Volumes/caas/MERITS/RESOURCES/Data Processing/Data Analysis/Outcome Analyses 2016/Manuscript Revisions_2020/Aditya Khanna Analyses 2022/RESOURCES-AK-R"

# Read Data ---------------------------

## AVCIG, PCSMKD
raw_dt_avcig_pcsmkd <- read.csv("/Volumes/caas/MERITS/RESOURCES/Data Processing/Data Analysis/Outcome Analyses 2016/Manuscript Revisions_2020/Aditya Khanna Analyses 2022/TLDVSN243.csv")

# Specify corstr = ar1 ---------------------------
corstr = "ar1"


# DV = PCSMKD ---------------------------

# create covariates dt
PCSMKD_nobl_dt <- 
  cbind.data.frame(
    PCSMKD = raw_dt_avcig_pcsmkd$PCSMKD,
    cAGE = raw_dt_avcig_pcsmkd$AGE-mean(raw_dt_avcig_pcsmkd$AGE),
    cBLCGSMD = raw_dt_avcig_pcsmkd$BLCGSMD - mean(raw_dt_avcig_pcsmkd$BLCGSMD),
    MI = as.factor(raw_dt_avcig_pcsmkd$MI),
    CBT = as.factor(raw_dt_avcig_pcsmkd$CBT),
    TIME=as.factor(raw_dt_avcig_pcsmkd$TIME),
    #CBT=as.factor(raw_dt_avcig_pcsmkd$CBT),
    M3ENVDAYS = raw_dt_avcig_pcsmkd$M3ENVDAYS,
    M6ENVDAYS = raw_dt_avcig_pcsmkd$M6ENVDAYS ,
    MIBYCBT = as.factor(raw_dt_avcig_pcsmkd$MIBYCBT),
    MIBYTIME = as.factor(raw_dt_avcig_pcsmkd$MIBYTIME),
    CBTBYTIME = as.factor(raw_dt_avcig_pcsmkd$CBTBYTIME),
    MIBYCBTBYTIME = as.factor(raw_dt_avcig_pcsmkd$MIBYCBTBYTIME),
    id = raw_dt_avcig_pcsmkd$id,
    log_unctrldays = raw_dt_avcig_pcsmkd$log_unctrldays
  )

#relevel MI and CBT
PCSMKD_nobl_dt$MI_ref1 <- PCSMKD_nobl_dt$MI %>% relevel("1")
PCSMKD_nobl_dt$CBT_ref1 <- PCSMKD_nobl_dt$CBT %>% relevel("1")
PCSMKD_nobl_dt$TIME_ref1 <- PCSMKD_nobl_dt$TIME %>% relevel("1")

# ignore missing values
## create an indicator column for any missing values 
PCSMKD_nobl_dt$any_na <- PCSMKD_nobl_dt %>% apply(1, function(x){any(is.na(x))}) 

# ## left join the dataset by the id column using the group_by() function
PCSMKD_nobl_dt %<>% left_join(PCSMKD_nobl_dt %>% group_by(id) %>% 
                                summarise(any_na2 = any(any_na)), by="id")


# ## filter out rows with any NAs
PCSMKD_nobl_dt %<>% filter(any_na2 != T)

# modeling
## fit main model
PCSMKD_nobl_main <- geem(formula =   
                           PCSMKD~
                           cAGE +
                           cBLCGSMD+
                           MI +
                           CBT +
                           TIME ,
                         data = PCSMKD_nobl_dt,
                         id=id,
                         family = MASS::negative.binomial(1),
                         corstr = corstr
)


## print main model summary
summary(PCSMKD_nobl_main) #ref: Mi=0, Cbt=0
##             Estimates Model SE Robust SE    wald        p
## (Intercept)  4.332000 0.045770  0.046900 92.3700 0.000000
## cAGE         0.013610 0.022390  0.023650  0.5755 0.564900
## cBLCGSMD     0.008767 0.002774  0.003216  2.7260 0.006414
## MI1          0.102300 0.049550  0.048600  2.1050 0.035310
## CBT1        -0.055010 0.049770  0.048560 -1.1330 0.257300
## TIME1        0.027570 0.024220  0.024300  1.1340 0.256600
## 
##  Estimated Correlation Parameter:  0.6129 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.1818 
## 
##  Number of GEE iterations: 2 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
## fit 3-way interaction model
PCSMKD_nobl_interaction <- 
  geem(formula =   
         PCSMKD~
         cAGE +
         cBLCGSMD+
         MI +
         CBT +
         TIME + 
         MI*CBT+
         MI*TIME+
         CBT*TIME+
         MI*CBT*TIME,
       #ref: Mi=0, Cbt=0, time=0
       data = PCSMKD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

PCSMKD_nobl_interaction_TIMEref1 <- 
  geem(formula = PCSMKD ~ cAGE + cBLCGSMD + MI + CBT + TIME_ref1 + MI * CBT + MI * 
         TIME_ref1 + CBT * TIME_ref1 + MI * CBT * TIME_ref1,
       #ref: Mi=0, Cbt=0, time=1
       data = PCSMKD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

PCSMKD_nobl_interaction_MIref1 <- 
  geem(formula = PCSMKD ~ cAGE + cBLCGSMD + MI_ref1 + CBT + TIME + MI_ref1 * CBT + MI_ref1 * 
         TIME + CBT * TIME + MI_ref1 * CBT * TIME,
       #ref: Mi=1, Cbt=0, time=0
       data = PCSMKD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

PCSMKD_nobl_interaction_MIref1_TIMEref1 <- 
  geem(formula = PCSMKD ~ cAGE + cBLCGSMD + MI_ref1 + CBT + TIME_ref1 + MI_ref1 * CBT + MI_ref1 * 
         TIME_ref1 + CBT * TIME_ref1 + MI_ref1 * CBT * TIME_ref1,
       #ref: Mi=1, Cbt=0, time=1
       data = PCSMKD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

PCSMKD_nobl_interaction_CBTref1 <- 
  geem(formula = PCSMKD ~ cAGE + cBLCGSMD + MI + CBT_ref1 + TIME + MI * CBT_ref1 + MI * 
         TIME + CBT_ref1 * TIME + MI * CBT_ref1 * TIME,
       #ref: Mi=0, Cbt=1, time=0
       data = PCSMKD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

PCSMKD_nobl_interaction_CBTref1_TIMEref1 <- 
  geem(formula = PCSMKD ~ cAGE + cBLCGSMD + MI + CBT_ref1 + TIME_ref1 + MI * CBT_ref1 + MI * 
         TIME_ref1 + CBT_ref1 * TIME_ref1 + MI * CBT_ref1 * TIME_ref1,
       #ref: Mi=0, Cbt=1, time=1
       data = PCSMKD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

PCSMKD_nobl_interaction_MIref1_CBTref1 <- 
  geem(formula = PCSMKD ~ cAGE + cBLCGSMD + MI_ref1 + CBT_ref1 + TIME + MI_ref1 * CBT_ref1 + MI_ref1 * 
         TIME + CBT_ref1 * TIME + MI_ref1 * CBT_ref1 * TIME,
       #ref: Mi=1, Cbt=1, time=0
       data = PCSMKD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

PCSMKD_nobl_interaction_MIref1_CBTref1_TIMEref1 <- 
  geem(formula = PCSMKD ~ cAGE + cBLCGSMD + MI_ref1 + CBT_ref1 + TIME_ref1 + MI_ref1 * CBT_ref1 + MI_ref1 * 
         TIME_ref1 + CBT_ref1 * TIME_ref1 + MI_ref1 * CBT_ref1 * TIME_ref1,
       #ref: Mi=1, Cbt=1, time=1
       data = PCSMKD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

## print interaction model summary
summary(PCSMKD_nobl_interaction) #base: MI=0, CBT=0, time=0
##                 Estimates Model SE Robust SE      wald        p
## (Intercept)     4.3290000  0.05795  0.064050 67.580000 0.000000
## cAGE            0.0146700  0.02263  0.023800  0.616200 0.537800
## cBLCGSMD        0.0090460  0.00281  0.003239  2.793000 0.005224
## MI1             0.0676200  0.08316  0.081390  0.830700 0.406100
## CBT1           -0.0186100  0.07880  0.085690 -0.217200 0.828100
## TIME1           0.0789700  0.04993  0.055900  1.413000 0.157800
## MI1:CBT1       -0.0001742  0.11190  0.112500 -0.001549 0.998800
## MI1:TIME1      -0.0253200  0.07158  0.069030 -0.366800 0.713800
## CBT1:TIME1     -0.1607000  0.06790  0.073140 -2.197000 0.027990
## MI1:CBT1:TIME1  0.1746000  0.09640  0.096000  1.819000 0.068990
## 
##  Estimated Correlation Parameter:  0.6249 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.1837 
## 
##  Number of GEE iterations: 2 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(PCSMKD_nobl_interaction_TIMEref1) #base: MI=0, CBT=0, time=1
##                     Estimates Model SE Robust SE    wald        p
## (Intercept)          4.408000  0.05792  0.055010 80.1300 0.000000
## cAGE                 0.014670  0.02263  0.023800  0.6162 0.537800
## cBLCGSMD             0.009046  0.00281  0.003239  2.7930 0.005224
## MI1                  0.042290  0.08313  0.069030  0.6126 0.540100
## CBT1                -0.179300  0.07880  0.086900 -2.0640 0.039060
## TIME_ref10          -0.078970  0.04993  0.055900 -1.4130 0.157800
## MI1:CBT1             0.174400  0.11190  0.105600  1.6520 0.098580
## MI1:TIME_ref10       0.025320  0.07158  0.069030  0.3668 0.713800
## CBT1:TIME_ref10      0.160700  0.06790  0.073140  2.1970 0.027990
## MI1:CBT1:TIME_ref10 -0.174600  0.09640  0.096000 -1.8190 0.068990
## 
##  Estimated Correlation Parameter:  0.6249 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.1837 
## 
##  Number of GEE iterations: 2 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(PCSMKD_nobl_interaction_MIref1) #ref: MI=1, CBT=0, time=0
##                      Estimates Model SE Robust SE      wald        p
## (Intercept)          4.3970000  0.05933  0.050020 87.900000 0.000000
## cAGE                 0.0146700  0.02263  0.023800  0.616200 0.537800
## cBLCGSMD             0.0090460  0.00281  0.003239  2.793000 0.005224
## MI_ref10            -0.0676200  0.08316  0.081390 -0.830700 0.406100
## CBT1                -0.0187800  0.07911  0.072500 -0.259100 0.795500
## TIME1                0.0536500  0.05130  0.040500  1.325000 0.185300
## MI_ref10:CBT1        0.0001742  0.11190  0.112500  0.001549 0.998800
## MI_ref10:TIME1       0.0253200  0.07158  0.069030  0.366800 0.713800
## CBT1:TIME1           0.0138500  0.06843  0.062170  0.222800 0.823700
## MI_ref10:CBT1:TIME1 -0.1746000  0.09640  0.096000 -1.819000 0.068990
## 
##  Estimated Correlation Parameter:  0.6249 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.1837 
## 
##  Number of GEE iterations: 2 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(PCSMKD_nobl_interaction_MIref1_TIMEref1) #ref: MI=1, CBT=0, time=1
##                          Estimates Model SE Robust SE      wald        p
## (Intercept)               4.450000  0.05931  0.041030 108.40000 0.000000
## cAGE                      0.014670  0.02263  0.023800   0.61620 0.537800
## cBLCGSMD                  0.009046  0.00281  0.003239   2.79300 0.005224
## MI_ref10                 -0.042290  0.08313  0.069030  -0.61260 0.540100
## CBT1                     -0.004933  0.07908  0.059260  -0.08323 0.933700
## TIME_ref10               -0.053650  0.05130  0.040500  -1.32500 0.185300
## MI_ref10:CBT1            -0.174400  0.11190  0.105600  -1.65200 0.098580
## MI_ref10:TIME_ref10      -0.025320  0.07158  0.069030  -0.36680 0.713800
## CBT1:TIME_ref10          -0.013850  0.06843  0.062170  -0.22280 0.823700
## MI_ref10:CBT1:TIME_ref10  0.174600  0.09640  0.096000   1.81900 0.068990
## 
##  Estimated Correlation Parameter:  0.6249 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.1837 
## 
##  Number of GEE iterations: 2 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(PCSMKD_nobl_interaction_CBTref1) #ref: MI=0, CBT=1, time=0
##                      Estimates Model SE Robust SE      wald        p
## (Intercept)          4.3100000  0.05317  0.056210 76.680000 0.000000
## cAGE                 0.0146700  0.02263  0.023800  0.616200 0.537800
## cBLCGSMD             0.0090460  0.00281  0.003239  2.793000 0.005224
## MI1                  0.0674400  0.07462  0.076760  0.878600 0.379600
## CBT_ref10            0.0186100  0.07880  0.085690  0.217200 0.828100
## TIME1               -0.0817500  0.04601  0.047170 -1.733000 0.083050
## MI1:CBT_ref10        0.0001742  0.11190  0.112500  0.001549 0.998800
## MI1:TIME1            0.1492000  0.06457  0.066710  2.237000 0.025260
## CBT_ref10:TIME1      0.1607000  0.06790  0.073140  2.197000 0.027990
## MI1:CBT_ref10:TIME1 -0.1746000  0.09640  0.096000 -1.819000 0.068990
## 
##  Estimated Correlation Parameter:  0.6249 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.1837 
## 
##  Number of GEE iterations: 2 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(PCSMKD_nobl_interaction_CBTref1_TIMEref1) #ref: MI=0, CBT=1, time=1
##                          Estimates Model SE Robust SE    wald        p
## (Intercept)               4.229000  0.05320  0.066460 63.6200 0.000000
## cAGE                      0.014670  0.02263  0.023800  0.6162 0.537800
## cBLCGSMD                  0.009046  0.00281  0.003239  2.7930 0.005224
## MI1                       0.216700  0.07462  0.078930  2.7450 0.006043
## CBT_ref10                 0.179300  0.07880  0.086900  2.0640 0.039060
## TIME_ref10                0.081750  0.04601  0.047170  1.7330 0.083050
## MI1:CBT_ref10            -0.174400  0.11190  0.105600 -1.6520 0.098580
## MI1:TIME_ref10           -0.149200  0.06457  0.066710 -2.2370 0.025260
## CBT_ref10:TIME_ref10     -0.160700  0.06790  0.073140 -2.1970 0.027990
## MI1:CBT_ref10:TIME_ref10  0.174600  0.09640  0.096000  1.8190 0.068990
## 
##  Estimated Correlation Parameter:  0.6249 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.1837 
## 
##  Number of GEE iterations: 2 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(PCSMKD_nobl_interaction_MIref1_CBTref1) #ref: MI=1, CBT=1, time=0
##                           Estimates Model SE Robust SE      wald        p
## (Intercept)               4.3780000  0.05232  0.052660 83.130000 0.000000
## cAGE                      0.0146700  0.02263  0.023800  0.616200 0.537800
## cBLCGSMD                  0.0090460  0.00281  0.003239  2.793000 0.005224
## MI_ref10                 -0.0674400  0.07462  0.076760 -0.878600 0.379600
## CBT_ref10                 0.0187800  0.07911  0.072500  0.259100 0.795500
## TIME1                     0.0675000  0.04529  0.047170  1.431000 0.152500
## MI_ref10:CBT_ref10       -0.0001742  0.11190  0.112500 -0.001549 0.998800
## MI_ref10:TIME1           -0.1492000  0.06457  0.066710 -2.237000 0.025260
## CBT_ref10:TIME1          -0.0138500  0.06843  0.062170 -0.222800 0.823700
## MI_ref10:CBT_ref10:TIME1  0.1746000  0.09640  0.096000  1.819000 0.068990
## 
##  Estimated Correlation Parameter:  0.6249 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.1837 
## 
##  Number of GEE iterations: 2 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(PCSMKD_nobl_interaction_MIref1_CBTref1_TIMEref1) #ref: MI=1, CBT=1, time=1
##                               Estimates Model SE Robust SE      wald        p
## (Intercept)                    4.445000  0.05230  0.043090 103.20000 0.000000
## cAGE                           0.014670  0.02263  0.023800   0.61620 0.537800
## cBLCGSMD                       0.009046  0.00281  0.003239   2.79300 0.005224
## MI_ref10                      -0.216700  0.07462  0.078930  -2.74500 0.006043
## CBT_ref10                      0.004933  0.07908  0.059260   0.08323 0.933700
## TIME_ref10                    -0.067500  0.04529  0.047170  -1.43100 0.152500
## MI_ref10:CBT_ref10             0.174400  0.11190  0.105600   1.65200 0.098580
## MI_ref10:TIME_ref10            0.149200  0.06457  0.066710   2.23700 0.025260
## CBT_ref10:TIME_ref10           0.013850  0.06843  0.062170   0.22280 0.823700
## MI_ref10:CBT_ref10:TIME_ref10 -0.174600  0.09640  0.096000  -1.81900 0.068990
## 
##  Estimated Correlation Parameter:  0.6249 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.1837 
## 
##  Number of GEE iterations: 2 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486