rm(list=ls())


# Load libraries ---------------------------

library(haven)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(geeM)
## Loading required package: Matrix
library(magrittr)


# Data location for reading datasets and storing data ---------------------------

data_loc <- "/Volumes/caas/MERITS/RESOURCES/Data Processing/Data Analysis/Outcome Analyses 2016/Manuscript Revisions_2020/Aditya Khanna Analyses 2022/RESOURCES-AK-R"

# Read Data ---------------------------

## AVCIG, PCSMKD
raw_dt_avcig_pcsmkd <- read.csv("/Volumes/caas/MERITS/RESOURCES/Data Processing/Data Analysis/Outcome Analyses 2016/Manuscript Revisions_2020/Aditya Khanna Analyses 2022/TLDVSN243.csv")


# Specify corstr = ar1 ---------------------------
corstr = "ar1"


# Covariates for 2 time points (avcigs, PCSMKD) ---------------------------
covariates_nobl_dt <- 
  cbind.data.frame(
    cAGE = raw_dt_avcig_pcsmkd$AGE-mean(raw_dt_avcig_pcsmkd $AGE),
    cBLCGSMD = raw_dt_avcig_pcsmkd$BLCGSMD - mean(raw_dt_avcig_pcsmkd$BLCGSMD),
    MI = as.factor(raw_dt_avcig_pcsmkd $MI),
    CBT = as.factor(raw_dt_avcig_pcsmkd $CBT),
    TIME=as.factor(raw_dt_avcig_pcsmkd $TIME),
    M3ENVDAYS = raw_dt_avcig_pcsmkd $M3ENVDAYS,
    M6ENVDAYS = raw_dt_avcig_pcsmkd $M6ENVDAYS ,
    MIBYCBT = as.factor(raw_dt_avcig_pcsmkd $MIBYCBT),
    MIBYTIME = as.factor(raw_dt_avcig_pcsmkd $MIBYTIME),
    CBTBYTIME = as.factor(raw_dt_avcig_pcsmkd $CBTBYTIME),
    MIBYCBTBYTIME = as.factor(raw_dt_avcig_pcsmkd $MIBYCBTBYTIME),
    id = raw_dt_avcig_pcsmkd $id,
    log_unctrldays = raw_dt_avcig_pcsmkd $log_unctrldays
  )


# DV = AVCIG ---------------------------


# create covariates dt
AVCIG_nobl_dt <- 
  cbind.data.frame(AVCIG=raw_dt_avcig_pcsmkd $AVCIG,
                   covariates_nobl_dt)

#relevel MI and CBT
AVCIG_nobl_dt$MI_ref1 <- AVCIG_nobl_dt$MI %>% relevel("1")
AVCIG_nobl_dt$CBT_ref1 <- AVCIG_nobl_dt$CBT %>% relevel("1")
AVCIG_nobl_dt$TIME_ref1 <- AVCIG_nobl_dt$TIME %>% relevel("1")

# ignore missing values
## create an indicator column for any missing values 
AVCIG_nobl_dt$any_na <- AVCIG_nobl_dt %>% apply(1, function(x){any(is.na(x))}) 

## left join the dataset by the id column using the group_by() function
AVCIG_nobl_dt %<>% left_join(AVCIG_nobl_dt %>% group_by(id) %>% 
                               summarise(any_na2 = any(any_na)), by="id")


## filter out rows with any NAs
AVCIG_nobl_dt %<>% filter(any_na2 != T)

# modeling
## main model
AVCIG_nobl_main <- geem(formula =   
                          AVCIG~
                          cAGE +
                          cBLCGSMD+
                          MI +
                          CBT +
                          TIME,
                        data = AVCIG_nobl_dt,
                        id=id,
                        family = MASS::negative.binomial(1),
                        corstr = corstr
)


## p  rint main model summary
summary(AVCIG_nobl_main)
##             Estimates Model SE Robust SE     wald        p
## (Intercept)  1.769000 0.105000  0.113100 15.64000 0.000000
## cAGE        -0.016860 0.051810  0.059350 -0.28400 0.776400
## cBLCGSMD     0.048880 0.006252  0.005437  8.98900 0.000000
## MI1          0.074780 0.114600  0.113400  0.65950 0.509500
## CBT1         0.004513 0.115100  0.113200  0.03988 0.968200
## TIME1        0.114100 0.043960  0.044200  2.58200 0.009817
## 
##  Estimated Correlation Parameter:  0.7426 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.783 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
## interaction model
AVCIG_nobl_interaction <- geem(formula =   
                                 AVCIG~
                                 cAGE +
                                 cBLCGSMD+
                                 MI +
                                 CBT +
                                 TIME + 
                                 MI*CBT+
                                 MI*TIME+
                                 CBT*TIME+
                                 MI*CBT*TIME,
                               #mi=0, cbt=0, time=0
                               data = AVCIG_nobl_dt,
                               id=id,
                               family = MASS::negative.binomial(1),
                               corstr = corstr)

AVCIG_nobl_interaction_TIMEref1 <- geem(formula = 
                                          AVCIG ~ cAGE + cBLCGSMD + MI + CBT + TIME_ref1 + MI * CBT + MI * TIME_ref1 + 
                                          CBT * TIME_ref1 + MI * CBT * TIME_ref1,
                                        #mi=0, cbt=0, time=1
                                        data = AVCIG_nobl_dt,
                                        id=id,
                                        family = MASS::negative.binomial(1),
                                        corstr = corstr
)


AVCIG_nobl_interaction_MIref1 <- 
  geem(formula = AVCIG ~ cAGE + cBLCGSMD + MI_ref1 + CBT + TIME + MI_ref1 * CBT + MI_ref1 * TIME + 
         CBT * TIME + MI_ref1 * CBT * TIME,
       #ref: Mi=1, Cbt=0, Time=0
       data = AVCIG_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

AVCIG_nobl_interaction_MIref1_TIMEref1 <- 
  geem(formula = AVCIG ~ cAGE + cBLCGSMD + MI_ref1 + CBT + TIME_ref1 + 
         MI_ref1 * CBT + MI_ref1 * TIME_ref1 + 
         CBT * TIME_ref1 + MI_ref1 * CBT * TIME_ref1,
       #ref: Mi=1, Cbt=0, Time=1
       data = AVCIG_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

AVCIG_nobl_interaction_CBTref1 <- 
  geem(formula = AVCIG ~ cAGE + cBLCGSMD + MI + CBT_ref1 + TIME + MI * CBT_ref1 + MI * 
         TIME + CBT_ref1 * TIME + MI * CBT_ref1 * TIME,
       #ref: Mi=0, Cbt=1, Time = 0
       data = AVCIG_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

AVCIG_nobl_interaction_CBTref1_TIMEref1 <- 
  geem(formula = AVCIG ~ cAGE + cBLCGSMD + MI + CBT_ref1 + TIME_ref1 + 
         MI * CBT_ref1 + MI * TIME_ref1 + CBT_ref1 * TIME_ref1 + MI * CBT_ref1 * TIME_ref1,
       #ref: Mi=0, Cbt=1, Time = 1
       data = AVCIG_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

AVCIG_nobl_interaction_MIref1_CBTref1 <- 
  geem(formula = AVCIG ~ cAGE + cBLCGSMD + MI_ref1 + CBT_ref1 + TIME + MI_ref1 * CBT_ref1 + MI_ref1 * 
         TIME + CBT_ref1 * TIME + MI_ref1 * CBT_ref1 * TIME,
       #ref: Mi=1, Cbt=1, Time=0
       data = AVCIG_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

AVCIG_nobl_interaction_MIref1_CBTref1_TIMEref1 <- 
  geem(formula = AVCIG ~ cAGE + cBLCGSMD + MI_ref1 + CBT_ref1 + TIME_ref1 + 
         MI_ref1 * CBT_ref1 + MI_ref1 * TIME_ref1 + 
         CBT_ref1 * TIME_ref1 + MI_ref1 * CBT_ref1 * TIME_ref1,
       #ref: Mi=1, Cbt=1, Time=1
       data = AVCIG_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )


## print interaction model summary
summary(AVCIG_nobl_interaction) #base: MI=0, CBT=0, Time=0 
##                Estimates Model SE Robust SE    wald      p
## (Intercept)      1.83700 0.128600  0.142300 12.9100 0.0000
## cAGE            -0.01556 0.051830  0.059580 -0.2612 0.7939
## cBLCGSMD         0.05017 0.006273  0.005404  9.2830 0.0000
## MI1             -0.12750 0.185100  0.187200 -0.6808 0.4960
## CBT1            -0.10980 0.175300  0.178300 -0.6159 0.5380
## TIME1            0.10810 0.090540  0.072970  1.4820 0.1384
## MI1:CBT1         0.32920 0.248600  0.242500  1.3580 0.1746
## MI1:TIME1        0.12670 0.129800  0.133300  0.9506 0.3418
## CBT1:TIME1      -0.01690 0.123300  0.105900 -0.1595 0.8733
## MI1:CBT1:TIME1  -0.16000 0.174600  0.177700 -0.9005 0.3678
## 
##  Estimated Correlation Parameter:  0.748 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.7787 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(AVCIG_nobl_interaction_TIMEref1) #base: MI=0, CBT=0, Time=1
##                      Estimates Model SE Robust SE      wald      p
## (Intercept)          1.9450000 0.127600  0.118800 16.380000 0.0000
## cAGE                -0.0155600 0.051830  0.059580 -0.261200 0.7939
## cBLCGSMD             0.0501700 0.006273  0.005404  9.283000 0.0000
## MI1                 -0.0007346 0.182900  0.184900 -0.003973 0.9968
## CBT1                -0.1267000 0.174000  0.165800 -0.764300 0.4447
## TIME_ref10          -0.1081000 0.090540  0.072970 -1.482000 0.1384
## MI1:CBT1             0.1692000 0.246400  0.243600  0.694600 0.4873
## MI1:TIME_ref10      -0.1267000 0.129800  0.133300 -0.950600 0.3418
## CBT1:TIME_ref10      0.0169000 0.123300  0.105900  0.159500 0.8733
## MI1:CBT1:TIME_ref10  0.1600000 0.174600  0.177700  0.900500 0.3678
## 
##  Estimated Correlation Parameter:  0.748 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.7787 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(AVCIG_nobl_interaction_MIref1) #ref: MI=1, CBT=0, Time=0
##                     Estimates Model SE Robust SE    wald       p
## (Intercept)           1.71000 0.132400  0.120900 14.1400 0.00000
## cAGE                 -0.01556 0.051830  0.059580 -0.2612 0.79390
## cBLCGSMD              0.05017 0.006273  0.005404  9.2830 0.00000
## MI_ref10              0.12750 0.185100  0.187200  0.6808 0.49600
## CBT1                  0.21940 0.175400  0.164900  1.3310 0.18330
## TIME1                 0.23490 0.092980  0.111600  2.1050 0.03526
## MI_ref10:CBT1        -0.32920 0.248600  0.242500 -1.3580 0.17460
## MI_ref10:TIME1       -0.12670 0.129800  0.133300 -0.9506 0.34180
## CBT1:TIME1           -0.17690 0.123700  0.142700 -1.2400 0.21500
## MI_ref10:CBT1:TIME1   0.16000 0.174600  0.177700  0.9005 0.36780
## 
##  Estimated Correlation Parameter:  0.748 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.7787 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(AVCIG_nobl_interaction_MIref1_TIMEref1) #ref: MI=1, CBT=0, Time=1
##                           Estimates Model SE Robust SE      wald       p
## (Intercept)               1.9450000 0.130200  0.138900 14.000000 0.00000
## cAGE                     -0.0155600 0.051830  0.059580 -0.261200 0.79390
## cBLCGSMD                  0.0501700 0.006273  0.005404  9.283000 0.00000
## MI_ref10                  0.0007346 0.182900  0.184900  0.003973 0.99680
## CBT1                      0.0424900 0.173500  0.176600  0.240600 0.80990
## TIME_ref10               -0.2349000 0.092980  0.111600 -2.105000 0.03526
## MI_ref10:CBT1            -0.1692000 0.246400  0.243600 -0.694600 0.48730
## MI_ref10:TIME_ref10       0.1267000 0.129800  0.133300  0.950600 0.34180
## CBT1:TIME_ref10           0.1769000 0.123700  0.142700  1.240000 0.21500
## MI_ref10:CBT1:TIME_ref10 -0.1600000 0.174600  0.177700 -0.900500 0.36780
## 
##  Estimated Correlation Parameter:  0.748 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.7787 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(AVCIG_nobl_interaction_CBTref1) #ref: MI=0, CBT=1, Time=0
##                     Estimates Model SE Robust SE    wald      p
## (Intercept)           1.72700 0.118500  0.109200 15.8100 0.0000
## cAGE                 -0.01556 0.051830  0.059580 -0.2612 0.7939
## cBLCGSMD              0.05017 0.006273  0.005404  9.2830 0.0000
## MI1                   0.20170 0.165200  0.156900  1.2860 0.1985
## CBT_ref10             0.10980 0.175300  0.178300  0.6159 0.5380
## TIME1                 0.09123 0.083630  0.076780  1.1880 0.2348
## MI1:CBT_ref10        -0.32920 0.248600  0.242500 -1.3580 0.1746
## MI1:TIME1            -0.03328 0.116800  0.117500 -0.2832 0.7770
## CBT_ref10:TIME1       0.01690 0.123300  0.105900  0.1595 0.8733
## MI1:CBT_ref10:TIME1   0.16000 0.174600  0.177700  0.9005 0.3678
## 
##  Estimated Correlation Parameter:  0.748 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.7787 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(AVCIG_nobl_interaction_CBTref1_TIMEref1) #ref: MI=0, CBT=1, Time=1
##                          Estimates Model SE Robust SE    wald      p
## (Intercept)                1.81900 0.117700  0.116100 15.6700 0.0000
## cAGE                      -0.01556 0.051830  0.059580 -0.2612 0.7939
## cBLCGSMD                   0.05017 0.006273  0.005404  9.2830 0.0000
## MI1                        0.16840 0.164300  0.159800  1.0540 0.2918
## CBT_ref10                  0.12670 0.174000  0.165800  0.7643 0.4447
## TIME_ref10                -0.09123 0.083630  0.076780 -1.1880 0.2348
## MI1:CBT_ref10             -0.16920 0.246400  0.243600 -0.6946 0.4873
## MI1:TIME_ref10             0.03328 0.116800  0.117500  0.2832 0.7770
## CBT_ref10:TIME_ref10      -0.01690 0.123300  0.105900 -0.1595 0.8733
## MI1:CBT_ref10:TIME_ref10  -0.16000 0.174600  0.177700 -0.9005 0.3678
## 
##  Estimated Correlation Parameter:  0.748 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.7787 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(AVCIG_nobl_interaction_MIref1_CBTref1) #ref: MI=1, CBT=1, Time=0
##                          Estimates Model SE Robust SE    wald      p
## (Intercept)                1.92900 0.115100  0.113400 17.0100 0.0000
## cAGE                      -0.01556 0.051830  0.059580 -0.2612 0.7939
## cBLCGSMD                   0.05017 0.006273  0.005404  9.2830 0.0000
## MI_ref10                  -0.20170 0.165200  0.156900 -1.2860 0.1985
## CBT_ref10                 -0.21940 0.175400  0.164900 -1.3310 0.1833
## TIME1                      0.05795 0.081550  0.088910  0.6518 0.5145
## MI_ref10:CBT_ref10         0.32920 0.248600  0.242500  1.3580 0.1746
## MI_ref10:TIME1             0.03328 0.116800  0.117500  0.2832 0.7770
## CBT_ref10:TIME1            0.17690 0.123700  0.142700  1.2400 0.2150
## MI_ref10:CBT_ref10:TIME1  -0.16000 0.174600  0.177700 -0.9005 0.3678
## 
##  Estimated Correlation Parameter:  0.748 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.7787 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486
summary(AVCIG_nobl_interaction_MIref1_CBTref1_TIMEref1) #ref: MI=1, CBT=1, Time=1
##                               Estimates Model SE Robust SE    wald      p
## (Intercept)                     1.98700 0.114700  0.111400 17.8400 0.0000
## cAGE                           -0.01556 0.051830  0.059580 -0.2612 0.7939
## cBLCGSMD                        0.05017 0.006273  0.005404  9.2830 0.0000
## MI_ref10                       -0.16840 0.164300  0.159800 -1.0540 0.2918
## CBT_ref10                      -0.04249 0.173500  0.176600 -0.2406 0.8099
## TIME_ref10                     -0.05795 0.081550  0.088910 -0.6518 0.5145
## MI_ref10:CBT_ref10              0.16920 0.246400  0.243600  0.6946 0.4873
## MI_ref10:TIME_ref10            -0.03328 0.116800  0.117500 -0.2832 0.7770
## CBT_ref10:TIME_ref10           -0.17690 0.123700  0.142700 -1.2400 0.2150
## MI_ref10:CBT_ref10:TIME_ref10   0.16000 0.174600  0.177700  0.9005 0.3678
## 
##  Estimated Correlation Parameter:  0.748 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.7787 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  243    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  486