rm(list=ls())


# Load libraries ---------------------------

library(haven)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)
library(geeM)
## Loading required package: Matrix
# Set wd for generated output ---------------------------

#setwd("/Volumes/caas/MERITS/RESOURCES/Data Processing/Data Analysis/Outcome Analyses 2016/Manuscript Revisions_2020/Aditya Khanna Analyses 2022/RESOURCES-AK-R")


# Read data ---------------------------

gee_nobl_dt <- read.csv("/Volumes/caas/MERITS/RESOURCES/Data Processing/Data Analysis/Outcome Analyses 2016/Manuscript Revisions_2020/Aditya Khanna Analyses 2022/PPVABS295.csv") #no baseline data
dim(gee_nobl_dt) 
## [1] 590  27
length(unique(gee_nobl_dt$id)) # Check number of clusters (i.e., unique IDs)
## [1] 295
# Specify corstr = ar1 ---------------------------

corstr = "ar1"


# Create covariate data objects ---------------------------

covariates_nobl_dt <- 
  cbind.data.frame(
    cAGE = gee_nobl_dt$AGE-mean(gee_nobl_dt$AGE),
    cBLCGSMD = gee_nobl_dt$BLCGSMD - mean(gee_nobl_dt$BLCGSMD),
    MI = as.factor(gee_nobl_dt$MI),
    CBT = as.factor(gee_nobl_dt$CBT),
    TIME=as.factor(gee_nobl_dt$TIME),
    #CBT=as.factor(gee_nobl_dt$CBT),
    M3ENVDAYS = gee_nobl_dt$M3ENVDAYS,
    M6ENVDAYS = gee_nobl_dt$M6ENVDAYS ,
    MIBYCBT = as.factor(gee_nobl_dt$MIBYCBT),
    MIBYTIME = as.factor(gee_nobl_dt$MIBYTIME),
    CBTBYTIME = as.factor(gee_nobl_dt$CBTBYTIME),
    MIBYCBTBYTIME = as.factor(gee_nobl_dt$MIBYCBTBYTIME),
    id = gee_nobl_dt$id,
    log_unctrldays = gee_nobl_dt$log_unctrldays
  )




# DV = PPVABS ---------------------------

## create dataframe
PPVABS_nobl_dt <- as.data.frame(
  cbind.data.frame(PPVABS=gee_nobl_dt$PPVABS,
                   covariates_nobl_dt)
)

class(PPVABS_nobl_dt)
## [1] "data.frame"
dim(PPVABS_nobl_dt)
## [1] 590  14
summary(PPVABS_nobl_dt)  
##      PPVABS            cAGE            cBLCGSMD       MI      CBT     TIME   
##  Min.   :0.0000   Min.   :-2.8842   Min.   :-10.934   0:296   0:272   0:295  
##  1st Qu.:0.0000   1st Qu.:-0.7877   1st Qu.: -7.225   1:294   1:318   1:295  
##  Median :0.0000   Median : 0.1130   Median : -1.934                          
##  Mean   :0.2169   Mean   : 0.0000   Mean   :  0.000                          
##  3rd Qu.:0.0000   3rd Qu.: 0.8028   3rd Qu.:  5.888                          
##  Max.   :1.0000   Max.   : 2.3349   Max.   : 49.411                          
##                                                                              
##    M3ENVDAYS       M6ENVDAYS     MIBYCBT MIBYTIME CBTBYTIME MIBYCBTBYTIME
##  Min.   : 0.00   Min.   : 0.00   0:432   0:443    0:431     0:511        
##  1st Qu.: 0.00   1st Qu.: 0.00   1:158   1:147    1:159     1: 79        
##  Median :12.00   Median : 0.00                                           
##  Mean   :21.34   Mean   :19.94                                           
##  3rd Qu.:41.00   3rd Qu.:36.00                                           
##  Max.   :60.00   Max.   :60.00                                           
##  NA's   :26      NA's   :34                                              
##        id         log_unctrldays 
##  Min.   :  1.00   Min.   :0.000  
##  1st Qu.: 78.25   1st Qu.:2.944  
##  Median :154.00   Median :3.662  
##  Mean   :157.10   Mean   :3.153  
##  3rd Qu.:237.75   3rd Qu.:4.111  
##  Max.   :316.00   Max.   :4.111  
## 
colnames(PPVABS_nobl_dt)
##  [1] "PPVABS"         "cAGE"           "cBLCGSMD"       "MI"            
##  [5] "CBT"            "TIME"           "M3ENVDAYS"      "M6ENVDAYS"     
##  [9] "MIBYCBT"        "MIBYTIME"       "CBTBYTIME"      "MIBYCBTBYTIME" 
## [13] "id"             "log_unctrldays"
## restrict to complete cases
#PPVABS_nobl_dt_na.omit <- na.omit(PPVABS_nobl_dt)  
#dim(PPVABS_nobl_dt_na.omit)  
#summary(PPVABS_nobl_dt_na.omit)
#colnames(PPVABS_nobl_dt_na.omit)



# DV = PPVABS ---------------------------

#relevel MI and CBT
PPVABS_nobl_dt$MI_ref1 <- PPVABS_nobl_dt$MI %>% relevel("1")
PPVABS_nobl_dt$CBT_ref1 <- PPVABS_nobl_dt$CBT %>% relevel("1")
PPVABS_nobl_dt$TIME_ref1 <- PPVABS_nobl_dt$TIME %>% relevel("1")

# # ignore missing values
# ## create an indicator column for any missing values 
# PPVABS_nobl_dt$any_na <- PPVABS_nobl_dt %>% apply(1, function(x){any(is.na(x))}) 
# 
# # ## left join the dataset by the id column using the group_by() function
# PPVABS_nobl_dt %<>% left_join(PPVABS_nobl_dt %>% group_by(id) %>% 
#                                 summarise(any_na2 = any(any_na)), by="id")
# 
# 
# # ## filter out rows with any NAs
# PPVABS_nobl_dt %<>% filter(any_na2 != T)


# modeling
## fit main model
PPVABS_nobl_main <- geem(formula =   
                           PPVABS~
                           cAGE +
                           cBLCGSMD+
                           MI +
                           CBT +
                           TIME ,
                         data = PPVABS_nobl_dt,
                         id=id,
                         family = binomial,
                         corstr = corstr
)

## print model summary
summary(PPVABS_nobl_main)
##             Estimates Model SE Robust SE    wald         p
## (Intercept)  -1.37700  0.24020   0.24280 -5.6720 1.000e-08
## cAGE         -0.35530  0.11790   0.09902 -3.5880 3.328e-04
## cBLCGSMD     -0.02397  0.01528   0.01857 -1.2910 1.968e-01
## MI1          -0.09669  0.25680   0.25360 -0.3812 7.031e-01
## CBT1          0.17600  0.25800   0.26010  0.6767 4.986e-01
## TIME1        -0.04155  0.13650   0.13470 -0.3085 7.577e-01
## 
##  Estimated Correlation Parameter:  0.5563 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  1.011 
## 
##  Number of GEE iterations: 3 
##  Number of Clusters:  295    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  590
## fit 3-way interaction model
PPVABS_nobl_interaction <- 
  geem(formula =   PPVABS~
         cAGE +
         cBLCGSMD+
         MI +
         CBT +
         TIME + 
         MI*CBT+
         MI*TIME+
         CBT*TIME+
         MI*CBT*TIME,
       #ref: Mi=0, Cbt=0, time=0
       data = PPVABS_nobl_dt,
       id=id,
       family = binomial,
       corstr = corstr
  )

PPVABS_nobl_interaction_TIMEref1 <- 
  geem(formula = PPVABS ~ cAGE + cBLCGSMD + MI + CBT + TIME_ref1 + MI * CBT + MI * 
         TIME_ref1 + CBT * TIME_ref1 + MI * CBT * TIME_ref1,
       #ref: Mi=0, Cbt=0, time=1
       data = PPVABS_nobl_dt,
       id=id,
       family = binomial,
       corstr = corstr
  )

PPVABS_nobl_interaction_MIref1 <- 
  geem(formula = PPVABS ~ cAGE + cBLCGSMD + MI_ref1 + CBT + TIME + MI_ref1 * CBT + MI_ref1 * 
         TIME + CBT * TIME + MI_ref1 * CBT * TIME,
       #ref: Mi=1, Cbt=0, time=0
       data = PPVABS_nobl_dt,
       id=id,
       family = binomial,
       corstr = corstr
  )

PPVABS_nobl_interaction_MIref1_TIMEref1 <- 
  geem(formula = PPVABS ~ cAGE + cBLCGSMD + MI_ref1 + CBT + TIME_ref1 + MI_ref1 * CBT + MI_ref1 * 
         TIME_ref1 + CBT * TIME_ref1 + MI_ref1 * CBT * TIME_ref1,
       #ref: Mi=1, Cbt=0, time=1
       data = PPVABS_nobl_dt,
       id=id,
       family = binomial,
       corstr = corstr
  )

PPVABS_nobl_interaction_CBTref1 <- 
  geem(formula = PPVABS ~ cAGE + cBLCGSMD + MI + CBT_ref1 + TIME + MI * CBT_ref1 + MI * 
         TIME + CBT_ref1 * TIME + MI * CBT_ref1 * TIME,
       #ref: Mi=0, Cbt=1, time=0
       data = PPVABS_nobl_dt,
       id=id,
       family = binomial,
       corstr = corstr
  )

PPVABS_nobl_interaction_CBTref1_TIMEref1 <- 
  geem(formula = PPVABS ~ cAGE + cBLCGSMD + MI + CBT_ref1 + TIME_ref1 + MI * CBT_ref1 + MI * 
         TIME_ref1 + CBT_ref1 * TIME_ref1 + MI * CBT_ref1 * TIME_ref1,
       #ref: Mi=0, Cbt=1, time=1
       data = PPVABS_nobl_dt,
       id=id,
       family = binomial,
       corstr = corstr
  )

PPVABS_nobl_interaction_MIref1_CBTref1 <- 
  geem(formula = PPVABS ~ cAGE + cBLCGSMD + MI_ref1 + CBT_ref1 + TIME + MI_ref1 * CBT_ref1 + MI_ref1 * 
         TIME + CBT_ref1 * TIME + MI_ref1 * CBT_ref1 * TIME,
       #ref: Mi=0, Cbt=1, time=0
       data = PPVABS_nobl_dt,
       id=id,
       family = binomial,
       corstr = corstr
  )

PPVABS_nobl_interaction_MIref1_CBTref1_TIMEref1 <- 
  geem(formula = PPVABS ~ cAGE + cBLCGSMD + MI_ref1 + CBT_ref1 + TIME_ref1 + MI_ref1 * CBT_ref1 + MI_ref1 * 
         TIME_ref1 + CBT_ref1 * TIME_ref1 + MI_ref1 * CBT_ref1 * TIME_ref1,
       #ref: Mi=0, Cbt=1, time=1
       data = PPVABS_nobl_dt,
       id=id,
       family = binomial,
       corstr = corstr
  )

## print interaction model summary
summary(PPVABS_nobl_interaction) #base: MI=0, CBT=0, TIME=0
##                Estimates Model SE Robust SE     wald         p
## (Intercept)     -1.35700  0.30100   0.30100 -4.51000 6.490e-06
## cAGE            -0.36400  0.11900   0.09947 -3.65900 2.529e-04
## cBLCGSMD        -0.02180  0.01515   0.01825 -1.19500 2.321e-01
## MI1             -0.40040  0.45840   0.46050 -0.86940 3.847e-01
## CBT1             0.17370  0.40170   0.40410  0.42980 6.674e-01
## TIME1           -0.08657  0.27550   0.26100 -0.33170 7.401e-01
## MI1:CBT1         0.44670  0.59550   0.59450  0.75140 4.524e-01
## MI1:TIME1        0.58750  0.41040   0.45100  1.30300 1.927e-01
## CBT1:TIME1       0.01296  0.37410   0.35690  0.03632 9.710e-01
## MI1:CBT1:TIME1  -0.90330  0.54820   0.56060 -1.61100 1.071e-01
## 
##  Estimated Correlation Parameter:  0.5693 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  1.012 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  295    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  590
summary(PPVABS_nobl_interaction_TIMEref1) #base: MI=0, CBT=0, TIME=1
##                     Estimates Model SE Robust SE     wald         p
## (Intercept)          -1.44400  0.30760   0.31250 -4.62000 3.840e-06
## cAGE                 -0.36400  0.11900   0.09947 -3.65900 2.529e-04
## cBLCGSMD             -0.02180  0.01515   0.01825 -1.19500 2.321e-01
## MI1                   0.18720  0.42790   0.43120  0.43400 6.643e-01
## CBT1                  0.18660  0.40980   0.41380  0.45090 6.520e-01
## TIME_ref10            0.08657  0.27550   0.26100  0.33170 7.401e-01
## MI1:CBT1             -0.45660  0.58880   0.58670 -0.77830 4.364e-01
## MI1:TIME_ref10       -0.58750  0.41040   0.45100 -1.30300 1.927e-01
## CBT1:TIME_ref10      -0.01296  0.37410   0.35690 -0.03632 9.710e-01
## MI1:CBT1:TIME_ref10   0.90330  0.54820   0.56060  1.61100 1.071e-01
## 
##  Estimated Correlation Parameter:  0.5693 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  1.012 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  295    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  590
summary(PPVABS_nobl_interaction_MIref1) #ref: MI=1, CBT=0, TIME=0
##                     Estimates Model SE Robust SE    wald         p
## (Intercept)           -1.7580  0.34920   0.35790 -4.9110 9.100e-07
## cAGE                  -0.3640  0.11900   0.09947 -3.6590 2.529e-04
## cBLCGSMD              -0.0218  0.01515   0.01825 -1.1950 2.321e-01
## MI_ref10               0.4004  0.45840   0.46050  0.8694 3.847e-01
## CBT1                   0.6203  0.43820   0.43840  1.4150 1.570e-01
## TIME1                  0.5010  0.30420   0.36760  1.3630 1.729e-01
## MI_ref10:CBT1         -0.4467  0.59550   0.59450 -0.7514 4.524e-01
## MI_ref10:TIME1        -0.5875  0.41040   0.45100 -1.3030 1.927e-01
## CBT1:TIME1            -0.8903  0.40070   0.43190 -2.0610 3.926e-02
## MI_ref10:CBT1:TIME1    0.9033  0.54820   0.56060  1.6110 1.071e-01
## 
##  Estimated Correlation Parameter:  0.5693 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  1.012 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  295    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  590
summary(PPVABS_nobl_interaction_MIref1_TIMEref1) #ref: MI=1, CBT=0, TIME=1
##                          Estimates Model SE Robust SE    wald         p
## (Intercept)                -1.2570  0.30030   0.30790 -4.0810 4.491e-05
## cAGE                       -0.3640  0.11900   0.09947 -3.6590 2.529e-04
## cBLCGSMD                   -0.0218  0.01515   0.01825 -1.1950 2.321e-01
## MI_ref10                   -0.1872  0.42790   0.43120 -0.4340 6.643e-01
## CBT1                       -0.2700  0.42040   0.41810 -0.6457 5.185e-01
## TIME_ref10                 -0.5010  0.30420   0.36760 -1.3630 1.729e-01
## MI_ref10:CBT1               0.4566  0.58880   0.58670  0.7783 4.364e-01
## MI_ref10:TIME_ref10         0.5875  0.41040   0.45100  1.3030 1.927e-01
## CBT1:TIME_ref10             0.8903  0.40070   0.43190  2.0610 3.926e-02
## MI_ref10:CBT1:TIME_ref10   -0.9033  0.54820   0.56060 -1.6110 1.071e-01
## 
##  Estimated Correlation Parameter:  0.5693 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  1.012 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  295    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  590
summary(PPVABS_nobl_interaction_CBTref1) #ref: MI=0, CBT=1, TIME=0
##                     Estimates Model SE Robust SE     wald         p
## (Intercept)          -1.18400  0.27110   0.26950 -4.39100 1.129e-05
## cAGE                 -0.36400  0.11900   0.09947 -3.65900 2.529e-04
## cBLCGSMD             -0.02180  0.01515   0.01825 -1.19500 2.321e-01
## MI1                   0.04630  0.37910   0.37260  0.12430 9.011e-01
## CBT_ref10            -0.17370  0.40170   0.40410 -0.42980 6.674e-01
## TIME1                -0.07361  0.25310   0.24340 -0.30240 7.624e-01
## MI1:CBT_ref10        -0.44670  0.59550   0.59450 -0.75140 4.524e-01
## MI1:TIME1            -0.31580  0.36330   0.33280 -0.94890 3.427e-01
## CBT_ref10:TIME1      -0.01296  0.37410   0.35690 -0.03632 9.710e-01
## MI1:CBT_ref10:TIME1   0.90330  0.54820   0.56060  1.61100 1.071e-01
## 
##  Estimated Correlation Parameter:  0.5693 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  1.012 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  295    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  590
summary(PPVABS_nobl_interaction_CBTref1_TIMEref1) #ref: MI=0, CBT=1, TIME=1
##                          Estimates Model SE Robust SE     wald         p
## (Intercept)               -1.25700  0.27610   0.27080 -4.64200 3.460e-06
## cAGE                      -0.36400  0.11900   0.09947 -3.65900 2.529e-04
## cBLCGSMD                  -0.02180  0.01515   0.01825 -1.19500 2.321e-01
## MI1                       -0.26950  0.40290   0.39420 -0.68360 4.942e-01
## CBT_ref10                 -0.18660  0.40980   0.41380 -0.45090 6.520e-01
## TIME_ref10                 0.07361  0.25310   0.24340  0.30240 7.624e-01
## MI1:CBT_ref10              0.45660  0.58880   0.58670  0.77830 4.364e-01
## MI1:TIME_ref10             0.31580  0.36330   0.33280  0.94890 3.427e-01
## CBT_ref10:TIME_ref10       0.01296  0.37410   0.35690  0.03632 9.710e-01
## MI1:CBT_ref10:TIME_ref10  -0.90330  0.54820   0.56060 -1.61100 1.071e-01
## 
##  Estimated Correlation Parameter:  0.5693 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  1.012 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  295    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  590
summary(PPVABS_nobl_interaction_MIref1_CBTref1) #ref: MI=1, CBT=1, TIME=0
##                          Estimates Model SE Robust SE    wald         p
## (Intercept)                -1.1370  0.26720   0.25640 -4.4350 0.0000092
## cAGE                       -0.3640  0.11900   0.09947 -3.6590 0.0002529
## cBLCGSMD                   -0.0218  0.01515   0.01825 -1.1950 0.2321000
## MI_ref10                   -0.0463  0.37910   0.37260 -0.1243 0.9011000
## CBT_ref10                  -0.6203  0.43820   0.43840 -1.4150 0.1570000
## TIME1                      -0.3894  0.26070   0.22680 -1.7170 0.0859500
## MI_ref10:CBT_ref10          0.4467  0.59550   0.59450  0.7514 0.4524000
## MI_ref10:TIME1              0.3158  0.36330   0.33280  0.9489 0.3427000
## CBT_ref10:TIME1             0.8903  0.40070   0.43190  2.0610 0.0392600
## MI_ref10:CBT_ref10:TIME1   -0.9033  0.54820   0.56060 -1.6110 0.1071000
## 
##  Estimated Correlation Parameter:  0.5693 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  1.012 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  295    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  590
summary(PPVABS_nobl_interaction_MIref1_CBTref1_TIMEref1) #ref: MI=1, CBT=1, TIME=1
##                               Estimates Model SE Robust SE    wald         p
## (Intercept)                     -1.5270  0.29590   0.28520 -5.3520 9.000e-08
## cAGE                            -0.3640  0.11900   0.09947 -3.6590 2.529e-04
## cBLCGSMD                        -0.0218  0.01515   0.01825 -1.1950 2.321e-01
## MI_ref10                         0.2695  0.40290   0.39420  0.6836 4.942e-01
## CBT_ref10                        0.2700  0.42040   0.41810  0.6457 5.185e-01
## TIME_ref10                       0.3894  0.26070   0.22680  1.7170 8.595e-02
## MI_ref10:CBT_ref10              -0.4566  0.58880   0.58670 -0.7783 4.364e-01
## MI_ref10:TIME_ref10             -0.3158  0.36330   0.33280 -0.9489 3.427e-01
## CBT_ref10:TIME_ref10            -0.8903  0.40070   0.43190 -2.0610 3.926e-02
## MI_ref10:CBT_ref10:TIME_ref10    0.9033  0.54820   0.56060  1.6110 1.071e-01
## 
##  Estimated Correlation Parameter:  0.5693 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  1.012 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  295    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  590