rm(list=ls())


# Load libraries ---------------------------

library(haven)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)
library(geeM)
## Loading required package: Matrix
# Read data ---------------------------

gee_nobl_dt <- read_sas("/Volumes/caas/MERITS/RESOURCES/Data Processing/Data Analysis/Outcome Analyses 2016/Manuscript Revisions_2020/Aditya Khanna Analyses 2022/RESOURCES-AK-R/../GEEOUTCOME2021NOBL-frombackup.sas7bdat") #no baseline data
dim(gee_nobl_dt) 
## [1] 604  37
# Specify corstr = ar1 ---------------------------

corstr = "ar1"


# Create covariate data objects ---------------------------

covariates_nobl_dt <- 
  cbind.data.frame(
    cAGE = gee_nobl_dt$AGE-mean(gee_nobl_dt$AGE),
    cBLCGSMD = gee_nobl_dt$BLCGSMD - mean(gee_nobl_dt$BLCGSMD),
    MI = as.factor(gee_nobl_dt$MI),
    CBT = as.factor(gee_nobl_dt$CBT),
    TIME=as.factor(gee_nobl_dt$TIME),
    #CBT=as.factor(gee_nobl_dt$CBT),
    M3ENVDAYS = gee_nobl_dt$M3ENVDAYS,
    M6ENVDAYS = gee_nobl_dt$M6ENVDAYS ,
    MIBYCBT = as.factor(gee_nobl_dt$MIBYCBT),
    MIBYTIME = as.factor(gee_nobl_dt$MIBYTIME),
    CBTBYTIME = as.factor(gee_nobl_dt$CBTBYTIME),
    MIBYCBTBYTIME = as.factor(gee_nobl_dt$MIBYCBTBYTIME),
    id = gee_nobl_dt$id,
    log_unctrldays = gee_nobl_dt$log_unctrldays
  )


# DV = Cig/SmkD ---------------------------
## create dataframe
CGSMD_nobl_dt <- as.data.frame(
  cbind.data.frame(CGSMD=gee_nobl_dt$CGSMD,
                   covariates_nobl_dt)
)


#relevel MI and CBT
CGSMD_nobl_dt$MI_ref1 <- CGSMD_nobl_dt$MI %>% relevel("1")
CGSMD_nobl_dt$CBT_ref1 <- CGSMD_nobl_dt$CBT %>% relevel("1")
CGSMD_nobl_dt$TIME_ref1 <- CGSMD_nobl_dt$TIME %>% relevel("1")

# ignore missing values
## create an indicator column for any missing values 
CGSMD_nobl_dt$any_na <- CGSMD_nobl_dt %>% apply(1, function(x){any(is.na(x))}) 

## left join the dataset by the id column using the group_by() function
CGSMD_nobl_dt %<>% left_join(CGSMD_nobl_dt %>% group_by(id) %>% 
                               summarise(any_na2 = any(any_na)), by="id")


## filter out rows with any NAs
CGSMD_nobl_dt %<>% filter(any_na2 != T)

# modeling
## main model
CGSMD_nobl_main <- geem(formula =   
                          CGSMD~
                          cAGE +
                          cBLCGSMD+
                          MI +
                          CBT +
                          TIME ,
                        data = CGSMD_nobl_dt,
                        id=id,
                        family = MASS::negative.binomial(1),
                        corstr = corstr
)


## print main model summary
summary(CGSMD_nobl_main)
##             Estimates Model SE Robust SE    wald        p
## (Intercept)   1.96100 0.089830  0.095270 20.5800 0.000000
## cAGE         -0.02582 0.044690  0.050800 -0.5083 0.611200
## cBLCGSMD      0.04462 0.005711  0.004886  9.1330 0.000000
## MI1          -0.02427 0.098520  0.097120 -0.2499 0.802700
## CBT1          0.06931 0.098740  0.097130  0.7137 0.475400
## TIME1         0.09890 0.037940  0.038150  2.5920 0.009532
## 
##  Estimated Correlation Parameter:  0.7412 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.528 
## 
##  Number of GEE iterations: 3 
##  Number of Clusters:  216    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  432
## interaction model
CGSMD_nobl_interaction <- geem(formula =   
                                 CGSMD~
                                 cAGE +
                                 cBLCGSMD+
                                 MI +
                                 CBT +
                                 TIME + 
                                 MI*CBT+
                                 MI*TIME+
                                 CBT*TIME+
                                 MI*CBT*TIME,
                               #mi=0, cbt=0, time=0
                               data = CGSMD_nobl_dt,
                               id=id,
                               family = MASS::negative.binomial(1),
                               corstr = corstr
)

CGSMD_nobl_interaction_TIMEref1 <- geem(formula = 
                                          CGSMD ~ cAGE + cBLCGSMD + MI + CBT + TIME_ref1 + MI * CBT + MI * TIME_ref1 + 
                                          CBT * TIME_ref1 + MI * CBT * TIME_ref1,
                                        #mi=0, cbt=0, time=1
                                        data = CGSMD_nobl_dt,
                                        id=id,
                                        family = MASS::negative.binomial(1),
                                        corstr = corstr
)


CGSMD_nobl_interaction_MIref1 <- 
  geem(formula = CGSMD ~ cAGE + cBLCGSMD + MI_ref1 + CBT + TIME + MI_ref1 * CBT + MI_ref1 * TIME + 
         CBT * TIME + MI_ref1 * CBT * TIME,
       #ref: Mi=1, Cbt=0, time=0
       data = CGSMD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

CGSMD_nobl_interaction_MIref1_TIMEref1 <- 
  geem(formula = CGSMD ~ cAGE + cBLCGSMD + MI_ref1 + CBT + TIME_ref1 + 
         MI_ref1 * CBT + MI_ref1 * TIME_ref1 + 
         CBT * TIME_ref1 + MI_ref1 * CBT * TIME_ref1,
       #ref: Mi=1, Cbt=0, time=1
       data = CGSMD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

CGSMD_nobl_interaction_CBTref1 <- 
  geem(formula = CGSMD ~ cAGE + cBLCGSMD + MI + CBT_ref1 + TIME + MI * CBT_ref1 + MI * 
         TIME + CBT_ref1 * TIME + MI * CBT_ref1 * TIME,
       #ref: Mi=0, Cbt=1, time=0
       data = CGSMD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

CGSMD_nobl_interaction_CBTref1_TIMEref1 <- 
  geem(formula = CGSMD ~ cAGE + cBLCGSMD + MI + CBT_ref1 + TIME_ref1 + MI * CBT_ref1 + MI * 
         TIME_ref1 + CBT_ref1 * TIME_ref1 + MI * CBT_ref1 * TIME_ref1,
       #ref: Mi=0, Cbt=1, time=1
       data = CGSMD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

CGSMD_nobl_interaction_MIref1_CBTref1 <- 
  geem(formula = CGSMD ~ cAGE + cBLCGSMD + MI_ref1 + CBT_ref1 + TIME + MI_ref1 * CBT_ref1 + MI_ref1 * 
         TIME + CBT_ref1 * TIME + MI_ref1 * CBT_ref1 * TIME,
       #ref: Mi=0, Cbt=1, time=0
       data = CGSMD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )

CGSMD_nobl_interaction_MIref1_CBTref1_TIMEref1 <- 
  geem(formula = CGSMD ~ cAGE + cBLCGSMD + MI_ref1 + CBT_ref1 + TIME_ref1 + MI_ref1 * CBT_ref1 + MI_ref1 * 
         TIME_ref1 + CBT_ref1 * TIME_ref1 + MI_ref1 * CBT_ref1 * TIME_ref1,
       #ref: Mi=0, Cbt=1, time=1
       data = CGSMD_nobl_dt,
       id=id,
       family = MASS::negative.binomial(1),
       corstr = corstr
  )


## print interaction model summary
summary(CGSMD_nobl_interaction) #base: MI=0, CBT=0, time=0
##                Estimates Model SE Robust SE    wald      p
## (Intercept)      2.01000 0.110100  0.120000 16.7500 0.0000
## cAGE            -0.02455 0.044860  0.051060 -0.4808 0.6307
## cBLCGSMD         0.04529 0.005742  0.004835  9.3670 0.0000
## MI1             -0.15170 0.155900  0.160100 -0.9479 0.3432
## CBT1            -0.04097 0.153100  0.149300 -0.2744 0.7837
## TIME1            0.07267 0.077410  0.062440  1.1640 0.2445
## MI1:CBT1         0.25960 0.213500  0.207800  1.2490 0.2116
## MI1:TIME1        0.10730 0.109100  0.119300  0.8989 0.3687
## CBT1:TIME1       0.07835 0.107200  0.085290  0.9187 0.3583
## MI1:CBT1:TIME1  -0.24370 0.149300  0.151800 -1.6050 0.1084
## 
##  Estimated Correlation Parameter:  0.7507 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.5267 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  216    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  432
summary(CGSMD_nobl_interaction_TIMEref1) #base: MI=0, CBT=0, time=1
##                     Estimates Model SE Robust SE     wald      p
## (Intercept)           2.08300 0.109700  0.104200 19.98000 0.0000
## cAGE                 -0.02455 0.044860  0.051060 -0.48080 0.6307
## cBLCGSMD              0.04529 0.005742  0.004835  9.36700 0.0000
## MI1                  -0.04446 0.154600  0.162400 -0.27370 0.7843
## CBT1                  0.03738 0.152200  0.135800  0.27520 0.7832
## TIME_ref10           -0.07267 0.077410  0.062440 -1.16400 0.2445
## MI1:CBT1              0.01595 0.212100  0.209700  0.07605 0.9394
## MI1:TIME_ref10       -0.10730 0.109100  0.119300 -0.89890 0.3687
## CBT1:TIME_ref10      -0.07835 0.107200  0.085290 -0.91870 0.3583
## MI1:CBT1:TIME_ref10   0.24370 0.149300  0.151800  1.60500 0.1084
## 
##  Estimated Correlation Parameter:  0.7507 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.5267 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  216    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  432
summary(CGSMD_nobl_interaction_MIref1) #ref: MI=1, CBT=0, time=0
##                     Estimates Model SE Robust SE    wald       p
## (Intercept)           1.85800 0.109800  0.106200 17.5000 0.00000
## cAGE                 -0.02455 0.044860  0.051060 -0.4808 0.63070
## cBLCGSMD              0.04529 0.005742  0.004835  9.3670 0.00000
## MI_ref10              0.15170 0.155900  0.160100  0.9479 0.34320
## CBT1                  0.21860 0.147900  0.145100  1.5070 0.13190
## TIME1                 0.17990 0.076820  0.101700  1.7700 0.07681
## MI_ref10:CBT1        -0.25960 0.213500  0.207800 -1.2490 0.21160
## MI_ref10:TIME1       -0.10730 0.109100  0.119300 -0.8989 0.36870
## CBT1:TIME1           -0.16530 0.103800  0.125600 -1.3160 0.18800
## MI_ref10:CBT1:TIME1   0.24370 0.149300  0.151800  1.6050 0.10840
## 
##  Estimated Correlation Parameter:  0.7507 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.5267 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  216    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  432
summary(CGSMD_nobl_interaction_MIref1_TIMEref1) #ref: MI=1, CBT=0, time=1
##                          Estimates Model SE Robust SE     wald       p
## (Intercept)                2.03800 0.108500  0.123000 16.57000 0.00000
## cAGE                      -0.02455 0.044860  0.051060 -0.48080 0.63070
## cBLCGSMD                   0.04529 0.005742  0.004835  9.36700 0.00000
## MI_ref10                   0.04446 0.154600  0.162400  0.27370 0.78430
## CBT1                       0.05333 0.146900  0.157900  0.33770 0.73560
## TIME_ref10                -0.17990 0.076820  0.101700 -1.77000 0.07681
## MI_ref10:CBT1             -0.01595 0.212100  0.209700 -0.07605 0.93940
## MI_ref10:TIME_ref10        0.10730 0.109100  0.119300  0.89890 0.36870
## CBT1:TIME_ref10            0.16530 0.103800  0.125600  1.31600 0.18800
## MI_ref10:CBT1:TIME_ref10  -0.24370 0.149300  0.151800 -1.60500 0.10840
## 
##  Estimated Correlation Parameter:  0.7507 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.5267 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  216    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  432
summary(CGSMD_nobl_interaction_CBTref1) #ref: MI=0, CBT=1, time=0
##                     Estimates Model SE Robust SE    wald       p
## (Intercept)           1.96900 0.105800  0.090330 21.8000 0.00000
## cAGE                 -0.02455 0.044860  0.051060 -0.4808 0.63070
## cBLCGSMD              0.04529 0.005742  0.004835  9.3670 0.00000
## MI1                   0.10790 0.145000  0.134300  0.8032 0.42190
## CBT_ref10             0.04097 0.153100  0.149300  0.2744 0.78370
## TIME1                 0.15100 0.074220  0.058110  2.5990 0.00935
## MI1:CBT_ref10        -0.25960 0.213500  0.207800 -1.2490 0.21160
## MI1:TIME1            -0.13640 0.101900  0.093840 -1.4540 0.14600
## CBT_ref10:TIME1      -0.07835 0.107200  0.085290 -0.9187 0.35830
## MI1:CBT_ref10:TIME1   0.24370 0.149300  0.151800  1.6050 0.10840
## 
##  Estimated Correlation Parameter:  0.7507 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.5267 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  216    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  432
summary(CGSMD_nobl_interaction_CBTref1_TIMEref1) #ref: MI=0, CBT=1, time=1
##                          Estimates Model SE Robust SE     wald       p
## (Intercept)                2.12000 0.104900  0.087900 24.12000 0.00000
## cAGE                      -0.02455 0.044860  0.051060 -0.48080 0.63070
## cBLCGSMD                   0.04529 0.005742  0.004835  9.36700 0.00000
## MI1                       -0.02851 0.144200  0.132600 -0.21510 0.82970
## CBT_ref10                 -0.03738 0.152200  0.135800 -0.27520 0.78320
## TIME_ref10                -0.15100 0.074220  0.058110 -2.59900 0.00935
## MI1:CBT_ref10             -0.01595 0.212100  0.209700 -0.07605 0.93940
## MI1:TIME_ref10             0.13640 0.101900  0.093840  1.45400 0.14600
## CBT_ref10:TIME_ref10       0.07835 0.107200  0.085290  0.91870 0.35830
## MI1:CBT_ref10:TIME_ref10  -0.24370 0.149300  0.151800 -1.60500 0.10840
## 
##  Estimated Correlation Parameter:  0.7507 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.5267 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  216    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  432
summary(CGSMD_nobl_interaction_MIref1_CBTref1) #ref: MI=1, CBT=1, time=0
##                          Estimates Model SE Robust SE    wald      p
## (Intercept)                2.07700 0.098990  0.099590 20.8500 0.0000
## cAGE                      -0.02455 0.044860  0.051060 -0.4808 0.6307
## cBLCGSMD                   0.04529 0.005742  0.004835  9.3670 0.0000
## MI_ref10                  -0.10790 0.145000  0.134300 -0.8032 0.4219
## CBT_ref10                 -0.21860 0.147900  0.145100 -1.5070 0.1319
## TIME1                      0.01461 0.069870  0.073680  0.1983 0.8428
## MI_ref10:CBT_ref10         0.25960 0.213500  0.207800  1.2490 0.2116
## MI_ref10:TIME1             0.13640 0.101900  0.093840  1.4540 0.1460
## CBT_ref10:TIME1            0.16530 0.103800  0.125600  1.3160 0.1880
## MI_ref10:CBT_ref10:TIME1  -0.24370 0.149300  0.151800 -1.6050 0.1084
## 
##  Estimated Correlation Parameter:  0.7507 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.5267 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  216    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  432
summary(CGSMD_nobl_interaction_MIref1_CBTref1_TIMEref1) #ref: MI=1, CBT=1, time=1
##                               Estimates Model SE Robust SE     wald      p
## (Intercept)                     2.09200 0.098910  0.099930 20.93000 0.0000
## cAGE                           -0.02455 0.044860  0.051060 -0.48080 0.6307
## cBLCGSMD                        0.04529 0.005742  0.004835  9.36700 0.0000
## MI_ref10                        0.02851 0.144200  0.132600  0.21510 0.8297
## CBT_ref10                      -0.05333 0.146900  0.157900 -0.33770 0.7356
## TIME_ref10                     -0.01461 0.069870  0.073680 -0.19830 0.8428
## MI_ref10:CBT_ref10              0.01595 0.212100  0.209700  0.07605 0.9394
## MI_ref10:TIME_ref10            -0.13640 0.101900  0.093840 -1.45400 0.1460
## CBT_ref10:TIME_ref10           -0.16530 0.103800  0.125600 -1.31600 0.1880
## MI_ref10:CBT_ref10:TIME_ref10   0.24370 0.149300  0.151800  1.60500 0.1084
## 
##  Estimated Correlation Parameter:  0.7507 
##  Correlation Structure:  ar1 
##  Est. Scale Parameter:  0.5267 
## 
##  Number of GEE iterations: 4 
##  Number of Clusters:  216    Maximum Cluster Size:  2 
##  Number of observations with nonzero weight:  432