R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

setwd("~/Desktop/FDU LAB")
TLFB2 <- read.csv("TLFB_9-22-22(TLFB).csv")

TLFB <-TLFB2[-c(1,2, 618),]

#TLFB$STD
#TLFB$Partner_Type
#con <-data.frame(TLFB$STD,TLFB$Partner_Type)
# summary(TLFB)
# require(psych)
# describe(TLFB)
# getOption("max.print")
# options(max.print = 100000)
# describeTLFB <- describe(TLFB)
# describeTLFB
# TLFB$Condom_Access

WIDE <- read.csv("WIDE_FINAL(WIDE_FINAL).csv")
#describe(WIDE)
#describeWIDE <- describe(WIDE)
#describeWIDE


#There are two datasets from my dissertation project, one is in wide format and 
#the other is in long format. The datasets are the same,
#the data structure differs based on the type of analyses that were conducted - 
#long for multilevel models and wide for everything else.




###### variables 

#####demographics

#demo <- WIDE[,1:23]


######## condomaccess

#CDAC <-TLFB$Condom_Access

#######partner STI

#PSTI <-TLFB$STD
#require(stringr)
#TLFB$STD <- gsub("2", "1", TLFB$STD)
#TLFB$STD
######partner types

#ptype <- TLFB$Partner_Type
#TLFB$Partner_Type
#########Motivation

#moti <-TLFB$UCLA_Total



#########information

#info <-TLFB$STD_KQ_Total


#########behavior skill

# beha <- TLFB$CISQ_W_Total
# beha <- TLFB$CISQ_S_Total
# beha <- TLFB$CISQ_DR_Total
# beha <- TLFB$CISQ_RC_Total
# beha <- TLFB$CISQ_R_Total
# beha <- TLFB$CISQ_D_Total
# beha <- TLFB$CISQ_P_Total

BSsum <- data.frame(TLFB$CISQ_W_Total, TLFB$CISQ_S_Total, TLFB$CISQ_DR_Total, TLFB$CISQ_RC_Total, 
                     TLFB$CISQ_R_Total, TLFB$CISQ_D_Total, TLFB$CISQ_P_Total)
TLFB$BSsum <-rowSums(BSsum)
#TLFB$BSsum
##########substance use

#drinkuse <- TLFB$Drink_YN
#drug <-TLFB$Drugs_YN




######??lme4

#library(dplyr)
#install.packages("pacman")
#pacman::p_load(Matrix)
library(lme4)
## Loading required package: Matrix
###### as numeric 
TLFB$UCLA_Total <- as.numeric(as.character(TLFB$UCLA_Total))
TLFB$STD_KQ_Total <- as.numeric(as.character(TLFB$STD_KQ_Total))
TLFB$BSsum <- as.numeric(as.character(TLFB$BSsum))
TLFB$Condom_Access <- as.numeric(as.character(TLFB$Condom_Access))
TLFB$ID <- as.numeric(as.character(TLFB$ID))
TLFB$STD <- as.numeric(as.character(TLFB$STD))
TLFB$Gender <- as.numeric(as.character(TLFB$Gender))
TLFB$Drink_YN <- as.numeric(as.character(TLFB$Drink_YN))
TLFB$Drugs_YN <- as.numeric(as.character(TLFB$Drugs_YN))


#TLFB$Drink_YN
#TLFB$Drugs_YN
# combine the drug and alcohol use 
#subs <- data.frame(as.numeric(TLFB$Drink_YN), as.numeric(TLFB$Drugs_YN))
#require(stringr)
#subs$as.numeric.TLFB.Drugs_YN. <- as.numeric(gsub("1", "2", subs$as.numeric.TLFB.Drugs_YN.)) #drug = 2
#TLFB$substanceuse <-rowSums(subs) # 0 = no use, 1 = drink, 2 = drug, 3 = combined
#TLFB$substanceuse


#TLFB$substanceuse <-rowSums(subs)
#TLFB$substanceuse <- gsub("2", "1", TLFB$substanceuse)
#TLFB$substanceuse


#TLFB$Partner_Type
#TLFB$Partner_Type <- gsub("1", "0", TLFB$Partner_Type)
#TLFB$Partner_Type <- gsub("2", "1", TLFB$Partner_Type) # 0 New + casual, 1 regular
#TLFB$Partner_Type
###scaled numeric values 
TLFB$UCLA_Total_scaled <- scale(TLFB$UCLA_Total)
TLFB$STD_KQ_Total_scaled <- scale(TLFB$STD_KQ_Total)
TLFB$BSsum_scaled <- scale(TLFB$BSsum)
TLFB$ID_scaled <- scale(TLFB$ID)


#Test whether the categorical variables are factor variables
#Change to factor if the results of the above were "FALSE"
#TLFB$substanceuse <- factor(TLFB$substanceuse, levels = c(0,1))
TLFB$Gender <- factor(TLFB$Gender, levels = c(0,1)) # 0 = male, 1 = female
TLFB$Condom_Access <- factor(TLFB$Condom_Access, levels = c(0,1)) # 0 = no, 1 = yes

TLFB$STD <- gsub("2", "1", TLFB$STD) # 0 = don't know, 1 = postive or negative 
TLFB$STD <- factor(TLFB$STD, levels = c(0,1))
TLFB$Partner_Type <- factor(TLFB$Partner_Type, levels = c(0,1,2)) # Regular = 2, Casual = 1, New = 0
TLFB$Drugs_YN <- factor(TLFB$Drugs_YN, levels = c(0,1)) # 0 no 1 yes
TLFB$Drink_YN <- factor(TLFB$Drink_YN, levels = c(0,1)) # 0 no 1 yes

#is.factor(TLFB$substanceuse)
is.factor(TLFB$Gender)
## [1] TRUE
is.factor(TLFB$Condom_Access)
## [1] TRUE
is.factor(TLFB$STD)
## [1] TRUE
is.factor(TLFB$Partner_Type)
## [1] TRUE
is.factor(TLFB$Drink_YN)
## [1] TRUE
is.factor(TLFB$Drugs_YN)
## [1] TRUE
require(psych)
## Loading required package: psych
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gtsummary)
##################table

#########random intercepts and slopes 


# 1. Load required libraries
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(dplyr)

# --- Assuming the 'TLFB' data is loaded in your R session ---

# 2. Prepare the Data and ENSURE Condom_Access is numeric
#    Create a sequential Observation_Index within each ID group.
TLFB_plot <- TLFB %>%
  group_by(ID) %>%
  mutate(Observation_Index = row_number(),
         # Ensure Condom_Access is treated as numeric 0 or 1
         Condom_Access_Numeric = as.numeric(as.character(Condom_Access))) %>%
  ungroup()

# 3. Generate the ggplot using the numeric outcome variable
p_raw <- ggplot(TLFB_plot, aes(x = Observation_Index, y = Condom_Access_Numeric, group = ID)) +
  
  # Add lines connecting the raw binary values for each ID.
  geom_line(aes(color = factor(ID)), alpha = 0.5) +
  
  # Add jittered points for the raw Condom_Access (0 or 1).
  geom_point(aes(color = factor(ID)),
             position = position_jitter(width = 0.2, height = 0.1),
             alpha = 0.8,
             size = 1.5) +
  
  # Apply a smooth line to visualize the trend for each ID using GLM.
  geom_smooth(aes(color = factor(ID)),
              method = "glm",
              method.args = list(family = binomial),
              se = FALSE,
              linewidth = 0.8) +
  
  # Use scale_y_continuous now that the variable is confirmed to be numeric
  scale_y_continuous(breaks = c(0, 1), labels = c("No Access (0)", "Access (1)")) +
  
  # Customize the plot appearance
  labs(
    title = "Raw Condom Access (0/1) Trajectories by Individual ID",
    subtitle = "Smooth lines show logistic trend per ID (for diagnostics)",
    x = "Observation Index (Sequential within ID)",
    y = "Raw Condom Access (0 = No, 1 = Yes)",
    color = "Participant ID"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Display the plot
print(p_raw)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

#######model building 

#library(nlme)

#randomIntercept_GLMM <- glmer(Condom_Access ~ 1 + (1 | ID), 
#                              data = TLFB, 
#                              family = binomial)

#summary(randomIntercept_GLMM)


#model_M1_fixed <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                          BSsum_scaled + (1 | ID),
#                        data = TLFB, family = binomial(link = "logit"))
#summary(model_M1_fixed)


#anova(randomIntercept_GLMM, model_M1_fixed)


######simple model to complicated ones to see the model fits. 

##Baseline Model (Fixed Effects Only)



##Random Intercepts Model
# 
# 
# model_M2_RI <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                        BSsum_scaled + Gender  + Drugs_YN + Drink_YN + Partner_Type +
#                        (1 | ID), # Random Intercept only
#                      data = TLFB, family = binomial(link = "logit"),
#                      glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
# 
# summary(model_M2_RI)
# Compare M1 (Fixed) vs. M2 (Random Intercept)
# This checks if adding the random intercept significantly improves the model fit.


#anova(model_M1_fixed, model_M2_RI)



##Complex Random Slopes Model
# 
# model_M3_ComplexRS <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                               BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
#                               (Drugs_YN + Drink_YN + Partner_Type | ID), # Random Intercept AND Slopes
#                             data = TLFB, family = binomial(link = "logit"),
#                          glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
# 
# summary(model_M3_ComplexRS)
##Interpretation: If the $\text{p-value}$ is small (e.g., $< 0.05$), $\text{M2}$ is significantly better than $\text{M1}$, 
####indicating that the clustering by $\text{ID}$ must be accounted for.

# Compare M2 (Random Intercept) vs. M3 (Complex Random Slopes)
# This checks if the added complexity of random slopes significantly improves the fit.
#anova(model_M2_RI, model_M3_ComplexRS)
###Interpretation: If the $\text{p-value}$ is small (e.g., $< 0.05$),
####$\text{M3}$ is significantly better than $\text{M2}$, 
####justifying the use of the complex random effects structure.
####Note: The anova() function uses a Likelihood Ratio Test, 
#####which is appropriate for comparing nested models 
#####(models where one is a specific case of the other, e.g., $\text{M2}$ is $\text{M3}$ with the random slope variances fixed to zero).




###Notice the correlation between the (Intercept) and substanceuse1 is $-0.89$.
#####A correlation close to $\mathbf{-1}$ or $\mathbf{1}$ means the random effects are highly redundant
######(i.e., they are explaining the same portion of variance in a highly predictable way).
######This is the equivalent of multicollinearity in the random effects structure and is a frequent cause of the singular fit warning, even when individual variances are large.

###### Correct approach: Keep the random intercepts and slopes, but force their covariance to zero
#####Since the variances are large (meaning the slopes are needed) 
#####but the correlations are high (meaning the model is unstable), 
#####the best step is to use the simplification recommended previously: 
#####remove the correlations while keeping the large variances. 
###You can do this by splitting the random effect term into uncorrelated components using the double bar operator
# 
# library(optimx)
# model_M3_Simplified_Uncorr <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                               BSsum_scaled + Gender + Drugs_YN + Drink_YN  + Partner_Type +
#                               (1 || ID) +  # Uncorrelated Random Intercept
#                               (Drugs_YN || ID) + 
#                               (Drink_YN || ID) +
#                               (Partner_Type || ID), 
#                             data = TLFB, family = binomial,
#                             control = glmerControl(optimizer = "optimx", 
#                                                    optCtrl = list(method = "nlminb", maxiter = 10000)))
# 
# summary(model_M3_Simplified_Uncorr)



# Option to test first, removes all covariance terms:
# model_M3_Simplified <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                                BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
#                                (Drugs_YN || ID) + 
#                                (Drink_YN || ID) + 
#                                (Partner_Type || ID), 
#                                data = TLFB, family = binomial, 
#                                control = glmerControl(optimizer = "optimx", 
#                                                     optCtrl = list(method = "nlminb", maxiter = 10000)))
# summary(model_M3_Simplified)
# 
# 
# 
# model_M3_Final <- glmer(Condom_Access  ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                           BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
#                           (0 + Drugs_YN || ID) +  # Exclude Random Intercept
#                           (0 + Drink_YN || ID) +  # Exclude Random Intercept
#                           (0 + Partner_Type || ID),   # Exclude Random Intercept
#                         data = TLFB, family = binomial,
#                         control = glmerControl(optimizer = "optimx", 
#                                              optCtrl = list(method = "nlminb", maxiter = 10000)))
# 
# summary(model_M3_Final)
# 
# 
# # Model M4_Substance: Random Intercept + Random Slope for substanceuse (no correlation)
# model_M4_Substance <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                               BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
#                               (1 | ID) +  # Re-introduce the uncorrelated random intercept (often needed)
#                               (0 + Drugs_YN || ID), 
#                               (0 + Drink_YN || ID), 
#                             data = TLFB, family = binomial,
#                             control = glmerControl(optimizer = "optimx", 
#                                                    optCtrl = list(method = "nlminb", maxiter = 10000)))
# summary(model_M4_Substance)


# Model M4_Partner: Random Intercept + Random Slope for Partner_Type (no correlation)
# model_M4_Partner <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                             BSsum_scaled + Gender +  Drugs_YN + Drink_YN  + Partner_Type + 
#                             (1 | ID) + # Re-introduce the uncorrelated random intercept
#                             (0 + Partner_Type || ID), 
#                           data = TLFB, family = binomial,
#                           control = glmerControl(optimizer = "optimx", 
#                                                  optCtrl = list(method = "nlminb", maxiter = 10000)))
# 
# summary(model_M4_Partner)
# 


#### most conservative model. 


model_M2_RI <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
                       BSsum_scaled + Gender  +  Drugs_YN + Drink_YN + Partner_Type +
                       (1 | ID), # Random Intercept only
                     data = TLFB, family = binomial(link = "logit"),
                     glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
summary(model_M2_RI)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + BSsum_scaled +  
##     Gender + Drugs_YN + Drink_YN + Partner_Type + (1 | ID)
##    Data: TLFB
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000))
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     371.6     415.8    -175.8     351.6       605 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.0438  0.0750  0.1478  0.2501  2.2811 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 3.73     1.931   
## Number of obs: 615, groups:  ID, 53
## 
## Fixed effects:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          3.51155    0.86530   4.058 4.95e-05 ***
## UCLA_Total_scaled    0.56567    0.35722   1.584   0.1133    
## STD_KQ_Total_scaled -0.05926    0.31014  -0.191   0.8485    
## BSsum_scaled         0.26548    0.44509   0.596   0.5509    
## Gender1             -1.50641    0.88045  -1.711   0.0871 .  
## Drugs_YN1            0.06264    0.53063   0.118   0.9060    
## Drink_YN1            0.01206    0.38433   0.031   0.9750    
## Partner_Type1        0.86341    0.46841   1.843   0.0653 .  
## Partner_Type2       -0.07253    0.49831  -0.146   0.8843    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) UCLA_T STD_KQ BSsm_s Gendr1 Drg_YN1 Drn_YN1 Prt_T1
## UCLA_Ttl_sc -0.021                                                   
## STD_KQ_Ttl_  0.109 -0.156                                            
## BSsum_scald  0.009  0.333 -0.313                                     
## Gender1     -0.707  0.242 -0.211  0.353                              
## Drugs_YN1   -0.261  0.040  0.060 -0.143  0.130                       
## Drink_YN1   -0.203 -0.019  0.100  0.000 -0.015 -0.283                
## Partnr_Typ1 -0.256  0.051 -0.042 -0.008 -0.037 -0.044   0.074        
## Partnr_Typ2 -0.418 -0.062 -0.053 -0.062  0.004  0.019   0.178   0.551
model_M2_RI <- glmer(STD ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
                       BSsum_scaled + Gender  +  Drugs_YN + Drink_YN + Partner_Type +
                       (1 | ID), # Random Intercept only
                     data = TLFB, family = binomial(link = "logit"),
                     glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
summary(model_M2_RI)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: STD ~ UCLA_Total_scaled + STD_KQ_Total_scaled + BSsum_scaled +  
##     Gender + Drugs_YN + Drink_YN + Partner_Type + (1 | ID)
##    Data: TLFB
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000))
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     309.9     354.1    -145.0     289.9       605 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.9097 -0.1296 -0.0321  0.0867  5.8500 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 46.33    6.807   
## Number of obs: 615, groups:  ID, 53
## 
## Fixed effects:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           2.7522     2.2892   1.202  0.22927    
## UCLA_Total_scaled    -0.6567     1.2860  -0.511  0.60960    
## STD_KQ_Total_scaled   0.9280     0.8903   1.042  0.29726    
## BSsum_scaled          1.7718     1.4183   1.249  0.21159    
## Gender1              -2.9062     2.8926  -1.005  0.31504    
## Drugs_YN1             1.1697     0.5963   1.962  0.04981 *  
## Drink_YN1             0.5531     0.4403   1.256  0.20908    
## Partner_Type1        -1.7979     0.5674  -3.169  0.00153 ** 
## Partner_Type2        -5.4105     0.8178  -6.616  3.7e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) UCLA_T STD_KQ BSsm_s Gendr1 Drg_YN1 Drn_YN1 Prt_T1
## UCLA_Ttl_sc -0.177                                                   
## STD_KQ_Ttl_  0.182 -0.305                                            
## BSsum_scald  0.003  0.425 -0.337                                     
## Gender1     -0.774  0.288 -0.273  0.312                              
## Drugs_YN1   -0.057 -0.009  0.069 -0.017  0.011                       
## Drink_YN1   -0.068  0.005  0.047  0.011 -0.016 -0.211                
## Partnr_Typ1 -0.156  0.031 -0.074 -0.033  0.037 -0.223   0.064        
## Partnr_Typ2 -0.216  0.029 -0.135 -0.060  0.095 -0.190   0.014   0.533
###"Although the ANOVA test comparing the Random Intercept model to the Complex Random Slope model 
######(model_M3_ComplexRS) was significant, all attempts to fit the complex model structure resulted 
#####in singular fits or convergence failures. This indicates that the data does not contain sufficient 
#######information to reliably estimate the random slope variances. 
#######Therefore, the most complex, stable model is the Random Intercept Only model,
####which was selected for final interpretation."

##########comparing GEE and glmer


#install.packages("geepack")
library(geepack)

TLFB$Condom_Access <- as.numeric(TLFB$Condom_Access)
#TLFB$Condom_Access

TLFB$Condom_Access <- gsub("1", "0", TLFB$Condom_Access)
TLFB$Condom_Access <- gsub("2", "1", TLFB$Condom_Access)
#TLFB$Condom_Access
TLFB$Condom_Access <- as.numeric(TLFB$Condom_Access)

# 2. Define the GEE model structure
# The fixed effects formula remains the same as your glmer model.
# The 'id' argument specifies the clustering variable (replaces the random effect).
# The 'corstr' argument specifies the working correlation structure.
geemodel_M2_Marginal <- geeglm(
  # Fixed Effects Formula
  Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
    BSsum_scaled + Gender +  Drugs_YN + Drink_YN  + Partner_Type,
  
  # Clustering/Grouping Variable (Required for GEE)
  id = ID,
  
  # Data and Link Function
  data = TLFB, 
  family = binomial(link = "logit"),
  
  # Working Correlation Structure
  # Common choices:
  # "exchangeable": Assumes constant correlation between all observations within an ID.
  # "ar1": Assumes correlation decays as the time between observations increases (often good for longitudinal data).
  # "independent": Assumes no correlation within an ID (least complex).
  corstr = "exchangeable" 
)


# 3. View the Summary and Robust Standard Errors
summary(geemodel_M2_Marginal)
## 
## Call:
## geeglm(formula = Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
##     BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type, 
##     family = binomial(link = "logit"), data = TLFB, id = ID, 
##     corstr = "exchangeable")
## 
##  Coefficients:
##                      Estimate   Std.err  Wald Pr(>|W|)   
## (Intercept)          2.060940  0.791770 6.775  0.00924 **
## UCLA_Total_scaled    0.394580  0.292118 1.825  0.17677   
## STD_KQ_Total_scaled -0.139220  0.196784 0.501  0.47927   
## BSsum_scaled         0.107552  0.324836 0.110  0.74057   
## Gender1             -0.861213  0.719294 1.434  0.23119   
## Drugs_YN1            0.009861  0.278854 0.001  0.97179   
## Drink_YN1            0.038637  0.184965 0.044  0.83453   
## Partner_Type1        0.611353  0.642623 0.905  0.34143   
## Partner_Type2       -0.004097  0.655607 0.000  0.99501   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation structure = exchangeable 
## Estimated Scale Parameters:
## 
##             Estimate Std.err
## (Intercept)   0.9057  0.5454
##   Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha   0.3385   0.207
## Number of clusters:   53  Maximum cluster size: 68
TLFB$STD
##   [1] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
##  [75] 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 0 1 0 0 0 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1
## [223] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1
## [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [408] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## [482] 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1
## [519] 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1
## [556] 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## [593] 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 1 0 0 0
## Levels: 0 1
#TLFB$STD[is.na(TLFB$STD)] <- 1

#TLFB$STD
#TLFB$STD <- factor(TLFB$STD, levels = c(0,1))
TLFB$STD <- as.numeric(TLFB$STD)
#TLFB$STD
TLFB$STD_numeric <- as.numeric(as.factor(TLFB$STD)) - 1
TLFB$STD_numeric
##   [1] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
##  [75] 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 0 1 0 0 0 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1
## [223] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1
## [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [408] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## [482] 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1
## [519] 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1
## [556] 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## [593] 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 1 0 0 0
# 2. Define the GEE model structure
# The fixed effects formula remains the same as your glmer model.
# The 'id' argument specifies the clustering variable (replaces the random effect).
# The 'corstr' argument specifies the working correlation structure.
geemodel_M2_Marginal_STD <- geeglm(
  # Fixed Effects Formula
  STD_numeric ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
    BSsum_scaled + Gender +  Drugs_YN + Drink_YN  + Partner_Type,
  
  # Clustering/Grouping Variable (Required for GEE)
  id = ID,
  
  # Data and Link Function
  data = TLFB, 
  family = binomial(link = "logit"),
  
  # Working Correlation Structure
  # Common choices:
  # "exchangeable": Assumes constant correlation between all observations within an ID.
  # "ar1": Assumes correlation decays as the time between observations increases (often good for longitudinal data).
  # "independent": Assumes no correlation within an ID (least complex).
  corstr = "exchangeable" 
)


# 3. View the Summary and Robust Standard Errors
summary(geemodel_M2_Marginal_STD)
## 
## Call:
## geeglm(formula = STD_numeric ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
##     BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type, 
##     family = binomial(link = "logit"), data = TLFB, id = ID, 
##     corstr = "exchangeable")
## 
##  Coefficients:
##                     Estimate Std.err Wald Pr(>|W|)   
## (Intercept)           0.8329  0.7480 1.24   0.2654   
## UCLA_Total_scaled    -0.4826  0.3473 1.93   0.1646   
## STD_KQ_Total_scaled   0.3693  0.2670 1.91   0.1665   
## BSsum_scaled         -0.5599  0.3783 2.19   0.1389   
## Gender1              -1.1883  0.7603 2.44   0.1181   
## Drugs_YN1             0.3569  0.4299 0.69   0.4064   
## Drink_YN1             0.0513  0.1706 0.09   0.7634   
## Partner_Type1        -0.6866  0.4953 1.92   0.1657   
## Partner_Type2        -1.8854  0.6728 7.85   0.0051 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation structure = exchangeable 
## Estimated Scale Parameters:
## 
##             Estimate Std.err
## (Intercept)     1.83    7.61
##   Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha    0.583    2.02
## Number of clusters:   53  Maximum cluster size: 68
# install.packages("sjPlot")
library(sjPlot)
## 
## Attaching package: 'sjPlot'
## The following object is masked from 'package:ggplot2':
## 
##     set_theme
# Plot the fixed effects (estimates transformed from log-odds to odds ratios by default)
plot_model(geemodel_M2_Marginal, type = "est")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the sjPlot package.
##   Please report the issue at <https://github.com/strengejacke/sjPlot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#library(ggeffects)
#library(ggplot2) # Optional, for further customization
tab_model(geemodel_M2_Marginal)
  Condom_Access
Predictors Odds Ratios CI p
(Intercept) 7.85 1.66 – 37.07 0.009
UCLA Total scaled 1.48 0.84 – 2.63 0.177
STD KQ Total scaled 0.87 0.59 – 1.28 0.479
BSsum scaled 1.11 0.59 – 2.10 0.741
Gender [1] 0.42 0.10 – 1.73 0.231
Drugs YN [1] 1.01 0.58 – 1.74 0.972
Drink YN [1] 1.04 0.72 – 1.49 0.835
Partner Type [1] 1.84 0.52 – 6.49 0.341
Partner Type [2] 1.00 0.28 – 3.60 0.995
N ID 53
Observations 615
# Plot the fixed effects (estimates transformed from log-odds to odds ratios by default)
plot_model(geemodel_M2_Marginal_STD, type = "est")

#library(ggeffects)
#library(ggplot2) # Optional, for further customization



tab_model(geemodel_M2_Marginal_STD)
  STD_numeric
Predictors Odds Ratios CI p
(Intercept) 2.30 0.53 – 9.96 0.265
UCLA Total scaled 0.62 0.31 – 1.22 0.165
STD KQ Total scaled 1.45 0.86 – 2.44 0.166
BSsum scaled 0.57 0.27 – 1.20 0.139
Gender [1] 0.30 0.07 – 1.35 0.118
Drugs YN [1] 1.43 0.62 – 3.32 0.406
Drink YN [1] 1.05 0.75 – 1.47 0.763
Partner Type [1] 0.50 0.19 – 1.33 0.166
Partner Type [2] 0.15 0.04 – 0.57 0.005
N ID 53
Observations 615
##################table


predictor_vars <- c("UCLA_Total_scaled", "STD_KQ_Total_scaled", "BSsum_scaled", 
                    "Gender", "Drugs_YN", "Drink_YN", "Partner_Type")



# Use this code, simply removing the line that caused the error.
descriptive_table <- TLFB %>%
  select(Condom_Access, all_of(predictor_vars)) %>%
  tbl_summary(
    by = Condom_Access,
    statistic = list(all_continuous() ~ "{mean} ({sd})"), 
    label = list(
      UCLA_Total_scaled ~ "UCLA Score (Scaled)",
      STD_KQ_Total_scaled ~ "STD Knowledge (Scaled)",
      BSsum_scaled ~ "Behavior Score Sum (Scaled)",
      Gender ~ "Gender",
      Drugs_YN ~ "Drugs_YN Use",
      Drink_YN ~ "Drink",
      Partner_Type ~ "Partner Type"
    ),
    digits = list(all_continuous() ~ 2),
    type = all_of(predictor_vars) ~ "categorical"
  ) %>%
  add_overall() %>%
  modify_header(label = "**Characteristic**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Condom Access Status**")

# Use 'print()' or just the object name to display the table
print(descriptive_table)
Characteristic
Condom Access Status
Overall
N = 615
1
0
N = 92
1
1
N = 523
1
UCLA Score (Scaled)


    -2.90676298457177 3 (0.5%) 0 (0%) 3 (0.6%)
    -1.92460458838561 10 (1.6%) 2 (2.2%) 8 (1.5%)
    -1.8591273619732 10 (1.6%) 9 (9.8%) 1 (0.2%)
    -1.72817290914838 41 (6.7%) 2 (2.2%) 39 (7.5%)
    -1.46626400349874 4 (0.7%) 1 (1.1%) 3 (0.6%)
    -1.33530955067391 19 (3.1%) 13 (14%) 6 (1.1%)
    -1.07340064502427 8 (1.3%) 1 (1.1%) 7 (1.3%)
    -1.00792341861186 10 (1.6%) 1 (1.1%) 9 (1.7%)
    -0.876968965787038 5 (0.8%) 1 (1.1%) 4 (0.8%)
    -0.811491739374627 52 (8.5%) 1 (1.1%) 51 (9.8%)
    -0.746014512962216 1 (0.2%) 1 (1.1%) 0 (0%)
    -0.615060060137395 16 (2.6%) 10 (11%) 6 (1.1%)
    -0.549582833724984 27 (4.4%) 8 (8.7%) 19 (3.6%)
    -0.484105607312573 3 (0.5%) 1 (1.1%) 2 (0.4%)
    -0.418628380900162 31 (5.0%) 0 (0%) 31 (5.9%)
    -0.0257650224256974 11 (1.8%) 0 (0%) 11 (2.1%)
    0.0397122039867134 12 (2.0%) 0 (0%) 12 (2.3%)
    0.105189430399124 17 (2.8%) 2 (2.2%) 15 (2.9%)
    0.170666656811535 68 (11%) 0 (0%) 68 (13%)
    0.236143883223946 17 (2.8%) 1 (1.1%) 16 (3.1%)
    0.301621109636357 12 (2.0%) 0 (0%) 12 (2.3%)
    0.367098336048768 3 (0.5%) 2 (2.2%) 1 (0.2%)
    0.498052788873589 8 (1.3%) 0 (0%) 8 (1.5%)
    0.563530015286 57 (9.3%) 11 (12%) 46 (8.8%)
    0.629007241698411 35 (5.7%) 0 (0%) 35 (6.7%)
    0.890916147348054 4 (0.7%) 0 (0%) 4 (0.8%)
    0.956393373760465 37 (6.0%) 3 (3.3%) 34 (6.5%)
    1.08734782658529 29 (4.7%) 0 (0%) 29 (5.5%)
    1.21830227941011 1 (0.2%) 0 (0%) 1 (0.2%)
    1.28377950582252 2 (0.3%) 0 (0%) 2 (0.4%)
    1.48021118505975 29 (4.7%) 20 (22%) 9 (1.7%)
    1.67664286429698 29 (4.7%) 2 (2.2%) 27 (5.2%)
    1.87307454353422 4 (0.7%) 0 (0%) 4 (0.8%)
STD Knowledge (Scaled)


    -4.1526208932418 4 (0.7%) 0 (0%) 4 (0.8%)
    -2.74216700001074 6 (1.0%) 1 (1.1%) 5 (1.0%)
    -2.27201570226705 3 (0.5%) 0 (0%) 3 (0.6%)
    -1.80186440452337 8 (1.3%) 1 (1.1%) 7 (1.3%)
    -1.33171310677968 39 (6.3%) 18 (20%) 21 (4.0%)
    -0.861561809035992 110 (18%) 4 (4.3%) 106 (20%)
    -0.391410511292305 97 (16%) 8 (8.7%) 89 (17%)
    0.0787407864513819 128 (21%) 38 (41%) 90 (17%)
    0.548892084195069 64 (10%) 7 (7.6%) 57 (11%)
    1.01904338193876 123 (20%) 7 (7.6%) 116 (22%)
    1.95934597742613 29 (4.7%) 8 (8.7%) 21 (4.0%)
    2.8996485729135 4 (0.7%) 0 (0%) 4 (0.8%)
Behavior Score Sum (Scaled)


    -2.6145127906319 5 (0.8%) 1 (1.1%) 4 (0.8%)
    -2.21292049161192 4 (0.7%) 0 (0%) 4 (0.8%)
    -2.11252241685693 6 (1.0%) 1 (1.1%) 5 (1.0%)
    -1.96192530472444 1 (0.2%) 1 (1.1%) 0 (0%)
    -1.76112915521445 3 (0.5%) 0 (0%) 3 (0.6%)
    -1.53523348701571 21 (3.4%) 4 (4.3%) 17 (3.3%)
    -1.48503444963821 1 (0.2%) 0 (0%) 1 (0.2%)
    -1.45993493094946 4 (0.7%) 0 (0%) 4 (0.8%)
    -1.38463637488322 7 (1.1%) 0 (0%) 7 (1.3%)
    -1.30933781881697 31 (5.0%) 2 (2.2%) 29 (5.5%)
    -1.28423830012822 5 (0.8%) 0 (0%) 5 (1.0%)
    -1.20893974406197 4 (0.7%) 0 (0%) 4 (0.8%)
    -1.18384022537323 3 (0.5%) 2 (2.2%) 1 (0.2%)
    -1.13364118799573 3 (0.5%) 0 (0%) 3 (0.6%)
    -1.00814359455198 8 (1.3%) 1 (1.1%) 7 (1.3%)
    -0.957944557174487 9 (1.5%) 5 (5.4%) 4 (0.8%)
    -0.932845038485738 8 (1.3%) 1 (1.1%) 7 (1.3%)
    -0.681849851598251 46 (7.5%) 11 (12%) 35 (6.7%)
    -0.656750332909503 8 (1.3%) 0 (0%) 8 (1.5%)
    -0.531252739465759 6 (1.0%) 1 (1.1%) 5 (1.0%)
    -0.50615322077701 7 (1.1%) 0 (0%) 7 (1.3%)
    -0.481053702088262 46 (7.5%) 1 (1.1%) 45 (8.6%)
    -0.405755146022016 7 (1.1%) 0 (0%) 7 (1.3%)
    -0.355556108644518 9 (1.5%) 3 (3.3%) 6 (1.1%)
    -0.280257552578272 9 (1.5%) 7 (7.6%) 2 (0.4%)
    -0.255158033889523 6 (1.0%) 2 (2.2%) 4 (0.8%)
    -0.230058515200775 3 (0.5%) 0 (0%) 3 (0.6%)
    -0.0543618843795336 15 (2.4%) 2 (2.2%) 13 (2.5%)
    -0.00416284700203615 3 (0.5%) 1 (1.1%) 2 (0.4%)
    0.0209366716867126 4 (0.7%) 0 (0%) 4 (0.8%)
    0.0460361903754613 39 (6.3%) 1 (1.1%) 38 (7.3%)
    0.0962352277529587 38 (6.2%) 21 (23%) 17 (3.3%)
    0.196633302507954 3 (0.5%) 0 (0%) 3 (0.6%)
    0.347230414640446 35 (5.7%) 0 (0%) 35 (6.7%)
    0.372329933329195 17 (2.8%) 13 (14%) 4 (0.8%)
    0.422528970706692 15 (2.4%) 4 (4.3%) 11 (2.1%)
    0.497827526772938 8 (1.3%) 0 (0%) 8 (1.5%)
    0.522927045461687 5 (0.8%) 4 (4.3%) 1 (0.2%)
    0.573126082839184 17 (2.8%) 1 (1.1%) 16 (3.1%)
    0.698623676282928 4 (0.7%) 1 (1.1%) 3 (0.6%)
    1.35121116219039 8 (1.3%) 0 (0%) 8 (1.5%)
    1.37631068087914 18 (2.9%) 1 (1.1%) 17 (3.3%)
    1.42650971825664 116 (19%) 0 (0%) 116 (22%)
Gender


    0 286 (47%) 12 (13%) 274 (52%)
    1 329 (53%) 80 (87%) 249 (48%)
Drugs_YN Use


    0 348 (57%) 61 (66%) 287 (55%)
    1 267 (43%) 31 (34%) 236 (45%)
Drink


    0 327 (53%) 52 (57%) 275 (53%)
    1 288 (47%) 40 (43%) 248 (47%)
Partner Type


    0 102 (17%) 19 (21%) 83 (16%)
    1 168 (27%) 24 (26%) 144 (28%)
    2 345 (56%) 49 (53%) 296 (57%)
1 n (%)
###missing? 

model_vars <- c("Condom_Access", "STD_numeric", "UCLA_Total_scaled", "STD_KQ_Total_scaled", 
                "BSsum_scaled", "Gender", "Partner_Type", "ID", "Drugs_YN", "Drink_YN")
# Check missing counts and percentages for relevant variables
missing_info <- data.frame(
  Variable = model_vars,
  Count_NA = sapply(TLFB[model_vars], function(x) sum(is.na(x))),
  Percent_NA = sapply(TLFB[model_vars], function(x) mean(is.na(x))) * 100
)
print(missing_info)
                           Variable Count_NA Percent_NA

Condom_Access Condom_Access 0 0 STD_numeric STD_numeric 0 0 UCLA_Total_scaled UCLA_Total_scaled 0 0 STD_KQ_Total_scaled STD_KQ_Total_scaled 0 0 BSsum_scaled BSsum_scaled 0 0 Gender Gender 0 0 Partner_Type Partner_Type 0 0 ID ID 0 0 Drugs_YN Drugs_YN 0 0 Drink_YN Drink_YN 0 0