Untitled

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

setwd("~/Desktop/FDU LAB")
TLFB2 <- read.csv("TLFB_9-22-22(TLFB).csv")

TLFB <-TLFB2[-c(1,2, 618),]

#TLFB$STD
#TLFB$Partner_Type
#con <-data.frame(TLFB$STD,TLFB$Partner_Type)
# summary(TLFB)
# require(psych)
# describe(TLFB)
# getOption("max.print")
# options(max.print = 100000)
# describeTLFB <- describe(TLFB)
# describeTLFB
# TLFB$Condom_Access

WIDE <- read.csv("WIDE_FINAL(WIDE_FINAL).csv")
#describe(WIDE)
#describeWIDE <- describe(WIDE)
#describeWIDE


#There are two datasets from my dissertation project, one is in wide format and 
#the other is in long format. The datasets are the same,
#the data structure differs based on the type of analyses that were conducted - 
#long for multilevel models and wide for everything else.




###### variables 

#####demographics

#demo <- WIDE[,1:23]


######## condomaccess

#CDAC <-TLFB$Condom_Access

#######partner STI

#PSTI <-TLFB$STD
#require(stringr)
#TLFB$STD <- gsub("2", "1", TLFB$STD)
#TLFB$STD
######partner types

#ptype <- TLFB$Partner_Type
#TLFB$Partner_Type
#########Motivation

#moti <-TLFB$UCLA_Total



#########information

#info <-TLFB$STD_KQ_Total


#########behavior skill

# beha <- TLFB$CISQ_W_Total
# beha <- TLFB$CISQ_S_Total
# beha <- TLFB$CISQ_DR_Total
# beha <- TLFB$CISQ_RC_Total
# beha <- TLFB$CISQ_R_Total
# beha <- TLFB$CISQ_D_Total
# beha <- TLFB$CISQ_P_Total

BSsum <- data.frame(TLFB$CISQ_W_Total, TLFB$CISQ_S_Total, TLFB$CISQ_DR_Total, TLFB$CISQ_RC_Total, 
                     TLFB$CISQ_R_Total, TLFB$CISQ_D_Total, TLFB$CISQ_P_Total)
TLFB$BSsum <-rowSums(BSsum)
#TLFB$BSsum
##########substance use

#drinkuse <- TLFB$Drink_YN
#drug <-TLFB$Drugs_YN




######??lme4

#library(dplyr)
#install.packages("pacman")
#pacman::p_load(Matrix)
library(lme4)

## Loading required package: Matrix

###### as numeric 
TLFB$UCLA_Total <- as.numeric(as.character(TLFB$UCLA_Total))
TLFB$STD_KQ_Total <- as.numeric(as.character(TLFB$STD_KQ_Total))
TLFB$BSsum <- as.numeric(as.character(TLFB$BSsum))
TLFB$Condom_Access <- as.numeric(as.character(TLFB$Condom_Access))
TLFB$ID <- as.numeric(as.character(TLFB$ID))
TLFB$STD <- as.numeric(as.character(TLFB$STD))
TLFB$Gender <- as.numeric(as.character(TLFB$Gender))
TLFB$Drink_YN <- as.numeric(as.character(TLFB$Drink_YN))
TLFB$Drugs_YN <- as.numeric(as.character(TLFB$Drugs_YN))


#TLFB$Drink_YN
#TLFB$Drugs_YN
# combine the drug and alcohol use 
#subs <- data.frame(as.numeric(TLFB$Drink_YN), as.numeric(TLFB$Drugs_YN))
#require(stringr)
#subs$as.numeric.TLFB.Drugs_YN. <- as.numeric(gsub("1", "2", subs$as.numeric.TLFB.Drugs_YN.)) #drug = 2
#TLFB$substanceuse <-rowSums(subs) # 0 = no use, 1 = drink, 2 = drug, 3 = combined
#TLFB$substanceuse


#TLFB$substanceuse <-rowSums(subs)
#TLFB$substanceuse <- gsub("2", "1", TLFB$substanceuse)
#TLFB$substanceuse


#TLFB$Partner_Type
#TLFB$Partner_Type <- gsub("1", "0", TLFB$Partner_Type)
#TLFB$Partner_Type <- gsub("2", "1", TLFB$Partner_Type) # 0 New + casual, 1 regular
#TLFB$Partner_Type
###scaled numeric values 
TLFB$UCLA_Total_scaled <- scale(TLFB$UCLA_Total)
TLFB$STD_KQ_Total_scaled <- scale(TLFB$STD_KQ_Total)
TLFB$BSsum_scaled <- scale(TLFB$BSsum)
TLFB$ID_scaled <- scale(TLFB$ID)


#Test whether the categorical variables are factor variables
#Change to factor if the results of the above were "FALSE"
#TLFB$substanceuse <- factor(TLFB$substanceuse, levels = c(0,1))
TLFB$Gender <- factor(TLFB$Gender, levels = c(0,1)) # 0 = male, 1 = female
TLFB$Condom_Access <- factor(TLFB$Condom_Access, levels = c(0,1)) # 0 = no, 1 = yes

TLFB$STD <- gsub("2", "1", TLFB$STD) # 0 = don't know, 1 = postive or negative 
TLFB$STD <- factor(TLFB$STD, levels = c(0,1))
TLFB$Partner_Type <- factor(TLFB$Partner_Type, levels = c(0,1,2)) # Regular = 2, Casual = 1, New = 0
TLFB$Drugs_YN <- factor(TLFB$Drugs_YN, levels = c(0,1)) # 0 no 1 yes
TLFB$Drink_YN <- factor(TLFB$Drink_YN, levels = c(0,1)) # 0 no 1 yes

#is.factor(TLFB$substanceuse)
is.factor(TLFB$Gender)

## [1] TRUE

is.factor(TLFB$Condom_Access)

## [1] TRUE

is.factor(TLFB$STD)

## [1] TRUE

is.factor(TLFB$Partner_Type)

## [1] TRUE

is.factor(TLFB$Drink_YN)

## [1] TRUE

is.factor(TLFB$Drugs_YN)

## [1] TRUE

require(psych)

## Loading required package: psych

require(dplyr)

## Loading required package: dplyr

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(gtsummary)
##################table

#########random intercepts and slopes 


# 1. Load required libraries
library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

library(dplyr)

# --- Assuming the 'TLFB' data is loaded in your R session ---

# 2. Prepare the Data and ENSURE Condom_Access is numeric
#    Create a sequential Observation_Index within each ID group.
TLFB_plot <- TLFB %>%
  group_by(ID) %>%
  mutate(Observation_Index = row_number(),
         # Ensure Condom_Access is treated as numeric 0 or 1
         Condom_Access_Numeric = as.numeric(as.character(Condom_Access))) %>%
  ungroup()

# 3. Generate the ggplot using the numeric outcome variable
p_raw <- ggplot(TLFB_plot, aes(x = Observation_Index, y = Condom_Access_Numeric, group = ID)) +
  
  # Add lines connecting the raw binary values for each ID.
  geom_line(aes(color = factor(ID)), alpha = 0.5) +
  
  # Add jittered points for the raw Condom_Access (0 or 1).
  geom_point(aes(color = factor(ID)),
             position = position_jitter(width = 0.2, height = 0.1),
             alpha = 0.8,
             size = 1.5) +
  
  # Apply a smooth line to visualize the trend for each ID using GLM.
  geom_smooth(aes(color = factor(ID)),
              method = "glm",
              method.args = list(family = binomial),
              se = FALSE,
              linewidth = 0.8) +
  
  # Use scale_y_continuous now that the variable is confirmed to be numeric
  scale_y_continuous(breaks = c(0, 1), labels = c("No Access (0)", "Access (1)")) +
  
  # Customize the plot appearance
  labs(
    title = "Raw Condom Access (0/1) Trajectories by Individual ID",
    subtitle = "Smooth lines show logistic trend per ID (for diagnostics)",
    x = "Observation Index (Sequential within ID)",
    y = "Raw Condom Access (0 = No, 1 = Yes)",
    color = "Participant ID"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Display the plot
print(p_raw)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

#######model building 

#library(nlme)

#randomIntercept_GLMM <- glmer(Condom_Access ~ 1 + (1 | ID), 
#                              data = TLFB, 
#                              family = binomial)

#summary(randomIntercept_GLMM)


#model_M1_fixed <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                          BSsum_scaled + (1 | ID),
#                        data = TLFB, family = binomial(link = "logit"))
#summary(model_M1_fixed)


#anova(randomIntercept_GLMM, model_M1_fixed)


######simple model to complicated ones to see the model fits. 

##Baseline Model (Fixed Effects Only)



##Random Intercepts Model
# 
# 
# model_M2_RI <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                        BSsum_scaled + Gender  + Drugs_YN + Drink_YN + Partner_Type +
#                        (1 | ID), # Random Intercept only
#                      data = TLFB, family = binomial(link = "logit"),
#                      glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
# 
# summary(model_M2_RI)
# Compare M1 (Fixed) vs. M2 (Random Intercept)
# This checks if adding the random intercept significantly improves the model fit.


#anova(model_M1_fixed, model_M2_RI)



##Complex Random Slopes Model
# 
# model_M3_ComplexRS <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                               BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
#                               (Drugs_YN + Drink_YN + Partner_Type | ID), # Random Intercept AND Slopes
#                             data = TLFB, family = binomial(link = "logit"),
#                          glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
# 
# summary(model_M3_ComplexRS)
##Interpretation: If the $\text{p-value}$ is small (e.g., $< 0.05$), $\text{M2}$ is significantly better than $\text{M1}$, 
####indicating that the clustering by $\text{ID}$ must be accounted for.

# Compare M2 (Random Intercept) vs. M3 (Complex Random Slopes)
# This checks if the added complexity of random slopes significantly improves the fit.
#anova(model_M2_RI, model_M3_ComplexRS)
###Interpretation: If the $\text{p-value}$ is small (e.g., $< 0.05$),
####$\text{M3}$ is significantly better than $\text{M2}$, 
####justifying the use of the complex random effects structure.
####Note: The anova() function uses a Likelihood Ratio Test, 
#####which is appropriate for comparing nested models 
#####(models where one is a specific case of the other, e.g., $\text{M2}$ is $\text{M3}$ with the random slope variances fixed to zero).




###Notice the correlation between the (Intercept) and substanceuse1 is $-0.89$.
#####A correlation close to $\mathbf{-1}$ or $\mathbf{1}$ means the random effects are highly redundant
######(i.e., they are explaining the same portion of variance in a highly predictable way).
######This is the equivalent of multicollinearity in the random effects structure and is a frequent cause of the singular fit warning, even when individual variances are large.

###### Correct approach: Keep the random intercepts and slopes, but force their covariance to zero
#####Since the variances are large (meaning the slopes are needed) 
#####but the correlations are high (meaning the model is unstable), 
#####the best step is to use the simplification recommended previously: 
#####remove the correlations while keeping the large variances. 
###You can do this by splitting the random effect term into uncorrelated components using the double bar operator
# 
# library(optimx)
# model_M3_Simplified_Uncorr <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                               BSsum_scaled + Gender + Drugs_YN + Drink_YN  + Partner_Type +
#                               (1 || ID) +  # Uncorrelated Random Intercept
#                               (Drugs_YN || ID) + 
#                               (Drink_YN || ID) +
#                               (Partner_Type || ID), 
#                             data = TLFB, family = binomial,
#                             control = glmerControl(optimizer = "optimx", 
#                                                    optCtrl = list(method = "nlminb", maxiter = 10000)))
# 
# summary(model_M3_Simplified_Uncorr)



# Option to test first, removes all covariance terms:
# model_M3_Simplified <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                                BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
#                                (Drugs_YN || ID) + 
#                                (Drink_YN || ID) + 
#                                (Partner_Type || ID), 
#                                data = TLFB, family = binomial, 
#                                control = glmerControl(optimizer = "optimx", 
#                                                     optCtrl = list(method = "nlminb", maxiter = 10000)))
# summary(model_M3_Simplified)
# 
# 
# 
# model_M3_Final <- glmer(Condom_Access  ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                           BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
#                           (0 + Drugs_YN || ID) +  # Exclude Random Intercept
#                           (0 + Drink_YN || ID) +  # Exclude Random Intercept
#                           (0 + Partner_Type || ID),   # Exclude Random Intercept
#                         data = TLFB, family = binomial,
#                         control = glmerControl(optimizer = "optimx", 
#                                              optCtrl = list(method = "nlminb", maxiter = 10000)))
# 
# summary(model_M3_Final)
# 
# 
# # Model M4_Substance: Random Intercept + Random Slope for substanceuse (no correlation)
# model_M4_Substance <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                               BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
#                               (1 | ID) +  # Re-introduce the uncorrelated random intercept (often needed)
#                               (0 + Drugs_YN || ID), 
#                               (0 + Drink_YN || ID), 
#                             data = TLFB, family = binomial,
#                             control = glmerControl(optimizer = "optimx", 
#                                                    optCtrl = list(method = "nlminb", maxiter = 10000)))
# summary(model_M4_Substance)


# Model M4_Partner: Random Intercept + Random Slope for Partner_Type (no correlation)
# model_M4_Partner <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
#                             BSsum_scaled + Gender +  Drugs_YN + Drink_YN  + Partner_Type + 
#                             (1 | ID) + # Re-introduce the uncorrelated random intercept
#                             (0 + Partner_Type || ID), 
#                           data = TLFB, family = binomial,
#                           control = glmerControl(optimizer = "optimx", 
#                                                  optCtrl = list(method = "nlminb", maxiter = 10000)))
# 
# summary(model_M4_Partner)
# 


#### most conservative model. 


model_M2_RI <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
                       BSsum_scaled + Gender  +  Drugs_YN + Drink_YN + Partner_Type +
                       (1 | ID), # Random Intercept only
                     data = TLFB, family = binomial(link = "logit"),
                     glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
summary(model_M2_RI)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + BSsum_scaled +  
##     Gender + Drugs_YN + Drink_YN + Partner_Type + (1 | ID)
##    Data: TLFB
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000))
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     371.6     415.8    -175.8     351.6       605 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.0438  0.0750  0.1478  0.2501  2.2811 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 3.73     1.931   
## Number of obs: 615, groups:  ID, 53
## 
## Fixed effects:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          3.51155    0.86530   4.058 4.95e-05 ***
## UCLA_Total_scaled    0.56567    0.35722   1.584   0.1133    
## STD_KQ_Total_scaled -0.05926    0.31014  -0.191   0.8485    
## BSsum_scaled         0.26548    0.44509   0.596   0.5509    
## Gender1             -1.50641    0.88045  -1.711   0.0871 .  
## Drugs_YN1            0.06264    0.53063   0.118   0.9060    
## Drink_YN1            0.01206    0.38433   0.031   0.9750    
## Partner_Type1        0.86341    0.46841   1.843   0.0653 .  
## Partner_Type2       -0.07253    0.49831  -0.146   0.8843    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) UCLA_T STD_KQ BSsm_s Gendr1 Drg_YN1 Drn_YN1 Prt_T1
## UCLA_Ttl_sc -0.021                                                   
## STD_KQ_Ttl_  0.109 -0.156                                            
## BSsum_scald  0.009  0.333 -0.313                                     
## Gender1     -0.707  0.242 -0.211  0.353                              
## Drugs_YN1   -0.261  0.040  0.060 -0.143  0.130                       
## Drink_YN1   -0.203 -0.019  0.100  0.000 -0.015 -0.283                
## Partnr_Typ1 -0.256  0.051 -0.042 -0.008 -0.037 -0.044   0.074        
## Partnr_Typ2 -0.418 -0.062 -0.053 -0.062  0.004  0.019   0.178   0.551

model_M2_RI <- glmer(STD ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
                       BSsum_scaled + Gender  +  Drugs_YN + Drink_YN + Partner_Type +
                       (1 | ID), # Random Intercept only
                     data = TLFB, family = binomial(link = "logit"),
                     glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
summary(model_M2_RI)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: STD ~ UCLA_Total_scaled + STD_KQ_Total_scaled + BSsum_scaled +  
##     Gender + Drugs_YN + Drink_YN + Partner_Type + (1 | ID)
##    Data: TLFB
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000))
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     309.9     354.1    -145.0     289.9       605 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.9097 -0.1296 -0.0321  0.0867  5.8500 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ID     (Intercept) 46.33    6.807   
## Number of obs: 615, groups:  ID, 53
## 
## Fixed effects:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           2.7522     2.2892   1.202  0.22927    
## UCLA_Total_scaled    -0.6567     1.2860  -0.511  0.60960    
## STD_KQ_Total_scaled   0.9280     0.8903   1.042  0.29726    
## BSsum_scaled          1.7718     1.4183   1.249  0.21159    
## Gender1              -2.9062     2.8926  -1.005  0.31504    
## Drugs_YN1             1.1697     0.5963   1.962  0.04981 *  
## Drink_YN1             0.5531     0.4403   1.256  0.20908    
## Partner_Type1        -1.7979     0.5674  -3.169  0.00153 ** 
## Partner_Type2        -5.4105     0.8178  -6.616  3.7e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) UCLA_T STD_KQ BSsm_s Gendr1 Drg_YN1 Drn_YN1 Prt_T1
## UCLA_Ttl_sc -0.177                                                   
## STD_KQ_Ttl_  0.182 -0.305                                            
## BSsum_scald  0.003  0.425 -0.337                                     
## Gender1     -0.774  0.288 -0.273  0.312                              
## Drugs_YN1   -0.057 -0.009  0.069 -0.017  0.011                       
## Drink_YN1   -0.068  0.005  0.047  0.011 -0.016 -0.211                
## Partnr_Typ1 -0.156  0.031 -0.074 -0.033  0.037 -0.223   0.064        
## Partnr_Typ2 -0.216  0.029 -0.135 -0.060  0.095 -0.190   0.014   0.533

###"Although the ANOVA test comparing the Random Intercept model to the Complex Random Slope model 
######(model_M3_ComplexRS) was significant, all attempts to fit the complex model structure resulted 
#####in singular fits or convergence failures. This indicates that the data does not contain sufficient 
#######information to reliably estimate the random slope variances. 
#######Therefore, the most complex, stable model is the Random Intercept Only model,
####which was selected for final interpretation."

##########comparing GEE and glmer


#install.packages("geepack")
library(geepack)

TLFB$Condom_Access <- as.numeric(TLFB$Condom_Access)
#TLFB$Condom_Access

TLFB$Condom_Access <- gsub("1", "0", TLFB$Condom_Access)
TLFB$Condom_Access <- gsub("2", "1", TLFB$Condom_Access)
#TLFB$Condom_Access
TLFB$Condom_Access <- as.numeric(TLFB$Condom_Access)

# 2. Define the GEE model structure
# The fixed effects formula remains the same as your glmer model.
# The 'id' argument specifies the clustering variable (replaces the random effect).
# The 'corstr' argument specifies the working correlation structure.
geemodel_M2_Marginal <- geeglm(
  # Fixed Effects Formula
  Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
    BSsum_scaled + Gender +  Drugs_YN + Drink_YN  + Partner_Type,
  
  # Clustering/Grouping Variable (Required for GEE)
  id = ID,
  
  # Data and Link Function
  data = TLFB, 
  family = binomial(link = "logit"),
  
  # Working Correlation Structure
  # Common choices:
  # "exchangeable": Assumes constant correlation between all observations within an ID.
  # "ar1": Assumes correlation decays as the time between observations increases (often good for longitudinal data).
  # "independent": Assumes no correlation within an ID (least complex).
  corstr = "exchangeable" 
)


# 3. View the Summary and Robust Standard Errors
summary(geemodel_M2_Marginal)

## 
## Call:
## geeglm(formula = Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
##     BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type, 
##     family = binomial(link = "logit"), data = TLFB, id = ID, 
##     corstr = "exchangeable")
## 
##  Coefficients:
##                      Estimate   Std.err  Wald Pr(>|W|)   
## (Intercept)          2.060940  0.791770 6.775  0.00924 **
## UCLA_Total_scaled    0.394580  0.292118 1.825  0.17677   
## STD_KQ_Total_scaled -0.139220  0.196784 0.501  0.47927   
## BSsum_scaled         0.107552  0.324836 0.110  0.74057   
## Gender1             -0.861213  0.719294 1.434  0.23119   
## Drugs_YN1            0.009861  0.278854 0.001  0.97179   
## Drink_YN1            0.038637  0.184965 0.044  0.83453   
## Partner_Type1        0.611353  0.642623 0.905  0.34143   
## Partner_Type2       -0.004097  0.655607 0.000  0.99501   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation structure = exchangeable 
## Estimated Scale Parameters:
## 
##             Estimate Std.err
## (Intercept)   0.9057  0.5454
##   Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha   0.3385   0.207
## Number of clusters:   53  Maximum cluster size: 68

TLFB$STD

##   [1] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
##  [75] 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 0 1 0 0 0 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1
## [223] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1
## [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [408] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## [482] 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1
## [519] 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1
## [556] 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## [593] 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 1 0 0 0
## Levels: 0 1

#TLFB$STD[is.na(TLFB$STD)] <- 1

#TLFB$STD
#TLFB$STD <- factor(TLFB$STD, levels = c(0,1))
TLFB$STD <- as.numeric(TLFB$STD)
#TLFB$STD
TLFB$STD_numeric <- as.numeric(as.factor(TLFB$STD)) - 1
TLFB$STD_numeric

##   [1] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
##  [75] 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 0 1 0 0 0 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1
## [223] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1
## [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [408] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## [482] 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1
## [519] 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1
## [556] 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## [593] 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 1 0 0 0

# 2. Define the GEE model structure
# The fixed effects formula remains the same as your glmer model.
# The 'id' argument specifies the clustering variable (replaces the random effect).
# The 'corstr' argument specifies the working correlation structure.
geemodel_M2_Marginal_STD <- geeglm(
  # Fixed Effects Formula
  STD_numeric ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
    BSsum_scaled + Gender +  Drugs_YN + Drink_YN  + Partner_Type,
  
  # Clustering/Grouping Variable (Required for GEE)
  id = ID,
  
  # Data and Link Function
  data = TLFB, 
  family = binomial(link = "logit"),
  
  # Working Correlation Structure
  # Common choices:
  # "exchangeable": Assumes constant correlation between all observations within an ID.
  # "ar1": Assumes correlation decays as the time between observations increases (often good for longitudinal data).
  # "independent": Assumes no correlation within an ID (least complex).
  corstr = "exchangeable" 
)


# 3. View the Summary and Robust Standard Errors
summary(geemodel_M2_Marginal_STD)

## 
## Call:
## geeglm(formula = STD_numeric ~ UCLA_Total_scaled + STD_KQ_Total_scaled + 
##     BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type, 
##     family = binomial(link = "logit"), data = TLFB, id = ID, 
##     corstr = "exchangeable")
## 
##  Coefficients:
##                     Estimate Std.err Wald Pr(>|W|)   
## (Intercept)           0.8329  0.7480 1.24   0.2654   
## UCLA_Total_scaled    -0.4826  0.3473 1.93   0.1646   
## STD_KQ_Total_scaled   0.3693  0.2670 1.91   0.1665   
## BSsum_scaled         -0.5599  0.3783 2.19   0.1389   
## Gender1              -1.1883  0.7603 2.44   0.1181   
## Drugs_YN1             0.3569  0.4299 0.69   0.4064   
## Drink_YN1             0.0513  0.1706 0.09   0.7634   
## Partner_Type1        -0.6866  0.4953 1.92   0.1657   
## Partner_Type2        -1.8854  0.6728 7.85   0.0051 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation structure = exchangeable 
## Estimated Scale Parameters:
## 
##             Estimate Std.err
## (Intercept)     1.83    7.61
##   Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha    0.583    2.02
## Number of clusters:   53  Maximum cluster size: 68

# install.packages("sjPlot")
library(sjPlot)

## 
## Attaching package: 'sjPlot'

## The following object is masked from 'package:ggplot2':
## 
##     set_theme

# Plot the fixed effects (estimates transformed from log-odds to odds ratios by default)
plot_model(geemodel_M2_Marginal, type = "est")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the sjPlot package.
##   Please report the issue at <https://github.com/strengejacke/sjPlot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#library(ggeffects)
#library(ggplot2) # Optional, for further customization
tab_model(geemodel_M2_Marginal)

	Condom_Access
Predictors	Odds Ratios	CI	p
(Intercept)	7.85	1.66 – 37.07	0.009
UCLA Total scaled	1.48	0.84 – 2.63	0.177
STD KQ Total scaled	0.87	0.59 – 1.28	0.479
BSsum scaled	1.11	0.59 – 2.10	0.741
Gender [1]	0.42	0.10 – 1.73	0.231
Drugs YN [1]	1.01	0.58 – 1.74	0.972
Drink YN [1]	1.04	0.72 – 1.49	0.835
Partner Type [1]	1.84	0.52 – 6.49	0.341
Partner Type [2]	1.00	0.28 – 3.60	0.995
N _ID	53
Observations	615

# Plot the fixed effects (estimates transformed from log-odds to odds ratios by default)
plot_model(geemodel_M2_Marginal_STD, type = "est")

#library(ggeffects)
#library(ggplot2) # Optional, for further customization



tab_model(geemodel_M2_Marginal_STD)

	STD_numeric
Predictors	Odds Ratios	CI	p
(Intercept)	2.30	0.53 – 9.96	0.265
UCLA Total scaled	0.62	0.31 – 1.22	0.165
STD KQ Total scaled	1.45	0.86 – 2.44	0.166
BSsum scaled	0.57	0.27 – 1.20	0.139
Gender [1]	0.30	0.07 – 1.35	0.118
Drugs YN [1]	1.43	0.62 – 3.32	0.406
Drink YN [1]	1.05	0.75 – 1.47	0.763
Partner Type [1]	0.50	0.19 – 1.33	0.166
Partner Type [2]	0.15	0.04 – 0.57	0.005
N _ID	53
Observations	615

##################table


predictor_vars <- c("UCLA_Total_scaled", "STD_KQ_Total_scaled", "BSsum_scaled", 
                    "Gender", "Drugs_YN", "Drink_YN", "Partner_Type")



# Use this code, simply removing the line that caused the error.
descriptive_table <- TLFB %>%
  select(Condom_Access, all_of(predictor_vars)) %>%
  tbl_summary(
    by = Condom_Access,
    statistic = list(all_continuous() ~ "{mean} ({sd})"), 
    label = list(
      UCLA_Total_scaled ~ "UCLA Score (Scaled)",
      STD_KQ_Total_scaled ~ "STD Knowledge (Scaled)",
      BSsum_scaled ~ "Behavior Score Sum (Scaled)",
      Gender ~ "Gender",
      Drugs_YN ~ "Drugs_YN Use",
      Drink_YN ~ "Drink",
      Partner_Type ~ "Partner Type"
    ),
    digits = list(all_continuous() ~ 2),
    type = all_of(predictor_vars) ~ "categorical"
  ) %>%
  add_overall() %>%
  modify_header(label = "**Characteristic**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Condom Access Status**")

# Use 'print()' or just the object name to display the table
print(descriptive_table)

Characteristic	Condom Access Status
Characteristic	Overall N = 615¹	0 N = 92¹	1 N = 523¹
UCLA Score (Scaled)
-2.90676298457177	3 (0.5%)	0 (0%)	3 (0.6%)
-1.92460458838561	10 (1.6%)	2 (2.2%)	8 (1.5%)
-1.8591273619732	10 (1.6%)	9 (9.8%)	1 (0.2%)
-1.72817290914838	41 (6.7%)	2 (2.2%)	39 (7.5%)
-1.46626400349874	4 (0.7%)	1 (1.1%)	3 (0.6%)
-1.33530955067391	19 (3.1%)	13 (14%)	6 (1.1%)
-1.07340064502427	8 (1.3%)	1 (1.1%)	7 (1.3%)
-1.00792341861186	10 (1.6%)	1 (1.1%)	9 (1.7%)
-0.876968965787038	5 (0.8%)	1 (1.1%)	4 (0.8%)
-0.811491739374627	52 (8.5%)	1 (1.1%)	51 (9.8%)
-0.746014512962216	1 (0.2%)	1 (1.1%)	0 (0%)
-0.615060060137395	16 (2.6%)	10 (11%)	6 (1.1%)
-0.549582833724984	27 (4.4%)	8 (8.7%)	19 (3.6%)
-0.484105607312573	3 (0.5%)	1 (1.1%)	2 (0.4%)
-0.418628380900162	31 (5.0%)	0 (0%)	31 (5.9%)
-0.0257650224256974	11 (1.8%)	0 (0%)	11 (2.1%)
0.0397122039867134	12 (2.0%)	0 (0%)	12 (2.3%)
0.105189430399124	17 (2.8%)	2 (2.2%)	15 (2.9%)
0.170666656811535	68 (11%)	0 (0%)	68 (13%)
0.236143883223946	17 (2.8%)	1 (1.1%)	16 (3.1%)
0.301621109636357	12 (2.0%)	0 (0%)	12 (2.3%)
0.367098336048768	3 (0.5%)	2 (2.2%)	1 (0.2%)
0.498052788873589	8 (1.3%)	0 (0%)	8 (1.5%)
0.563530015286	57 (9.3%)	11 (12%)	46 (8.8%)
0.629007241698411	35 (5.7%)	0 (0%)	35 (6.7%)
0.890916147348054	4 (0.7%)	0 (0%)	4 (0.8%)
0.956393373760465	37 (6.0%)	3 (3.3%)	34 (6.5%)
1.08734782658529	29 (4.7%)	0 (0%)	29 (5.5%)
1.21830227941011	1 (0.2%)	0 (0%)	1 (0.2%)
1.28377950582252	2 (0.3%)	0 (0%)	2 (0.4%)
1.48021118505975	29 (4.7%)	20 (22%)	9 (1.7%)
1.67664286429698	29 (4.7%)	2 (2.2%)	27 (5.2%)
1.87307454353422	4 (0.7%)	0 (0%)	4 (0.8%)
STD Knowledge (Scaled)
-4.1526208932418	4 (0.7%)	0 (0%)	4 (0.8%)
-2.74216700001074	6 (1.0%)	1 (1.1%)	5 (1.0%)
-2.27201570226705	3 (0.5%)	0 (0%)	3 (0.6%)
-1.80186440452337	8 (1.3%)	1 (1.1%)	7 (1.3%)
-1.33171310677968	39 (6.3%)	18 (20%)	21 (4.0%)
-0.861561809035992	110 (18%)	4 (4.3%)	106 (20%)
-0.391410511292305	97 (16%)	8 (8.7%)	89 (17%)
0.0787407864513819	128 (21%)	38 (41%)	90 (17%)
0.548892084195069	64 (10%)	7 (7.6%)	57 (11%)
1.01904338193876	123 (20%)	7 (7.6%)	116 (22%)
1.95934597742613	29 (4.7%)	8 (8.7%)	21 (4.0%)
2.8996485729135	4 (0.7%)	0 (0%)	4 (0.8%)
Behavior Score Sum (Scaled)
-2.6145127906319	5 (0.8%)	1 (1.1%)	4 (0.8%)
-2.21292049161192	4 (0.7%)	0 (0%)	4 (0.8%)
-2.11252241685693	6 (1.0%)	1 (1.1%)	5 (1.0%)
-1.96192530472444	1 (0.2%)	1 (1.1%)	0 (0%)
-1.76112915521445	3 (0.5%)	0 (0%)	3 (0.6%)
-1.53523348701571	21 (3.4%)	4 (4.3%)	17 (3.3%)
-1.48503444963821	1 (0.2%)	0 (0%)	1 (0.2%)
-1.45993493094946	4 (0.7%)	0 (0%)	4 (0.8%)
-1.38463637488322	7 (1.1%)	0 (0%)	7 (1.3%)
-1.30933781881697	31 (5.0%)	2 (2.2%)	29 (5.5%)
-1.28423830012822	5 (0.8%)	0 (0%)	5 (1.0%)
-1.20893974406197	4 (0.7%)	0 (0%)	4 (0.8%)
-1.18384022537323	3 (0.5%)	2 (2.2%)	1 (0.2%)
-1.13364118799573	3 (0.5%)	0 (0%)	3 (0.6%)
-1.00814359455198	8 (1.3%)	1 (1.1%)	7 (1.3%)
-0.957944557174487	9 (1.5%)	5 (5.4%)	4 (0.8%)
-0.932845038485738	8 (1.3%)	1 (1.1%)	7 (1.3%)
-0.681849851598251	46 (7.5%)	11 (12%)	35 (6.7%)
-0.656750332909503	8 (1.3%)	0 (0%)	8 (1.5%)
-0.531252739465759	6 (1.0%)	1 (1.1%)	5 (1.0%)
-0.50615322077701	7 (1.1%)	0 (0%)	7 (1.3%)
-0.481053702088262	46 (7.5%)	1 (1.1%)	45 (8.6%)
-0.405755146022016	7 (1.1%)	0 (0%)	7 (1.3%)
-0.355556108644518	9 (1.5%)	3 (3.3%)	6 (1.1%)
-0.280257552578272	9 (1.5%)	7 (7.6%)	2 (0.4%)
-0.255158033889523	6 (1.0%)	2 (2.2%)	4 (0.8%)
-0.230058515200775	3 (0.5%)	0 (0%)	3 (0.6%)
-0.0543618843795336	15 (2.4%)	2 (2.2%)	13 (2.5%)
-0.00416284700203615	3 (0.5%)	1 (1.1%)	2 (0.4%)
0.0209366716867126	4 (0.7%)	0 (0%)	4 (0.8%)
0.0460361903754613	39 (6.3%)	1 (1.1%)	38 (7.3%)
0.0962352277529587	38 (6.2%)	21 (23%)	17 (3.3%)
0.196633302507954	3 (0.5%)	0 (0%)	3 (0.6%)
0.347230414640446	35 (5.7%)	0 (0%)	35 (6.7%)
0.372329933329195	17 (2.8%)	13 (14%)	4 (0.8%)
0.422528970706692	15 (2.4%)	4 (4.3%)	11 (2.1%)
0.497827526772938	8 (1.3%)	0 (0%)	8 (1.5%)
0.522927045461687	5 (0.8%)	4 (4.3%)	1 (0.2%)
0.573126082839184	17 (2.8%)	1 (1.1%)	16 (3.1%)
0.698623676282928	4 (0.7%)	1 (1.1%)	3 (0.6%)
1.35121116219039	8 (1.3%)	0 (0%)	8 (1.5%)
1.37631068087914	18 (2.9%)	1 (1.1%)	17 (3.3%)
1.42650971825664	116 (19%)	0 (0%)	116 (22%)
Gender
0	286 (47%)	12 (13%)	274 (52%)
1	329 (53%)	80 (87%)	249 (48%)
Drugs_YN Use
0	348 (57%)	61 (66%)	287 (55%)
1	267 (43%)	31 (34%)	236 (45%)
Drink
0	327 (53%)	52 (57%)	275 (53%)
1	288 (47%)	40 (43%)	248 (47%)
Partner Type
0	102 (17%)	19 (21%)	83 (16%)
1	168 (27%)	24 (26%)	144 (28%)
2	345 (56%)	49 (53%)	296 (57%)
¹ n (%)

###missing? 

model_vars <- c("Condom_Access", "STD_numeric", "UCLA_Total_scaled", "STD_KQ_Total_scaled", 
                "BSsum_scaled", "Gender", "Partner_Type", "ID", "Drugs_YN", "Drink_YN")
# Check missing counts and percentages for relevant variables
missing_info <- data.frame(
  Variable = model_vars,
  Count_NA = sapply(TLFB[model_vars], function(x) sum(is.na(x))),
  Percent_NA = sapply(TLFB[model_vars], function(x) mean(is.na(x))) * 100
)
print(missing_info)

                           Variable Count_NA Percent_NA

Condom_Access Condom_Access 0 0 STD_numeric STD_numeric 0 0 UCLA_Total_scaled UCLA_Total_scaled 0 0 STD_KQ_Total_scaled STD_KQ_Total_scaled 0 0 BSsum_scaled BSsum_scaled 0 0 Gender Gender 0 0 Partner_Type Partner_Type 0 0 ID ID 0 0 Drugs_YN Drugs_YN 0 0 Drink_YN Drink_YN 0 0

Untitled

Dawoon Lee

2025-12-30

R Markdown