This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
setwd("~/Desktop/FDU LAB")
TLFB2 <- read.csv("TLFB_9-22-22(TLFB).csv")
TLFB <-TLFB2[-c(1,2, 618),]
#TLFB$STD
#TLFB$Partner_Type
#con <-data.frame(TLFB$STD,TLFB$Partner_Type)
# summary(TLFB)
# require(psych)
# describe(TLFB)
# getOption("max.print")
# options(max.print = 100000)
# describeTLFB <- describe(TLFB)
# describeTLFB
# TLFB$Condom_Access
WIDE <- read.csv("WIDE_FINAL(WIDE_FINAL).csv")
#describe(WIDE)
#describeWIDE <- describe(WIDE)
#describeWIDE
#There are two datasets from my dissertation project, one is in wide format and
#the other is in long format. The datasets are the same,
#the data structure differs based on the type of analyses that were conducted -
#long for multilevel models and wide for everything else.
###### variables
#####demographics
#demo <- WIDE[,1:23]
######## condomaccess
#CDAC <-TLFB$Condom_Access
#######partner STI
#PSTI <-TLFB$STD
#require(stringr)
#TLFB$STD <- gsub("2", "1", TLFB$STD)
#TLFB$STD
######partner types
#ptype <- TLFB$Partner_Type
#TLFB$Partner_Type
#########Motivation
#moti <-TLFB$UCLA_Total
#########information
#info <-TLFB$STD_KQ_Total
#########behavior skill
# beha <- TLFB$CISQ_W_Total
# beha <- TLFB$CISQ_S_Total
# beha <- TLFB$CISQ_DR_Total
# beha <- TLFB$CISQ_RC_Total
# beha <- TLFB$CISQ_R_Total
# beha <- TLFB$CISQ_D_Total
# beha <- TLFB$CISQ_P_Total
BSsum <- data.frame(TLFB$CISQ_W_Total, TLFB$CISQ_S_Total, TLFB$CISQ_DR_Total, TLFB$CISQ_RC_Total,
TLFB$CISQ_R_Total, TLFB$CISQ_D_Total, TLFB$CISQ_P_Total)
TLFB$BSsum <-rowSums(BSsum)
#TLFB$BSsum
##########substance use
#drinkuse <- TLFB$Drink_YN
#drug <-TLFB$Drugs_YN
######??lme4
#library(dplyr)
#install.packages("pacman")
#pacman::p_load(Matrix)
library(lme4)
## Loading required package: Matrix
###### as numeric
TLFB$UCLA_Total <- as.numeric(as.character(TLFB$UCLA_Total))
TLFB$STD_KQ_Total <- as.numeric(as.character(TLFB$STD_KQ_Total))
TLFB$BSsum <- as.numeric(as.character(TLFB$BSsum))
TLFB$Condom_Access <- as.numeric(as.character(TLFB$Condom_Access))
TLFB$ID <- as.numeric(as.character(TLFB$ID))
TLFB$STD <- as.numeric(as.character(TLFB$STD))
TLFB$Gender <- as.numeric(as.character(TLFB$Gender))
TLFB$Drink_YN <- as.numeric(as.character(TLFB$Drink_YN))
TLFB$Drugs_YN <- as.numeric(as.character(TLFB$Drugs_YN))
#TLFB$Drink_YN
#TLFB$Drugs_YN
# combine the drug and alcohol use
#subs <- data.frame(as.numeric(TLFB$Drink_YN), as.numeric(TLFB$Drugs_YN))
#require(stringr)
#subs$as.numeric.TLFB.Drugs_YN. <- as.numeric(gsub("1", "2", subs$as.numeric.TLFB.Drugs_YN.)) #drug = 2
#TLFB$substanceuse <-rowSums(subs) # 0 = no use, 1 = drink, 2 = drug, 3 = combined
#TLFB$substanceuse
#TLFB$substanceuse <-rowSums(subs)
#TLFB$substanceuse <- gsub("2", "1", TLFB$substanceuse)
#TLFB$substanceuse
#TLFB$Partner_Type
#TLFB$Partner_Type <- gsub("1", "0", TLFB$Partner_Type)
#TLFB$Partner_Type <- gsub("2", "1", TLFB$Partner_Type) # 0 New + casual, 1 regular
#TLFB$Partner_Type
###scaled numeric values
TLFB$UCLA_Total_scaled <- scale(TLFB$UCLA_Total)
TLFB$STD_KQ_Total_scaled <- scale(TLFB$STD_KQ_Total)
TLFB$BSsum_scaled <- scale(TLFB$BSsum)
TLFB$ID_scaled <- scale(TLFB$ID)
#Test whether the categorical variables are factor variables
#Change to factor if the results of the above were "FALSE"
#TLFB$substanceuse <- factor(TLFB$substanceuse, levels = c(0,1))
TLFB$Gender <- factor(TLFB$Gender, levels = c(0,1)) # 0 = male, 1 = female
TLFB$Condom_Access <- factor(TLFB$Condom_Access, levels = c(0,1)) # 0 = no, 1 = yes
TLFB$STD <- gsub("2", "1", TLFB$STD) # 0 = don't know, 1 = postive or negative
TLFB$STD <- factor(TLFB$STD, levels = c(0,1))
TLFB$Partner_Type <- factor(TLFB$Partner_Type, levels = c(0,1,2)) # Regular = 2, Casual = 1, New = 0
TLFB$Drugs_YN <- factor(TLFB$Drugs_YN, levels = c(0,1)) # 0 no 1 yes
TLFB$Drink_YN <- factor(TLFB$Drink_YN, levels = c(0,1)) # 0 no 1 yes
#is.factor(TLFB$substanceuse)
is.factor(TLFB$Gender)
## [1] TRUE
is.factor(TLFB$Condom_Access)
## [1] TRUE
is.factor(TLFB$STD)
## [1] TRUE
is.factor(TLFB$Partner_Type)
## [1] TRUE
is.factor(TLFB$Drink_YN)
## [1] TRUE
is.factor(TLFB$Drugs_YN)
## [1] TRUE
require(psych)
## Loading required package: psych
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gtsummary)
##################table
#########random intercepts and slopes
# 1. Load required libraries
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(dplyr)
# --- Assuming the 'TLFB' data is loaded in your R session ---
# 2. Prepare the Data and ENSURE Condom_Access is numeric
# Create a sequential Observation_Index within each ID group.
TLFB_plot <- TLFB %>%
group_by(ID) %>%
mutate(Observation_Index = row_number(),
# Ensure Condom_Access is treated as numeric 0 or 1
Condom_Access_Numeric = as.numeric(as.character(Condom_Access))) %>%
ungroup()
# 3. Generate the ggplot using the numeric outcome variable
p_raw <- ggplot(TLFB_plot, aes(x = Observation_Index, y = Condom_Access_Numeric, group = ID)) +
# Add lines connecting the raw binary values for each ID.
geom_line(aes(color = factor(ID)), alpha = 0.5) +
# Add jittered points for the raw Condom_Access (0 or 1).
geom_point(aes(color = factor(ID)),
position = position_jitter(width = 0.2, height = 0.1),
alpha = 0.8,
size = 1.5) +
# Apply a smooth line to visualize the trend for each ID using GLM.
geom_smooth(aes(color = factor(ID)),
method = "glm",
method.args = list(family = binomial),
se = FALSE,
linewidth = 0.8) +
# Use scale_y_continuous now that the variable is confirmed to be numeric
scale_y_continuous(breaks = c(0, 1), labels = c("No Access (0)", "Access (1)")) +
# Customize the plot appearance
labs(
title = "Raw Condom Access (0/1) Trajectories by Individual ID",
subtitle = "Smooth lines show logistic trend per ID (for diagnostics)",
x = "Observation Index (Sequential within ID)",
y = "Raw Condom Access (0 = No, 1 = Yes)",
color = "Participant ID"
) +
theme_minimal() +
theme(legend.position = "none")
# Display the plot
print(p_raw)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
#######model building
#library(nlme)
#randomIntercept_GLMM <- glmer(Condom_Access ~ 1 + (1 | ID),
# data = TLFB,
# family = binomial)
#summary(randomIntercept_GLMM)
#model_M1_fixed <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
# BSsum_scaled + (1 | ID),
# data = TLFB, family = binomial(link = "logit"))
#summary(model_M1_fixed)
#anova(randomIntercept_GLMM, model_M1_fixed)
######simple model to complicated ones to see the model fits.
##Baseline Model (Fixed Effects Only)
##Random Intercepts Model
#
#
# model_M2_RI <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
# BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
# (1 | ID), # Random Intercept only
# data = TLFB, family = binomial(link = "logit"),
# glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
#
# summary(model_M2_RI)
# Compare M1 (Fixed) vs. M2 (Random Intercept)
# This checks if adding the random intercept significantly improves the model fit.
#anova(model_M1_fixed, model_M2_RI)
##Complex Random Slopes Model
#
# model_M3_ComplexRS <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
# BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
# (Drugs_YN + Drink_YN + Partner_Type | ID), # Random Intercept AND Slopes
# data = TLFB, family = binomial(link = "logit"),
# glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
#
# summary(model_M3_ComplexRS)
##Interpretation: If the $\text{p-value}$ is small (e.g., $< 0.05$), $\text{M2}$ is significantly better than $\text{M1}$,
####indicating that the clustering by $\text{ID}$ must be accounted for.
# Compare M2 (Random Intercept) vs. M3 (Complex Random Slopes)
# This checks if the added complexity of random slopes significantly improves the fit.
#anova(model_M2_RI, model_M3_ComplexRS)
###Interpretation: If the $\text{p-value}$ is small (e.g., $< 0.05$),
####$\text{M3}$ is significantly better than $\text{M2}$,
####justifying the use of the complex random effects structure.
####Note: The anova() function uses a Likelihood Ratio Test,
#####which is appropriate for comparing nested models
#####(models where one is a specific case of the other, e.g., $\text{M2}$ is $\text{M3}$ with the random slope variances fixed to zero).
###Notice the correlation between the (Intercept) and substanceuse1 is $-0.89$.
#####A correlation close to $\mathbf{-1}$ or $\mathbf{1}$ means the random effects are highly redundant
######(i.e., they are explaining the same portion of variance in a highly predictable way).
######This is the equivalent of multicollinearity in the random effects structure and is a frequent cause of the singular fit warning, even when individual variances are large.
###### Correct approach: Keep the random intercepts and slopes, but force their covariance to zero
#####Since the variances are large (meaning the slopes are needed)
#####but the correlations are high (meaning the model is unstable),
#####the best step is to use the simplification recommended previously:
#####remove the correlations while keeping the large variances.
###You can do this by splitting the random effect term into uncorrelated components using the double bar operator
#
# library(optimx)
# model_M3_Simplified_Uncorr <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
# BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
# (1 || ID) + # Uncorrelated Random Intercept
# (Drugs_YN || ID) +
# (Drink_YN || ID) +
# (Partner_Type || ID),
# data = TLFB, family = binomial,
# control = glmerControl(optimizer = "optimx",
# optCtrl = list(method = "nlminb", maxiter = 10000)))
#
# summary(model_M3_Simplified_Uncorr)
# Option to test first, removes all covariance terms:
# model_M3_Simplified <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
# BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
# (Drugs_YN || ID) +
# (Drink_YN || ID) +
# (Partner_Type || ID),
# data = TLFB, family = binomial,
# control = glmerControl(optimizer = "optimx",
# optCtrl = list(method = "nlminb", maxiter = 10000)))
# summary(model_M3_Simplified)
#
#
#
# model_M3_Final <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
# BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
# (0 + Drugs_YN || ID) + # Exclude Random Intercept
# (0 + Drink_YN || ID) + # Exclude Random Intercept
# (0 + Partner_Type || ID), # Exclude Random Intercept
# data = TLFB, family = binomial,
# control = glmerControl(optimizer = "optimx",
# optCtrl = list(method = "nlminb", maxiter = 10000)))
#
# summary(model_M3_Final)
#
#
# # Model M4_Substance: Random Intercept + Random Slope for substanceuse (no correlation)
# model_M4_Substance <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
# BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
# (1 | ID) + # Re-introduce the uncorrelated random intercept (often needed)
# (0 + Drugs_YN || ID),
# (0 + Drink_YN || ID),
# data = TLFB, family = binomial,
# control = glmerControl(optimizer = "optimx",
# optCtrl = list(method = "nlminb", maxiter = 10000)))
# summary(model_M4_Substance)
# Model M4_Partner: Random Intercept + Random Slope for Partner_Type (no correlation)
# model_M4_Partner <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
# BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
# (1 | ID) + # Re-introduce the uncorrelated random intercept
# (0 + Partner_Type || ID),
# data = TLFB, family = binomial,
# control = glmerControl(optimizer = "optimx",
# optCtrl = list(method = "nlminb", maxiter = 10000)))
#
# summary(model_M4_Partner)
#
#### most conservative model.
model_M2_RI <- glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
(1 | ID), # Random Intercept only
data = TLFB, family = binomial(link = "logit"),
glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
summary(model_M2_RI)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + BSsum_scaled +
## Gender + Drugs_YN + Drink_YN + Partner_Type + (1 | ID)
## Data: TLFB
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000))
##
## AIC BIC logLik -2*log(L) df.resid
## 371.6 415.8 -175.8 351.6 605
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.0438 0.0750 0.1478 0.2501 2.2811
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 3.73 1.931
## Number of obs: 615, groups: ID, 53
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.51155 0.86530 4.058 4.95e-05 ***
## UCLA_Total_scaled 0.56567 0.35722 1.584 0.1133
## STD_KQ_Total_scaled -0.05926 0.31014 -0.191 0.8485
## BSsum_scaled 0.26548 0.44509 0.596 0.5509
## Gender1 -1.50641 0.88045 -1.711 0.0871 .
## Drugs_YN1 0.06264 0.53063 0.118 0.9060
## Drink_YN1 0.01206 0.38433 0.031 0.9750
## Partner_Type1 0.86341 0.46841 1.843 0.0653 .
## Partner_Type2 -0.07253 0.49831 -0.146 0.8843
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) UCLA_T STD_KQ BSsm_s Gendr1 Drg_YN1 Drn_YN1 Prt_T1
## UCLA_Ttl_sc -0.021
## STD_KQ_Ttl_ 0.109 -0.156
## BSsum_scald 0.009 0.333 -0.313
## Gender1 -0.707 0.242 -0.211 0.353
## Drugs_YN1 -0.261 0.040 0.060 -0.143 0.130
## Drink_YN1 -0.203 -0.019 0.100 0.000 -0.015 -0.283
## Partnr_Typ1 -0.256 0.051 -0.042 -0.008 -0.037 -0.044 0.074
## Partnr_Typ2 -0.418 -0.062 -0.053 -0.062 0.004 0.019 0.178 0.551
model_M2_RI <- glmer(STD ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type +
(1 | ID), # Random Intercept only
data = TLFB, family = binomial(link = "logit"),
glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
summary(model_M2_RI)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: STD ~ UCLA_Total_scaled + STD_KQ_Total_scaled + BSsum_scaled +
## Gender + Drugs_YN + Drink_YN + Partner_Type + (1 | ID)
## Data: TLFB
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000))
##
## AIC BIC logLik -2*log(L) df.resid
## 309.9 354.1 -145.0 289.9 605
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.9097 -0.1296 -0.0321 0.0867 5.8500
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 46.33 6.807
## Number of obs: 615, groups: ID, 53
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.7522 2.2892 1.202 0.22927
## UCLA_Total_scaled -0.6567 1.2860 -0.511 0.60960
## STD_KQ_Total_scaled 0.9280 0.8903 1.042 0.29726
## BSsum_scaled 1.7718 1.4183 1.249 0.21159
## Gender1 -2.9062 2.8926 -1.005 0.31504
## Drugs_YN1 1.1697 0.5963 1.962 0.04981 *
## Drink_YN1 0.5531 0.4403 1.256 0.20908
## Partner_Type1 -1.7979 0.5674 -3.169 0.00153 **
## Partner_Type2 -5.4105 0.8178 -6.616 3.7e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) UCLA_T STD_KQ BSsm_s Gendr1 Drg_YN1 Drn_YN1 Prt_T1
## UCLA_Ttl_sc -0.177
## STD_KQ_Ttl_ 0.182 -0.305
## BSsum_scald 0.003 0.425 -0.337
## Gender1 -0.774 0.288 -0.273 0.312
## Drugs_YN1 -0.057 -0.009 0.069 -0.017 0.011
## Drink_YN1 -0.068 0.005 0.047 0.011 -0.016 -0.211
## Partnr_Typ1 -0.156 0.031 -0.074 -0.033 0.037 -0.223 0.064
## Partnr_Typ2 -0.216 0.029 -0.135 -0.060 0.095 -0.190 0.014 0.533
###"Although the ANOVA test comparing the Random Intercept model to the Complex Random Slope model
######(model_M3_ComplexRS) was significant, all attempts to fit the complex model structure resulted
#####in singular fits or convergence failures. This indicates that the data does not contain sufficient
#######information to reliably estimate the random slope variances.
#######Therefore, the most complex, stable model is the Random Intercept Only model,
####which was selected for final interpretation."
##########comparing GEE and glmer
#install.packages("geepack")
library(geepack)
TLFB$Condom_Access <- as.numeric(TLFB$Condom_Access)
#TLFB$Condom_Access
TLFB$Condom_Access <- gsub("1", "0", TLFB$Condom_Access)
TLFB$Condom_Access <- gsub("2", "1", TLFB$Condom_Access)
#TLFB$Condom_Access
TLFB$Condom_Access <- as.numeric(TLFB$Condom_Access)
# 2. Define the GEE model structure
# The fixed effects formula remains the same as your glmer model.
# The 'id' argument specifies the clustering variable (replaces the random effect).
# The 'corstr' argument specifies the working correlation structure.
geemodel_M2_Marginal <- geeglm(
# Fixed Effects Formula
Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type,
# Clustering/Grouping Variable (Required for GEE)
id = ID,
# Data and Link Function
data = TLFB,
family = binomial(link = "logit"),
# Working Correlation Structure
# Common choices:
# "exchangeable": Assumes constant correlation between all observations within an ID.
# "ar1": Assumes correlation decays as the time between observations increases (often good for longitudinal data).
# "independent": Assumes no correlation within an ID (least complex).
corstr = "exchangeable"
)
# 3. View the Summary and Robust Standard Errors
summary(geemodel_M2_Marginal)
##
## Call:
## geeglm(formula = Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
## BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type,
## family = binomial(link = "logit"), data = TLFB, id = ID,
## corstr = "exchangeable")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 2.060940 0.791770 6.775 0.00924 **
## UCLA_Total_scaled 0.394580 0.292118 1.825 0.17677
## STD_KQ_Total_scaled -0.139220 0.196784 0.501 0.47927
## BSsum_scaled 0.107552 0.324836 0.110 0.74057
## Gender1 -0.861213 0.719294 1.434 0.23119
## Drugs_YN1 0.009861 0.278854 0.001 0.97179
## Drink_YN1 0.038637 0.184965 0.044 0.83453
## Partner_Type1 0.611353 0.642623 0.905 0.34143
## Partner_Type2 -0.004097 0.655607 0.000 0.99501
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = exchangeable
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 0.9057 0.5454
## Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.3385 0.207
## Number of clusters: 53 Maximum cluster size: 68
TLFB$STD
## [1] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## [75] 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 0 1 0 0 0 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1
## [223] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1
## [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [408] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## [482] 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1
## [519] 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1
## [556] 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## [593] 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 1 0 0 0
## Levels: 0 1
#TLFB$STD[is.na(TLFB$STD)] <- 1
#TLFB$STD
#TLFB$STD <- factor(TLFB$STD, levels = c(0,1))
TLFB$STD <- as.numeric(TLFB$STD)
#TLFB$STD
TLFB$STD_numeric <- as.numeric(as.factor(TLFB$STD)) - 1
TLFB$STD_numeric
## [1] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## [75] 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 0 1 0 0 0 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1
## [223] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1
## [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [408] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## [482] 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1
## [519] 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1
## [556] 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## [593] 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 1 0 0 0
# 2. Define the GEE model structure
# The fixed effects formula remains the same as your glmer model.
# The 'id' argument specifies the clustering variable (replaces the random effect).
# The 'corstr' argument specifies the working correlation structure.
geemodel_M2_Marginal_STD <- geeglm(
# Fixed Effects Formula
STD_numeric ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type,
# Clustering/Grouping Variable (Required for GEE)
id = ID,
# Data and Link Function
data = TLFB,
family = binomial(link = "logit"),
# Working Correlation Structure
# Common choices:
# "exchangeable": Assumes constant correlation between all observations within an ID.
# "ar1": Assumes correlation decays as the time between observations increases (often good for longitudinal data).
# "independent": Assumes no correlation within an ID (least complex).
corstr = "exchangeable"
)
# 3. View the Summary and Robust Standard Errors
summary(geemodel_M2_Marginal_STD)
##
## Call:
## geeglm(formula = STD_numeric ~ UCLA_Total_scaled + STD_KQ_Total_scaled +
## BSsum_scaled + Gender + Drugs_YN + Drink_YN + Partner_Type,
## family = binomial(link = "logit"), data = TLFB, id = ID,
## corstr = "exchangeable")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 0.8329 0.7480 1.24 0.2654
## UCLA_Total_scaled -0.4826 0.3473 1.93 0.1646
## STD_KQ_Total_scaled 0.3693 0.2670 1.91 0.1665
## BSsum_scaled -0.5599 0.3783 2.19 0.1389
## Gender1 -1.1883 0.7603 2.44 0.1181
## Drugs_YN1 0.3569 0.4299 0.69 0.4064
## Drink_YN1 0.0513 0.1706 0.09 0.7634
## Partner_Type1 -0.6866 0.4953 1.92 0.1657
## Partner_Type2 -1.8854 0.6728 7.85 0.0051 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = exchangeable
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 1.83 7.61
## Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.583 2.02
## Number of clusters: 53 Maximum cluster size: 68
# install.packages("sjPlot")
library(sjPlot)
##
## Attaching package: 'sjPlot'
## The following object is masked from 'package:ggplot2':
##
## set_theme
# Plot the fixed effects (estimates transformed from log-odds to odds ratios by default)
plot_model(geemodel_M2_Marginal, type = "est")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the sjPlot package.
## Please report the issue at <https://github.com/strengejacke/sjPlot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#library(ggeffects)
#library(ggplot2) # Optional, for further customization
tab_model(geemodel_M2_Marginal)
| Condom_Access | |||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 7.85 | 1.66 – 37.07 | 0.009 |
| UCLA Total scaled | 1.48 | 0.84 – 2.63 | 0.177 |
| STD KQ Total scaled | 0.87 | 0.59 – 1.28 | 0.479 |
| BSsum scaled | 1.11 | 0.59 – 2.10 | 0.741 |
| Gender [1] | 0.42 | 0.10 – 1.73 | 0.231 |
| Drugs YN [1] | 1.01 | 0.58 – 1.74 | 0.972 |
| Drink YN [1] | 1.04 | 0.72 – 1.49 | 0.835 |
| Partner Type [1] | 1.84 | 0.52 – 6.49 | 0.341 |
| Partner Type [2] | 1.00 | 0.28 – 3.60 | 0.995 |
| N ID | 53 | ||
| Observations | 615 | ||
# Plot the fixed effects (estimates transformed from log-odds to odds ratios by default)
plot_model(geemodel_M2_Marginal_STD, type = "est")
#library(ggeffects)
#library(ggplot2) # Optional, for further customization
tab_model(geemodel_M2_Marginal_STD)
| STD_numeric | |||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 2.30 | 0.53 – 9.96 | 0.265 |
| UCLA Total scaled | 0.62 | 0.31 – 1.22 | 0.165 |
| STD KQ Total scaled | 1.45 | 0.86 – 2.44 | 0.166 |
| BSsum scaled | 0.57 | 0.27 – 1.20 | 0.139 |
| Gender [1] | 0.30 | 0.07 – 1.35 | 0.118 |
| Drugs YN [1] | 1.43 | 0.62 – 3.32 | 0.406 |
| Drink YN [1] | 1.05 | 0.75 – 1.47 | 0.763 |
| Partner Type [1] | 0.50 | 0.19 – 1.33 | 0.166 |
| Partner Type [2] | 0.15 | 0.04 – 0.57 | 0.005 |
| N ID | 53 | ||
| Observations | 615 | ||
##################table
predictor_vars <- c("UCLA_Total_scaled", "STD_KQ_Total_scaled", "BSsum_scaled",
"Gender", "Drugs_YN", "Drink_YN", "Partner_Type")
# Use this code, simply removing the line that caused the error.
descriptive_table <- TLFB %>%
select(Condom_Access, all_of(predictor_vars)) %>%
tbl_summary(
by = Condom_Access,
statistic = list(all_continuous() ~ "{mean} ({sd})"),
label = list(
UCLA_Total_scaled ~ "UCLA Score (Scaled)",
STD_KQ_Total_scaled ~ "STD Knowledge (Scaled)",
BSsum_scaled ~ "Behavior Score Sum (Scaled)",
Gender ~ "Gender",
Drugs_YN ~ "Drugs_YN Use",
Drink_YN ~ "Drink",
Partner_Type ~ "Partner Type"
),
digits = list(all_continuous() ~ 2),
type = all_of(predictor_vars) ~ "categorical"
) %>%
add_overall() %>%
modify_header(label = "**Characteristic**") %>%
modify_spanning_header(all_stat_cols() ~ "**Condom Access Status**")
# Use 'print()' or just the object name to display the table
print(descriptive_table)
| Characteristic |
Condom Access Status
|
||
|---|---|---|---|
|
Overall N = 6151 |
0 N = 921 |
1 N = 5231 |
|
| UCLA Score (Scaled) |
|
|
|
| -2.90676298457177 | 3 (0.5%) | 0 (0%) | 3 (0.6%) |
| -1.92460458838561 | 10 (1.6%) | 2 (2.2%) | 8 (1.5%) |
| -1.8591273619732 | 10 (1.6%) | 9 (9.8%) | 1 (0.2%) |
| -1.72817290914838 | 41 (6.7%) | 2 (2.2%) | 39 (7.5%) |
| -1.46626400349874 | 4 (0.7%) | 1 (1.1%) | 3 (0.6%) |
| -1.33530955067391 | 19 (3.1%) | 13 (14%) | 6 (1.1%) |
| -1.07340064502427 | 8 (1.3%) | 1 (1.1%) | 7 (1.3%) |
| -1.00792341861186 | 10 (1.6%) | 1 (1.1%) | 9 (1.7%) |
| -0.876968965787038 | 5 (0.8%) | 1 (1.1%) | 4 (0.8%) |
| -0.811491739374627 | 52 (8.5%) | 1 (1.1%) | 51 (9.8%) |
| -0.746014512962216 | 1 (0.2%) | 1 (1.1%) | 0 (0%) |
| -0.615060060137395 | 16 (2.6%) | 10 (11%) | 6 (1.1%) |
| -0.549582833724984 | 27 (4.4%) | 8 (8.7%) | 19 (3.6%) |
| -0.484105607312573 | 3 (0.5%) | 1 (1.1%) | 2 (0.4%) |
| -0.418628380900162 | 31 (5.0%) | 0 (0%) | 31 (5.9%) |
| -0.0257650224256974 | 11 (1.8%) | 0 (0%) | 11 (2.1%) |
| 0.0397122039867134 | 12 (2.0%) | 0 (0%) | 12 (2.3%) |
| 0.105189430399124 | 17 (2.8%) | 2 (2.2%) | 15 (2.9%) |
| 0.170666656811535 | 68 (11%) | 0 (0%) | 68 (13%) |
| 0.236143883223946 | 17 (2.8%) | 1 (1.1%) | 16 (3.1%) |
| 0.301621109636357 | 12 (2.0%) | 0 (0%) | 12 (2.3%) |
| 0.367098336048768 | 3 (0.5%) | 2 (2.2%) | 1 (0.2%) |
| 0.498052788873589 | 8 (1.3%) | 0 (0%) | 8 (1.5%) |
| 0.563530015286 | 57 (9.3%) | 11 (12%) | 46 (8.8%) |
| 0.629007241698411 | 35 (5.7%) | 0 (0%) | 35 (6.7%) |
| 0.890916147348054 | 4 (0.7%) | 0 (0%) | 4 (0.8%) |
| 0.956393373760465 | 37 (6.0%) | 3 (3.3%) | 34 (6.5%) |
| 1.08734782658529 | 29 (4.7%) | 0 (0%) | 29 (5.5%) |
| 1.21830227941011 | 1 (0.2%) | 0 (0%) | 1 (0.2%) |
| 1.28377950582252 | 2 (0.3%) | 0 (0%) | 2 (0.4%) |
| 1.48021118505975 | 29 (4.7%) | 20 (22%) | 9 (1.7%) |
| 1.67664286429698 | 29 (4.7%) | 2 (2.2%) | 27 (5.2%) |
| 1.87307454353422 | 4 (0.7%) | 0 (0%) | 4 (0.8%) |
| STD Knowledge (Scaled) |
|
|
|
| -4.1526208932418 | 4 (0.7%) | 0 (0%) | 4 (0.8%) |
| -2.74216700001074 | 6 (1.0%) | 1 (1.1%) | 5 (1.0%) |
| -2.27201570226705 | 3 (0.5%) | 0 (0%) | 3 (0.6%) |
| -1.80186440452337 | 8 (1.3%) | 1 (1.1%) | 7 (1.3%) |
| -1.33171310677968 | 39 (6.3%) | 18 (20%) | 21 (4.0%) |
| -0.861561809035992 | 110 (18%) | 4 (4.3%) | 106 (20%) |
| -0.391410511292305 | 97 (16%) | 8 (8.7%) | 89 (17%) |
| 0.0787407864513819 | 128 (21%) | 38 (41%) | 90 (17%) |
| 0.548892084195069 | 64 (10%) | 7 (7.6%) | 57 (11%) |
| 1.01904338193876 | 123 (20%) | 7 (7.6%) | 116 (22%) |
| 1.95934597742613 | 29 (4.7%) | 8 (8.7%) | 21 (4.0%) |
| 2.8996485729135 | 4 (0.7%) | 0 (0%) | 4 (0.8%) |
| Behavior Score Sum (Scaled) |
|
|
|
| -2.6145127906319 | 5 (0.8%) | 1 (1.1%) | 4 (0.8%) |
| -2.21292049161192 | 4 (0.7%) | 0 (0%) | 4 (0.8%) |
| -2.11252241685693 | 6 (1.0%) | 1 (1.1%) | 5 (1.0%) |
| -1.96192530472444 | 1 (0.2%) | 1 (1.1%) | 0 (0%) |
| -1.76112915521445 | 3 (0.5%) | 0 (0%) | 3 (0.6%) |
| -1.53523348701571 | 21 (3.4%) | 4 (4.3%) | 17 (3.3%) |
| -1.48503444963821 | 1 (0.2%) | 0 (0%) | 1 (0.2%) |
| -1.45993493094946 | 4 (0.7%) | 0 (0%) | 4 (0.8%) |
| -1.38463637488322 | 7 (1.1%) | 0 (0%) | 7 (1.3%) |
| -1.30933781881697 | 31 (5.0%) | 2 (2.2%) | 29 (5.5%) |
| -1.28423830012822 | 5 (0.8%) | 0 (0%) | 5 (1.0%) |
| -1.20893974406197 | 4 (0.7%) | 0 (0%) | 4 (0.8%) |
| -1.18384022537323 | 3 (0.5%) | 2 (2.2%) | 1 (0.2%) |
| -1.13364118799573 | 3 (0.5%) | 0 (0%) | 3 (0.6%) |
| -1.00814359455198 | 8 (1.3%) | 1 (1.1%) | 7 (1.3%) |
| -0.957944557174487 | 9 (1.5%) | 5 (5.4%) | 4 (0.8%) |
| -0.932845038485738 | 8 (1.3%) | 1 (1.1%) | 7 (1.3%) |
| -0.681849851598251 | 46 (7.5%) | 11 (12%) | 35 (6.7%) |
| -0.656750332909503 | 8 (1.3%) | 0 (0%) | 8 (1.5%) |
| -0.531252739465759 | 6 (1.0%) | 1 (1.1%) | 5 (1.0%) |
| -0.50615322077701 | 7 (1.1%) | 0 (0%) | 7 (1.3%) |
| -0.481053702088262 | 46 (7.5%) | 1 (1.1%) | 45 (8.6%) |
| -0.405755146022016 | 7 (1.1%) | 0 (0%) | 7 (1.3%) |
| -0.355556108644518 | 9 (1.5%) | 3 (3.3%) | 6 (1.1%) |
| -0.280257552578272 | 9 (1.5%) | 7 (7.6%) | 2 (0.4%) |
| -0.255158033889523 | 6 (1.0%) | 2 (2.2%) | 4 (0.8%) |
| -0.230058515200775 | 3 (0.5%) | 0 (0%) | 3 (0.6%) |
| -0.0543618843795336 | 15 (2.4%) | 2 (2.2%) | 13 (2.5%) |
| -0.00416284700203615 | 3 (0.5%) | 1 (1.1%) | 2 (0.4%) |
| 0.0209366716867126 | 4 (0.7%) | 0 (0%) | 4 (0.8%) |
| 0.0460361903754613 | 39 (6.3%) | 1 (1.1%) | 38 (7.3%) |
| 0.0962352277529587 | 38 (6.2%) | 21 (23%) | 17 (3.3%) |
| 0.196633302507954 | 3 (0.5%) | 0 (0%) | 3 (0.6%) |
| 0.347230414640446 | 35 (5.7%) | 0 (0%) | 35 (6.7%) |
| 0.372329933329195 | 17 (2.8%) | 13 (14%) | 4 (0.8%) |
| 0.422528970706692 | 15 (2.4%) | 4 (4.3%) | 11 (2.1%) |
| 0.497827526772938 | 8 (1.3%) | 0 (0%) | 8 (1.5%) |
| 0.522927045461687 | 5 (0.8%) | 4 (4.3%) | 1 (0.2%) |
| 0.573126082839184 | 17 (2.8%) | 1 (1.1%) | 16 (3.1%) |
| 0.698623676282928 | 4 (0.7%) | 1 (1.1%) | 3 (0.6%) |
| 1.35121116219039 | 8 (1.3%) | 0 (0%) | 8 (1.5%) |
| 1.37631068087914 | 18 (2.9%) | 1 (1.1%) | 17 (3.3%) |
| 1.42650971825664 | 116 (19%) | 0 (0%) | 116 (22%) |
| Gender |
|
|
|
| 0 | 286 (47%) | 12 (13%) | 274 (52%) |
| 1 | 329 (53%) | 80 (87%) | 249 (48%) |
| Drugs_YN Use |
|
|
|
| 0 | 348 (57%) | 61 (66%) | 287 (55%) |
| 1 | 267 (43%) | 31 (34%) | 236 (45%) |
| Drink |
|
|
|
| 0 | 327 (53%) | 52 (57%) | 275 (53%) |
| 1 | 288 (47%) | 40 (43%) | 248 (47%) |
| Partner Type |
|
|
|
| 0 | 102 (17%) | 19 (21%) | 83 (16%) |
| 1 | 168 (27%) | 24 (26%) | 144 (28%) |
| 2 | 345 (56%) | 49 (53%) | 296 (57%) |
| 1 n (%) | |||
###missing?
model_vars <- c("Condom_Access", "STD_numeric", "UCLA_Total_scaled", "STD_KQ_Total_scaled",
"BSsum_scaled", "Gender", "Partner_Type", "ID", "Drugs_YN", "Drink_YN")
# Check missing counts and percentages for relevant variables
missing_info <- data.frame(
Variable = model_vars,
Count_NA = sapply(TLFB[model_vars], function(x) sum(is.na(x))),
Percent_NA = sapply(TLFB[model_vars], function(x) mean(is.na(x))) * 100
)
print(missing_info)
Variable Count_NA Percent_NA
Condom_Access Condom_Access 0 0 STD_numeric STD_numeric 0 0 UCLA_Total_scaled UCLA_Total_scaled 0 0 STD_KQ_Total_scaled STD_KQ_Total_scaled 0 0 BSsum_scaled BSsum_scaled 0 0 Gender Gender 0 0 Partner_Type Partner_Type 0 0 ID ID 0 0 Drugs_YN Drugs_YN 0 0 Drink_YN Drink_YN 0 0