Untitled

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

setwd("~/Desktop/FDU LAB")
TLFB2 <- read.csv("TLFB_9-22-22(TLFB).csv")

TLFB <-TLFB2[-c(1,2, 616),]

# summary(TLFB)
# require(psych)
# describe(TLFB)
# getOption("max.print")
# options(max.print = 100000)
# describeTLFB <- describe(TLFB)
# describeTLFB
# TLFB$Condom_Access

WIDE <- read.csv("WIDE_FINAL(WIDE_FINAL).csv")
#describe(WIDE)
#describeWIDE <- describe(WIDE)
#describeWIDE


#There are two datasets from my dissertation project, one is in wide format and 
#the other is in long format. The datasets are the same,
#the data structure differs based on the type of analyses that were conducted - 
#long for multilevel models and wide for everything else.




###### variables 

#####demographics

#demo <- WIDE[,1:23]


######## condomaccess

#CDAC <-TLFB$Condom_Access

#######partner STI

#PSTI <-TLFB$STD
require(stringr)
TLFB$STD <- gsub("2", "1", TLFB$STD)
#TLFB$STD
######partner types

#ptype <- TLFB$Partner_Type
#TLFB$Partner_Type
#########Motivation

#moti <-TLFB$UCLA_Total



#########information

#info <-TLFB$STD_KQ_Total


#########behavior skill

# beha <- TLFB$CISQ_W_Total
# beha <- TLFB$CISQ_S_Total
# beha <- TLFB$CISQ_DR_Total
# beha <- TLFB$CISQ_RC_Total
# beha <- TLFB$CISQ_R_Total
# beha <- TLFB$CISQ_D_Total
# beha <- TLFB$CISQ_P_Total

BSsum <- data.frame(TLFB$CISQ_W_Total, TLFB$CISQ_S_Total, TLFB$CISQ_DR_Total, TLFB$CISQ_RC_Total, 
                     TLFB$CISQ_R_Total, TLFB$CISQ_D_Total, TLFB$CISQ_P_Total)
TLFB$BSsum <-rowSums(BSsum)
#TLFB$BSsum
##########substance use

#drinkuse <- TLFB$Drink_YN
#drug <-TLFB$Drugs_YN
  


######??lme4

#library(dplyr)
#install.packages("pacman")
#pacman::p_load(Matrix)
library(lme4)

###### as numeric 
TLFB$UCLA_Total <- as.numeric(as.character(TLFB$UCLA_Total))
TLFB$STD_KQ_Total <- as.numeric(as.character(TLFB$STD_KQ_Total))
TLFB$BSsum <- as.numeric(as.character(TLFB$BSsum))
TLFB$Condom_Access <- as.numeric(as.character(TLFB$Condom_Access))
TLFB$ID <- as.numeric(as.character(TLFB$ID))
TLFB$STD <- as.numeric(as.character(TLFB$STD))

TLFB$Gender <- as.numeric(as.character(TLFB$Gender))




#TLFB$Drink_YN
#TLFB$Drugs_YN
# combine the drug and alcohol use 
subs <- data.frame(as.numeric(TLFB$Drink_YN), as.numeric(TLFB$Drugs_YN))
TLFB$substanceuse <-rowSums(subs)
#TLFB$substanceuse 
require(stringr)
TLFB$substanceuse <- gsub("2", "1", TLFB$substanceuse)
#TLFB$substanceuse


###scaled numeric values 
TLFB$UCLA_Total_scaled <- scale(TLFB$UCLA_Total)
TLFB$STD_KQ_Total_scaled <- scale(TLFB$STD_KQ_Total)
TLFB$BSsum_scaled <- scale(TLFB$BSsum)
TLFB$ID_scaled <- scale(TLFB$ID)


#Test whether the categorical variables are factor variables
#Change to factor if the results of the above were "FALSE"
TLFB$substanceuse <- factor(TLFB$substanceuse, levels = c(0,1))
TLFB$Gender <- factor(TLFB$Gender, levels = c(0,1))
TLFB$Condom_Access <- factor(TLFB$Condom_Access, levels = c(0,1))
TLFB$STD <- factor(TLFB$STD, levels = c(0,1))
TLFB$Partner_Type <- factor(TLFB$Partner_Type, levels = c(0,1,2))

is.factor(TLFB$substanceuse)

## [1] TRUE

is.factor(TLFB$Gender)

## [1] TRUE

is.factor(TLFB$Condom_Access)

## [1] TRUE

is.factor(TLFB$STD)

## [1] TRUE

is.factor(TLFB$Partner_Type)

## [1] TRUE

###Fit the full model 
model_condom_accessibility <-glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled +  BSsum_scaled  + Gender +
                                     (1 |ID)+
                                     (1 |substanceuse)+
                                     (1 |Partner_Type),
                                   data = TLFB, family = binomial(link = "logit"),
                                   glmerControl(optimizer = "bobyqa"))


####Overall Fit & Uniformity: Generate simulated residuals and plot them
require(DHARMa)

simulationOutput <- DHARMa::simulateResiduals(fittedModel = model_condom_accessibility, plot = TRUE)

## Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L,
## : Fitting terminated with step failure - check results carefully

####The points in the Q-Q plot should align with the diagonal line,
####and the p-value for the Kolmogorov-Smirnov test should be above 0.05, indicating uniformity.

DHARMa::testDispersion(simulationOutput)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.0272, p-value = 0.88
## alternative hypothesis: two.sided

###A p-value below 0.05 usually indicates a problem that needs addressing (e.g., using a negative binomial family instead of Poisson).
qqnorm(ranef(model_condom_accessibility)[[1]][,1])
qqline(ranef(model_condom_accessibility)[[1]][,1])

###Normality of Random Effects: Visually inspect the distribution of the extracted random effects using Q-Q plots to ensure they are approximately normal:
###The Q-Q plot compares the distribution of your sample data (the extracted random effects) against a theoretical normal distribution. 
###If the data are normally distributed, the points should fall closely along a straight diagonal line.
##The visual inspection of the provided Q-Q plot suggests a deviation from the normal distribution, particularly in the lower (left) tail.

summary(model_condom_accessibility)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + BSsum_scaled +  
##     Gender + (1 | ID) + (1 | substanceuse) + (1 | Partner_Type)
##    Data: TLFB
## Control: glmerControl(optimizer = "bobyqa")
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     372.1     407.5    -178.1     356.1       606 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.6724  0.0754  0.1515  0.2505  2.2315 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  ID           (Intercept) 3.58283  1.8928  
##  Partner_Type (Intercept) 0.09038  0.3006  
##  substanceuse (Intercept) 0.00000  0.0000  
## Number of obs: 614, groups:  ID, 53; Partner_Type, 3; substanceuse, 2
## 
## Fixed effects:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          3.75833    0.74630   5.036 4.76e-07 ***
## UCLA_Total_scaled    0.52274    0.35199   1.485   0.1375    
## STD_KQ_Total_scaled -0.06485    0.30330  -0.214   0.8307    
## BSsum_scaled         0.26525    0.43340   0.612   0.5405    
## Gender1             -1.50325    0.85956  -1.749   0.0803 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) UCLA_T STD_KQ BSsm_s
## UCLA_Ttl_sc -0.016                     
## STD_KQ_Ttl_  0.151 -0.162              
## BSsum_scald -0.059  0.341 -0.309       
## Gender1     -0.775  0.238 -0.224  0.378
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

# install.packages("sjPlot")
library(sjPlot)

# Plot the fixed effects (estimates transformed from log-odds to odds ratios by default)
plot_model(model_condom_accessibility, type = "est")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the sjPlot package.
##   Please report the issue at <https://github.com/strengejacke/sjPlot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

library(ggeffects)
library(ggplot2) # Optional, for further customization


# Use sjPlot to quickly generate Q-Q plots for all random effects
plot_model(model_condom_accessibility, type = "pred")

## $UCLA_Total_scaled

## 
## $STD_KQ_Total_scaled

## 
## $BSsum_scaled

## 
## $Gender

first_re_plot <- plot_model(model_condom_accessibility, type = "re")[[1]] 
se_re_plot <- plot_model(model_condom_accessibility, type = "re")[[2]] 
th_re_plot <- plot_model(model_condom_accessibility, type = "re")[[3]] 

modified_plot <- first_re_plot +
  labs(
    title = "Participant ID Random Effects",
    x = "ID",
    y = "Condom Access",
    caption = "Model: Condom_Access"
  )

print(modified_plot)

modified_plot2 <- se_re_plot +
  labs(
    title = "Partner Type Random Effects",
    subtitle = "0 = New, 1 = Causal, 2 = Regular",
    x = "Partner Type",
    y = "Condom Access",
    caption = "Model: Condom_Access"
  )

print(modified_plot2)

modified_plot3 <- th_re_plot +
  labs(
    title = "Substance UseRandom Effects",
    subtitle = "0 = no use, 1 = use",
    x = "Substance use",
    y = "Condom Access",
    caption = "Model: Condom_Access"
  )

print(modified_plot3)

sjPlot::tab_model(model_condom_accessibility)

	Condom_Access
Predictors	Odds Ratios	CI	p
(Intercept)	42.88	9.93 – 185.13	<0.001
UCLA Total scaled	1.69	0.85 – 3.36	0.138
STD KQ Total scaled	0.94	0.52 – 1.70	0.831
BSsum scaled	1.30	0.56 – 3.05	0.541
Gender [1]	0.22	0.04 – 1.20	0.080
Random Effects
σ²	3.29
τ₀₀ _ID	3.58
τ₀₀ _{Partner_Type}	0.09
τ₀₀ _substanceuse	0.00
N _ID	53
N _substanceuse	2
N _{Partner_Type}	3
Observations	614
Marginal R² / Conditional R²	0.255 / NA

library(performance)

# This will return the correct ICC calculation for your model
#icc(model_condom_accessibility)
##Warning message:
#Can't compute random effect variances. Some variance components equal zero. Your model may suffer from singularity (see
#  `?lme4::isSingular` and `?performance::check_singularity`).
#  Decrease the `tolerance` level to force the calculation of random effect variances, or impose priors on your random effects
#  parameters (using packages like `brms` or `glmmTMB`). 

########STD



###Fit the full model 
model_STD <-glmer(STD ~ UCLA_Total_scaled + STD_KQ_Total_scaled +  BSsum_scaled  + Gender +
                                     (1 |ID)+
                                     (1 |substanceuse)+
                                     (1 |Partner_Type),
                                   data = TLFB, family = binomial(link = "logit"),
                                   glmerControl(optimizer = "bobyqa"))


####Overall Fit & Uniformity: Generate simulated residuals and plot them
require(DHARMa)

simulationOutput_STD <- DHARMa::simulateResiduals(fittedModel = model_STD, plot = TRUE)

####The points in the Q-Q plot should align with the diagonal line,
####and the p-value for the Kolmogorov-Smirnov test should be above 0.05, indicating uniformity.

DHARMa::testDispersion(simulationOutput_STD)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.2824, p-value = 0.256
## alternative hypothesis: two.sided

###A p-value below 0.05 usually indicates a problem that needs addressing (e.g., using a negative binomial family instead of Poisson).
qqnorm(ranef(model_STD)[[1]][,1])
qqline(ranef(model_STD)[[1]][,1])

###Normality of Random Effects: Visually inspect the distribution of the extracted random effects using Q-Q plots to ensure they are approximately normal:
###The Q-Q plot compares the distribution of your sample data (the extracted random effects) against a theoretical normal distribution. 
###If the data are normally distributed, the points should fall closely along a straight diagonal line.
##The visual inspection of the provided Q-Q plot suggests a deviation from the normal distribution, particularly in the lower (left) tail.

summary(model_STD)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: STD ~ UCLA_Total_scaled + STD_KQ_Total_scaled + BSsum_scaled +  
##     Gender + (1 | ID) + (1 | substanceuse) + (1 | Partner_Type)
##    Data: TLFB
## Control: glmerControl(optimizer = "bobyqa")
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     322.0     357.4    -153.0     306.0       606 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.8728 -0.1636 -0.0400  0.0835  5.7124 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  ID           (Intercept) 43.8399  6.6212  
##  Partner_Type (Intercept)  5.0935  2.2569  
##  substanceuse (Intercept)  0.3901  0.6246  
## Number of obs: 614, groups:  ID, 53; Partner_Type, 3; substanceuse, 2
## 
## Fixed effects:
##                     Estimate Std. Error z value Pr(>|z|)
## (Intercept)           1.1776     2.6287   0.448    0.654
## UCLA_Total_scaled    -0.8105     1.3274  -0.611    0.541
## STD_KQ_Total_scaled   0.7914     0.8822   0.897    0.370
## BSsum_scaled          1.9819     1.5009   1.320    0.187
## Gender1              -2.9447     2.9905  -0.985    0.325
## 
## Correlation of Fixed Effects:
##             (Intr) UCLA_T STD_KQ BSsm_s
## UCLA_Ttl_sc -0.153                     
## STD_KQ_Ttl_  0.159 -0.284              
## BSsum_scald -0.065  0.459 -0.333       
## Gender1     -0.691  0.318 -0.263  0.352

# install.packages("sjPlot")
library(sjPlot)

# Plot the fixed effects (estimates transformed from log-odds to odds ratios by default)
plot_model(model_STD, type = "est")

library(ggeffects)
library(ggplot2) # Optional, for further customization


# Use sjPlot to quickly generate Q-Q plots for all random effects
plot_model(model_STD, type = "pred")

## $UCLA_Total_scaled

## 
## $STD_KQ_Total_scaled

## 
## $BSsum_scaled

## 
## $Gender

first_re_plot <- plot_model(model_STD, type = "re")[[1]] 
se_re_plot <- plot_model(model_STD, type = "re")[[2]] 
th_re_plot <- plot_model(model_STD, type = "re")[[3]] 

modified_plot <- first_re_plot +
  labs(
    title = "Participant ID Random Effects",
    x = "ID",
    y = "STD",
    caption = "Model: STD"
  )

print(modified_plot)

modified_plot2 <- se_re_plot +
  labs(
    title = "Partner Type Random Effects",
    subtitle = "0 = New, 1 = Causal, 2 = Regular",
    x = "Partner Type",
    y = "STD",
    caption = "Model: STD"
  )

print(modified_plot2)

modified_plot3 <- th_re_plot +
  labs(
    title = "Substance UseRandom Effects",
    subtitle = "0 = no use, 1 = use",
    x = "Substance use",
    y = "STD",
    caption = "Model: STD"
  )

print(modified_plot3)

tab_model(model_STD)

	STD
Predictors	Odds Ratios	CI	p
(Intercept)	3.25	0.02 – 561.04	0.654
UCLA Total scaled	0.44	0.03 – 6.00	0.541
STD KQ Total scaled	2.21	0.39 – 12.43	0.370
BSsum scaled	7.26	0.38 – 137.50	0.187
Gender [1]	0.05	0.00 – 18.48	0.325
Random Effects
σ²	3.29
τ₀₀ _ID	43.84
τ₀₀ _{Partner_Type}	5.09
τ₀₀ _substanceuse	0.39
ICC	0.94
N _ID	53
N _substanceuse	2
N _{Partner_Type}	3
Observations	614
Marginal R² / Conditional R²	0.166 / 0.948

##The table provides Odds Ratios (OR), their 95% Confidence Intervals (CI), and associated p-values for the main predictors. An OR greater than 1 means an increased odds of the outcome (STD), while an OR less than 1 means decreased odds.



##The random effects describe how much individual participants vary from the overall average effects. These values are reported as variances (\(\tau _{00}\)) and standard deviation (\(\sigma ^{2}\)).

##ICC (Intraclass Correlation Coefficient): 0.94
####This is a very high ICC. 
#####It suggests that about 94% of the total variability in the outcome (STD status) is attributable to differences between individuals (ID), rather than differences within individuals across different observations. This confirms that using a mixed-effects model was absolutely necessary, as individual identity matters profoundly in predicting the outcome.

Untitled

Dawoon Lee

2025-11-29

R Markdown