This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
setwd("~/Desktop/FDU LAB")
TLFB2 <- read.csv("TLFB_9-22-22(TLFB).csv")
TLFB <-TLFB2[-c(1,2, 616),]
# summary(TLFB)
# require(psych)
# describe(TLFB)
# getOption("max.print")
# options(max.print = 100000)
# describeTLFB <- describe(TLFB)
# describeTLFB
# TLFB$Condom_Access
WIDE <- read.csv("WIDE_FINAL(WIDE_FINAL).csv")
#describe(WIDE)
#describeWIDE <- describe(WIDE)
#describeWIDE
#There are two datasets from my dissertation project, one is in wide format and
#the other is in long format. The datasets are the same,
#the data structure differs based on the type of analyses that were conducted -
#long for multilevel models and wide for everything else.
###### variables
#####demographics
#demo <- WIDE[,1:23]
######## condomaccess
#CDAC <-TLFB$Condom_Access
#######partner STI
#PSTI <-TLFB$STD
require(stringr)
TLFB$STD <- gsub("2", "1", TLFB$STD)
#TLFB$STD
######partner types
#ptype <- TLFB$Partner_Type
#TLFB$Partner_Type
#########Motivation
#moti <-TLFB$UCLA_Total
#########information
#info <-TLFB$STD_KQ_Total
#########behavior skill
# beha <- TLFB$CISQ_W_Total
# beha <- TLFB$CISQ_S_Total
# beha <- TLFB$CISQ_DR_Total
# beha <- TLFB$CISQ_RC_Total
# beha <- TLFB$CISQ_R_Total
# beha <- TLFB$CISQ_D_Total
# beha <- TLFB$CISQ_P_Total
BSsum <- data.frame(TLFB$CISQ_W_Total, TLFB$CISQ_S_Total, TLFB$CISQ_DR_Total, TLFB$CISQ_RC_Total,
TLFB$CISQ_R_Total, TLFB$CISQ_D_Total, TLFB$CISQ_P_Total)
TLFB$BSsum <-rowSums(BSsum)
#TLFB$BSsum
##########substance use
#drinkuse <- TLFB$Drink_YN
#drug <-TLFB$Drugs_YN
######??lme4
#library(dplyr)
#install.packages("pacman")
#pacman::p_load(Matrix)
library(lme4)
###### as numeric
TLFB$UCLA_Total <- as.numeric(as.character(TLFB$UCLA_Total))
TLFB$STD_KQ_Total <- as.numeric(as.character(TLFB$STD_KQ_Total))
TLFB$BSsum <- as.numeric(as.character(TLFB$BSsum))
TLFB$Condom_Access <- as.numeric(as.character(TLFB$Condom_Access))
TLFB$ID <- as.numeric(as.character(TLFB$ID))
TLFB$STD <- as.numeric(as.character(TLFB$STD))
TLFB$Gender <- as.numeric(as.character(TLFB$Gender))
#TLFB$Drink_YN
#TLFB$Drugs_YN
# combine the drug and alcohol use
subs <- data.frame(as.numeric(TLFB$Drink_YN), as.numeric(TLFB$Drugs_YN))
TLFB$substanceuse <-rowSums(subs)
#TLFB$substanceuse
require(stringr)
TLFB$substanceuse <- gsub("2", "1", TLFB$substanceuse)
#TLFB$substanceuse
###scaled numeric values
TLFB$UCLA_Total_scaled <- scale(TLFB$UCLA_Total)
TLFB$STD_KQ_Total_scaled <- scale(TLFB$STD_KQ_Total)
TLFB$BSsum_scaled <- scale(TLFB$BSsum)
TLFB$ID_scaled <- scale(TLFB$ID)
#Test whether the categorical variables are factor variables
#Change to factor if the results of the above were "FALSE"
TLFB$substanceuse <- factor(TLFB$substanceuse, levels = c(0,1))
TLFB$Gender <- factor(TLFB$Gender, levels = c(0,1))
TLFB$Condom_Access <- factor(TLFB$Condom_Access, levels = c(0,1))
TLFB$STD <- factor(TLFB$STD, levels = c(0,1))
TLFB$Partner_Type <- factor(TLFB$Partner_Type, levels = c(0,1,2))
is.factor(TLFB$substanceuse)
## [1] TRUE
is.factor(TLFB$Gender)
## [1] TRUE
is.factor(TLFB$Condom_Access)
## [1] TRUE
is.factor(TLFB$STD)
## [1] TRUE
is.factor(TLFB$Partner_Type)
## [1] TRUE
###Fit the full model
model_condom_accessibility <-glmer(Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + BSsum_scaled + Gender +
(1 |ID)+
(1 |substanceuse)+
(1 |Partner_Type),
data = TLFB, family = binomial(link = "logit"),
glmerControl(optimizer = "bobyqa"))
####Overall Fit & Uniformity: Generate simulated residuals and plot them
require(DHARMa)
simulationOutput <- DHARMa::simulateResiduals(fittedModel = model_condom_accessibility, plot = TRUE)
## Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L,
## : Fitting terminated with step failure - check results carefully
####The points in the Q-Q plot should align with the diagonal line,
####and the p-value for the Kolmogorov-Smirnov test should be above 0.05, indicating uniformity.
DHARMa::testDispersion(simulationOutput)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.0272, p-value = 0.88
## alternative hypothesis: two.sided
###A p-value below 0.05 usually indicates a problem that needs addressing (e.g., using a negative binomial family instead of Poisson).
qqnorm(ranef(model_condom_accessibility)[[1]][,1])
qqline(ranef(model_condom_accessibility)[[1]][,1])
###Normality of Random Effects: Visually inspect the distribution of the extracted random effects using Q-Q plots to ensure they are approximately normal:
###The Q-Q plot compares the distribution of your sample data (the extracted random effects) against a theoretical normal distribution.
###If the data are normally distributed, the points should fall closely along a straight diagonal line.
##The visual inspection of the provided Q-Q plot suggests a deviation from the normal distribution, particularly in the lower (left) tail.
summary(model_condom_accessibility)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## Condom_Access ~ UCLA_Total_scaled + STD_KQ_Total_scaled + BSsum_scaled +
## Gender + (1 | ID) + (1 | substanceuse) + (1 | Partner_Type)
## Data: TLFB
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik -2*log(L) df.resid
## 372.1 407.5 -178.1 356.1 606
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.6724 0.0754 0.1515 0.2505 2.2315
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 3.58283 1.8928
## Partner_Type (Intercept) 0.09038 0.3006
## substanceuse (Intercept) 0.00000 0.0000
## Number of obs: 614, groups: ID, 53; Partner_Type, 3; substanceuse, 2
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.75833 0.74630 5.036 4.76e-07 ***
## UCLA_Total_scaled 0.52274 0.35199 1.485 0.1375
## STD_KQ_Total_scaled -0.06485 0.30330 -0.214 0.8307
## BSsum_scaled 0.26525 0.43340 0.612 0.5405
## Gender1 -1.50325 0.85956 -1.749 0.0803 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) UCLA_T STD_KQ BSsm_s
## UCLA_Ttl_sc -0.016
## STD_KQ_Ttl_ 0.151 -0.162
## BSsum_scald -0.059 0.341 -0.309
## Gender1 -0.775 0.238 -0.224 0.378
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# install.packages("sjPlot")
library(sjPlot)
# Plot the fixed effects (estimates transformed from log-odds to odds ratios by default)
plot_model(model_condom_accessibility, type = "est")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the sjPlot package.
## Please report the issue at <https://github.com/strengejacke/sjPlot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
library(ggeffects)
library(ggplot2) # Optional, for further customization
# Use sjPlot to quickly generate Q-Q plots for all random effects
plot_model(model_condom_accessibility, type = "pred")
## $UCLA_Total_scaled
##
## $STD_KQ_Total_scaled
##
## $BSsum_scaled
##
## $Gender
first_re_plot <- plot_model(model_condom_accessibility, type = "re")[[1]]
se_re_plot <- plot_model(model_condom_accessibility, type = "re")[[2]]
th_re_plot <- plot_model(model_condom_accessibility, type = "re")[[3]]
modified_plot <- first_re_plot +
labs(
title = "Participant ID Random Effects",
x = "ID",
y = "Condom Access",
caption = "Model: Condom_Access"
)
print(modified_plot)
modified_plot2 <- se_re_plot +
labs(
title = "Partner Type Random Effects",
subtitle = "0 = New, 1 = Causal, 2 = Regular",
x = "Partner Type",
y = "Condom Access",
caption = "Model: Condom_Access"
)
print(modified_plot2)
modified_plot3 <- th_re_plot +
labs(
title = "Substance UseRandom Effects",
subtitle = "0 = no use, 1 = use",
x = "Substance use",
y = "Condom Access",
caption = "Model: Condom_Access"
)
print(modified_plot3)
sjPlot::tab_model(model_condom_accessibility)
| Condom_Access | |||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 42.88 | 9.93 – 185.13 | <0.001 |
| UCLA Total scaled | 1.69 | 0.85 – 3.36 | 0.138 |
| STD KQ Total scaled | 0.94 | 0.52 – 1.70 | 0.831 |
| BSsum scaled | 1.30 | 0.56 – 3.05 | 0.541 |
| Gender [1] | 0.22 | 0.04 – 1.20 | 0.080 |
| Random Effects | |||
| σ2 | 3.29 | ||
| τ00 ID | 3.58 | ||
| τ00 Partner_Type | 0.09 | ||
| τ00 substanceuse | 0.00 | ||
| N ID | 53 | ||
| N substanceuse | 2 | ||
| N Partner_Type | 3 | ||
| Observations | 614 | ||
| Marginal R2 / Conditional R2 | 0.255 / NA | ||
library(performance)
# This will return the correct ICC calculation for your model
#icc(model_condom_accessibility)
##Warning message:
#Can't compute random effect variances. Some variance components equal zero. Your model may suffer from singularity (see
# `?lme4::isSingular` and `?performance::check_singularity`).
# Decrease the `tolerance` level to force the calculation of random effect variances, or impose priors on your random effects
# parameters (using packages like `brms` or `glmmTMB`).
########STD
###Fit the full model
model_STD <-glmer(STD ~ UCLA_Total_scaled + STD_KQ_Total_scaled + BSsum_scaled + Gender +
(1 |ID)+
(1 |substanceuse)+
(1 |Partner_Type),
data = TLFB, family = binomial(link = "logit"),
glmerControl(optimizer = "bobyqa"))
####Overall Fit & Uniformity: Generate simulated residuals and plot them
require(DHARMa)
simulationOutput_STD <- DHARMa::simulateResiduals(fittedModel = model_STD, plot = TRUE)
####The points in the Q-Q plot should align with the diagonal line,
####and the p-value for the Kolmogorov-Smirnov test should be above 0.05, indicating uniformity.
DHARMa::testDispersion(simulationOutput_STD)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.2824, p-value = 0.256
## alternative hypothesis: two.sided
###A p-value below 0.05 usually indicates a problem that needs addressing (e.g., using a negative binomial family instead of Poisson).
qqnorm(ranef(model_STD)[[1]][,1])
qqline(ranef(model_STD)[[1]][,1])
###Normality of Random Effects: Visually inspect the distribution of the extracted random effects using Q-Q plots to ensure they are approximately normal:
###The Q-Q plot compares the distribution of your sample data (the extracted random effects) against a theoretical normal distribution.
###If the data are normally distributed, the points should fall closely along a straight diagonal line.
##The visual inspection of the provided Q-Q plot suggests a deviation from the normal distribution, particularly in the lower (left) tail.
summary(model_STD)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: STD ~ UCLA_Total_scaled + STD_KQ_Total_scaled + BSsum_scaled +
## Gender + (1 | ID) + (1 | substanceuse) + (1 | Partner_Type)
## Data: TLFB
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik -2*log(L) df.resid
## 322.0 357.4 -153.0 306.0 606
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.8728 -0.1636 -0.0400 0.0835 5.7124
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID (Intercept) 43.8399 6.6212
## Partner_Type (Intercept) 5.0935 2.2569
## substanceuse (Intercept) 0.3901 0.6246
## Number of obs: 614, groups: ID, 53; Partner_Type, 3; substanceuse, 2
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1776 2.6287 0.448 0.654
## UCLA_Total_scaled -0.8105 1.3274 -0.611 0.541
## STD_KQ_Total_scaled 0.7914 0.8822 0.897 0.370
## BSsum_scaled 1.9819 1.5009 1.320 0.187
## Gender1 -2.9447 2.9905 -0.985 0.325
##
## Correlation of Fixed Effects:
## (Intr) UCLA_T STD_KQ BSsm_s
## UCLA_Ttl_sc -0.153
## STD_KQ_Ttl_ 0.159 -0.284
## BSsum_scald -0.065 0.459 -0.333
## Gender1 -0.691 0.318 -0.263 0.352
# install.packages("sjPlot")
library(sjPlot)
# Plot the fixed effects (estimates transformed from log-odds to odds ratios by default)
plot_model(model_STD, type = "est")
library(ggeffects)
library(ggplot2) # Optional, for further customization
# Use sjPlot to quickly generate Q-Q plots for all random effects
plot_model(model_STD, type = "pred")
## $UCLA_Total_scaled
##
## $STD_KQ_Total_scaled
##
## $BSsum_scaled
##
## $Gender
first_re_plot <- plot_model(model_STD, type = "re")[[1]]
se_re_plot <- plot_model(model_STD, type = "re")[[2]]
th_re_plot <- plot_model(model_STD, type = "re")[[3]]
modified_plot <- first_re_plot +
labs(
title = "Participant ID Random Effects",
x = "ID",
y = "STD",
caption = "Model: STD"
)
print(modified_plot)
modified_plot2 <- se_re_plot +
labs(
title = "Partner Type Random Effects",
subtitle = "0 = New, 1 = Causal, 2 = Regular",
x = "Partner Type",
y = "STD",
caption = "Model: STD"
)
print(modified_plot2)
modified_plot3 <- th_re_plot +
labs(
title = "Substance UseRandom Effects",
subtitle = "0 = no use, 1 = use",
x = "Substance use",
y = "STD",
caption = "Model: STD"
)
print(modified_plot3)
tab_model(model_STD)
| STD | |||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 3.25 | 0.02 – 561.04 | 0.654 |
| UCLA Total scaled | 0.44 | 0.03 – 6.00 | 0.541 |
| STD KQ Total scaled | 2.21 | 0.39 – 12.43 | 0.370 |
| BSsum scaled | 7.26 | 0.38 – 137.50 | 0.187 |
| Gender [1] | 0.05 | 0.00 – 18.48 | 0.325 |
| Random Effects | |||
| σ2 | 3.29 | ||
| τ00 ID | 43.84 | ||
| τ00 Partner_Type | 5.09 | ||
| τ00 substanceuse | 0.39 | ||
| ICC | 0.94 | ||
| N ID | 53 | ||
| N substanceuse | 2 | ||
| N Partner_Type | 3 | ||
| Observations | 614 | ||
| Marginal R2 / Conditional R2 | 0.166 / 0.948 | ||
##The table provides Odds Ratios (OR), their 95% Confidence Intervals (CI), and associated p-values for the main predictors. An OR greater than 1 means an increased odds of the outcome (STD), while an OR less than 1 means decreased odds.
##The random effects describe how much individual participants vary from the overall average effects. These values are reported as variances (\(\tau _{00}\)) and standard deviation (\(\sigma ^{2}\)).
##ICC (Intraclass Correlation Coefficient): 0.94
####This is a very high ICC.
#####It suggests that about 94% of the total variability in the outcome (STD status) is attributable to differences between individuals (ID), rather than differences within individuals across different observations. This confirms that using a mixed-effects model was absolutely necessary, as individual identity matters profoundly in predicting the outcome.