For this project, I looked to identify the association between the typical place of alcohol consumption and change in alcohol use during the pandemic in a cohort of PLWH in Baltimore, MD.
The data are from the Alcohol Research Consortium in HIV (ARCH) cohort in Baltimore, MD.
Code can be found below.
library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)
library(ggeasy)
library(gridExtra)
decreased_df <- data.frame(
Location = c("home", "official", "unofficial"),
Odds_Ratio_Unadjusted = c("0.38 (0.23, 0.64)", "1.50 (0.57, 3.93)", "1.54 (0.69, 1.85)"),
P_value_Unadjusted = c("< 0.01", "0.41", "0.30"),
Odds_Ratio_Adjusted = c("0.54 (0.26, 1.13)", "1.63 (0.62, 4.30)", "1.62 (0.72, 3.67)"),
P_value_Adjusted = c("0.10", "0.33", "0.23"))
# Extract Odds Ratios, Lower and Upper Limits for each group
home_unadj <- c(decreased_df$Odds_Ratio_Unadjusted[1],
decreased_df$OR_Unadj_LL[1],
decreased_df$OR_Unadj_UL[1])
home_adj <- c(decreased_df$Odds_Ratio_Adjusted[1],
decreased_df$OR_Adj_LL[1],
decreased_df$OR_Adj_UL[1])
official_unadj <- c(decreased_df$Odds_Ratio_Unadjusted[2],
decreased_df$OR_Unadj_LL[2],
decreased_df$OR_Unadj_UL[2])
official_adj <- c(decreased_df$Odds_Ratio_Adjusted[2],
decreased_df$OR_Adj_LL[2],
decreased_df$OR_Adj_UL[2])
unofficial_unadj <- c(decreased_df$Odds_Ratio_Unadjusted[3],
decreased_df$OR_Unadj_LL[3],
decreased_df$OR_Unadj_UL[3])
unofficial_adj <- c(decreased_df$Odds_Ratio_Adjusted[3],
decreased_df$OR_Adj_LL[3],
decreased_df$OR_Adj_UL[3])
# Combine into a new dataframe
new_df <- data.frame(Group = c("Home-unadjusted", "Home-adjusted", "Official-unadjusted", "Official-adjusted", "Unofficial-unadjusted", "Unofficial-adjusted"),
Odds_Ratio = c(home_unadj[1], home_adj[1], official_unadj[1], official_adj[1], unofficial_unadj[1], unofficial_adj[1]),
Lower_Limit = c(home_unadj[2], home_adj[2], official_unadj[2], official_adj[2], unofficial_unadj[2], unofficial_adj[2]),
Upper_Limit = c(home_unadj[3], home_adj[3], official_unadj[3], official_adj[3], unofficial_unadj[3], unofficial_adj[3]))
# Extract lower and upper limits from Odds_Ratio column using regular expressions
new_df$Lower_Limit <- as.numeric(sub(".*\\((.*),.*", "\\1", new_df$Odds_Ratio))
new_df$Upper_Limit <- as.numeric(sub(".*,\\s*(.*)\\)", "\\1", new_df$Odds_Ratio))
# Remove the parenthesis from the Odds_Ratio column
new_df$Odds_Ratio <- as.numeric(gsub("\\s*\\(.*", "", new_df$Odds_Ratio))
###
# Create index for each row of data
new_df$Index <- 1:nrow(new_df)
# Define labels for each row of data
new_df$label <- gsub("-", " ", new_df$Group)
# Create dataframe for forest plot
dat <- data.frame(Index = new_df$Index,
label = new_df$label,
OR = new_df$Odds_Ratio,
LL = new_df$Lower_Limit,
UL = new_df$Upper_Limit)
# Plot forest plot -> Decreased versus
plot1 <- ggplot(dat, aes(y = Index, x = OR)) +
geom_point(shape = 18, size = 7, color = '#4169e1') + # increase size for thicker points
geom_errorbarh(aes(xmin = LL, xmax = UL), height = 0.25, size = 1.5) + # increase size for thicker error bars
geom_vline(xintercept = 1, color = "red", linetype = "dashed", cex = 1, alpha = 0.5) +
scale_y_continuous(name = "", breaks=1:6, labels = dat$label, trans = "reverse") +
xlab("Odds Ratio (95% CI)") +
ylab(" ") +
theme_bw() +
ggtitle("Fig. 1: Odds of Decreased Drinking Versus No Change") +
easy_center_title() +
theme(panel.border = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.y = element_text(size = 12, colour = "black"),
axis.text.x.bottom = element_text(size = 12, colour = "black"),
axis.title.x = element_text(size = 12, colour = "black"))