Code
# Import data from Excel sheets
venues <- read_excel("TLSmsm.xlsx", sheet = "Venue Universe")
counts <- read_excel("TLSmsm.xlsx", sheet = "Counts")
msm <- read_excel("TLSmsm.xlsx", sheet = "MSMdata")Previous studies have shown that heavy episodic drinking (binge drinking) is highly prevalent among men who have sex with men (MSM) and is associated with sexual risk behaviors and HIV seroconversion. This analysis uses data from the 2017 National HIV Behavioral Surveillance (NHBS) in San Francisco to assess the magnitude of binge drinking and explore its correlates among MSM.
# Import data from Excel sheets
venues <- read_excel("TLSmsm.xlsx", sheet = "Venue Universe")
counts <- read_excel("TLSmsm.xlsx", sheet = "Counts")
msm <- read_excel("TLSmsm.xlsx", sheet = "MSMdata")# Categorize venues
venues$vtype[venues$Category %in% c("Bars", "Social Org", "club")] <- 1
venues$vtype[venues$Category %in% c("Adult Bookstore", "Coffe Shop/Sex Store", "Sex Club")] <- 2
venues$vtype[venues$Category %in% c("Dance Party", "Festival", "Special Event", "Pride Party")] <- 3
venues$vtype[venues$Category %in% c("Hotel", "Gym")] <- 4
venues$vtype[venues$Category %in% c("Park", "Beach", "Street")] <- 5
venues$vtype[venues$Category == "App"] <- 6
# Convert to factor with labels
venues$vtype <- factor(venues$vtype,
levels = c("1", "2", "3", "4", "5", "6"),
labels = c("Bar/Club", "Store", "Event", "Hotel/Gym", "Public", "App"))
# Display venue distribution
kable(table(venues$vtype, useNA = "always"),
caption = "Distribution of Venue Types") %>%
kable_styling(bootstrap_options = c("striped", "hover"))| Var1 | Freq |
|---|---|
| Bar/Club | 33 |
| Store | 4 |
| Event | 6 |
| Hotel/Gym | 3 |
| Public | 4 |
| App | 7 |
| NA | 0 |
# Calculate sampling probability
counts$samplingprob <- counts$recruited / counts$counted
# Merge MSM data with counts
msm2 <- merge(msm, counts,
by.x = c("venue", "rdate"),
by.y = c("venue", "rdate"),
all.x = TRUE, all.y = TRUE)
# Calculate weights
msm2$weight <- 1 / msm2$samplingprob
msm2$normalized_weight <- msm2$weight / mean(msm2$weight)
# Display summary of weights
summary_stats <- data.frame(
Statistic = c("Min", "Median", "Mean", "Max"),
Weight = round(c(min(msm2$weight), median(msm2$weight),
mean(msm2$weight), max(msm2$weight)), 2),
Normalized_Weight = round(c(min(msm2$normalized_weight),
median(msm2$normalized_weight),
mean(msm2$normalized_weight),
max(msm2$normalized_weight)), 2)
)
kable(summary_stats, caption = "Summary Statistics of Sampling Weights") %>%
kable_styling(bootstrap_options = c("striped", "hover"))| Statistic | Weight | Normalized_Weight |
|---|---|---|
| Min | 2.00 | 0.16 |
| Median | 8.50 | 0.69 |
| Mean | 12.26 | 1.00 |
| Max | 140.00 | 11.42 |
# Create binary variable for binge drinking
msm2$binge_drinking <- ifelse(msm2$alc530d >= 1 & msm2$alc530d <= 70, 1, 0)
msm2$binge_drinking <- factor(msm2$binge_drinking,
levels = c(0, 1),
labels = c("No", "Yes"))
# Calculate crude prevalence
crude_prev <- binom.confint(sum(msm2$binge_drinking == "Yes", na.rm = TRUE),
sum(!is.na(msm2$binge_drinking)),
method = "wilson")
# Create survey design object
msm2$UID <- paste(msm2$venue, "_", msm2$rdate)
TLSsvy <- svydesign(ids = ~UID,
weights = ~normalized_weight,
nest = TRUE,
data = msm2)
# Calculate weighted prevalence
weighted_prev <- svyciprop(~I(binge_drinking == "Yes"),
TLSsvy,
method = "logit")
# Create prevalence table
prev_results <- data.frame(
Measure = c("Crude Prevalence", "Weighted Prevalence"),
Estimate = c(crude_prev$mean[1], coef(weighted_prev)),
LowerCI = c(crude_prev$lower[1], attr(weighted_prev, "ci")[1]),
UpperCI = c(crude_prev$upper[1], attr(weighted_prev, "ci")[2])
)
kable(prev_results,
col.names = c("Measure", "Estimate", "95% CI Lower", "95% CI Upper"),
digits = 3,
caption = "Binge Drinking Prevalence Estimates") %>%
kable_styling(bootstrap_options = c("striped", "hover"))| Measure | Estimate | 95% CI Lower | 95% CI Upper | |
|---|---|---|---|---|
| Crude Prevalence | 0.436 | 0.390 | 0.483 | |
| I(binge_drinking == "Yes") | Weighted Prevalence | 0.451 | 0.369 | 0.535 |
# Create demographic variables
msm2$age_group <- cut(msm2$age,
breaks = c(19, 24, 34, 44, Inf),
labels = c("19-24", "25-34", "35-44", ">45"),
include.lowest = TRUE)
msm2$race_cat <- factor(msm2$race,
levels = c("1", "2", "3", "4", "5"),
labels = c("White", "Native", "BlackAA", "Asian", "Multiple"))
msm2$income25k <- factor(ifelse(msm2$hhincom <= 6, 0, 1),
levels = c(0, 1),
labels = c("<25000$Y", ">=25000$Y"))
msm2$educat <- factor(ifelse(msm2$school <= 3, 0, 1),
levels = c(0, 1),
labels = c("Some grades", "College or Higher"))
msm2$us_born <- factor(ifelse(msm2$country <= 1, 1, 0),
levels = c(0, 1),
labels = c("No", "Yes"))
msm2$age_first_sex <- cut(msm2$m_mdebut,
breaks = c(4, 19, 24, 34, Inf),
labels = c("<19", "20-24", "25-34", ">35"),
include.lowest = TRUE)
msm2$drug_treatment <- factor(msm2$drugtxr5,
levels = c(0, 1),
labels = c("No", "Yes"))
msm2$hiv_pos <- factor(msm2$everpos,
levels = c(0, 1),
labels = c("No", "Yes"))
msm2$hcv_pos <- factor(msm2$hepcever,
levels = c(0, 1),
labels = c("No", "Yes"))
# Create a survey design object
TLSsvy <- svydesign(ids = ~ UID, weights = ~ normalized_weight, nest = TRUE, data = msm2)
TLS_data <- TLSsvy$variables # This accesses the original data frame used in the survey design
# Proceed to create Table 1 if variables are present
if(all(c("age_group", "race_cat", "income25k", "us_born", "educat",
"age_first_sex", "hiv_pos", "hcv_pos", "drug_treatment") %in% names(TLS_data))) {
# Create Table 1
table1wght <- svyCreateTableOne(vars = c("age_group", "race_cat", "income25k",
"us_born", "educat", "age_first_sex",
"hiv_pos", "hcv_pos", "drug_treatment"),
data = TLSsvy)
print(table1wght, contDigits = 1, catDigits = 1, pDigits = 3, showAllLevels = TRUE)
} else {
cat("One or more specified variables are missing from the dataset.\n")
}
level Overall
n 504.0
age_group (%) 19-24 33.5 ( 6.6)
25-34 160.0 (31.8)
35-44 108.8 (21.6)
>45 201.7 (40.0)
race_cat (%) White 439.2 (87.1)
Native 48.1 ( 9.6)
BlackAA 3.8 ( 0.7)
Asian 3.3 ( 0.6)
Multiple 9.6 ( 1.9)
income25k (%) <25000$Y 101.7 (20.2)
>=25000$Y 401.7 (79.8)
us_born (%) No 73.7 (14.6)
Yes 429.8 (85.4)
educat (%) Some grades 64.6 (12.8)
College or Higher 439.4 (87.2)
age_first_sex (%) <19 358.2 (71.2)
20-24 100.3 (19.9)
25-34 36.6 ( 7.3)
>35 8.2 ( 1.6)
hiv_pos (%) No 388.6 (78.7)
Yes 105.0 (21.3)
hcv_pos (%) No 389.4 (95.5)
Yes 18.3 ( 4.5)
drug_treatment (%) No 484.2 (96.2)
Yes 19.1 ( 3.8)
# Create Table 1
table1wght <- svyCreateTableOne(
vars = c("age_group", "race_cat", "income25k", "us_born",
"educat", "age_first_sex", "hiv_pos", "hcv_pos",
"drug_treatment"),
data = TLSsvy)
# Convert to data frame and format
table1_df <- print(table1wght,
contDigits = 1,
catDigits = 1,
pDigits = 3,
showAllLevels = TRUE,
printToggle = FALSE)Age Distribution: The largest age group represented was participants aged 25-34 (31.8%), followed by those aged >45 (40.0%). There was also a significant representation of participants aged 35-44 (21.6%).
Race: The majority of participants identified as White (87.1%), with smaller proportions identifying as Native (9.6%), Black/African American (0.7%), Asian (0.6%), and Multiple (1.9%).
Income: A substantial proportion of participants reported an annual income of >=25,000 USD (79.8%), with a smaller percentage earning <25,000 USD (20.2%).
US Born: Most participants were born in the United States (85.4%), while only 14.6% were not.
Education: A significant percentage of participants had achieved College or Higher education (87.2%), with a minority reporting Some grades (12.8%).
Age at First Sexual Encounter: A majority of participants had their first sexual experience before the age of 19 (71.2%).
HIV Status: About 21.3% of participants reported being HIV positive, while the remaining 78.7% were HIV negative.
HCV Status: A very small proportion of participants reported being HCV positive (4.5%).
Drug Treatment: A large majority (96.2%) of participants had never been treated for drug use.
# Create Table 2
table2wght <- svyCreateTableOne(
vars = c("age_group", "race_cat", "income25k", "us_born",
"educat", "age_first_sex", "hiv_pos", "hcv_pos",
"drug_treatment"),
strata = "binge_drinking",
data = TLSsvy,
test = TRUE)
# Print Table 2
print(table2wght,
contDigits = 1,
catDigits = 1,
pDigits = 3,
showAllLevels = TRUE) Stratified by binge_drinking
level No Yes p test
n 245.7 201.9
age_group (%) 19-24 8.6 ( 3.5) 23.6 (11.7) <0.001
25-34 66.4 (27.0) 85.4 (42.3)
35-44 42.7 (17.4) 57.3 (28.4)
>45 128.1 (52.1) 35.6 (17.6)
race_cat (%) White 215.8 (87.8) 170.1 (84.2) 0.284
Native 19.2 ( 7.8) 25.9 (12.8)
BlackAA 3.2 ( 1.3) 0.6 ( 0.3)
Asian 0.0 ( 0.0) 3.3 ( 1.6)
Multiple 7.6 ( 3.1) 2.1 ( 1.0)
income25k (%) <25000$Y 49.4 (20.2) 42.0 (20.8) 0.877
>=25000$Y 195.7 (79.8) 159.9 (79.2)
us_born (%) No 42.9 (17.5) 25.7 (12.8) 0.320
Yes 202.4 (82.5) 176.1 (87.2)
educat (%) Some grades 24.8 (10.1) 28.1 (13.9) 0.317
College or Higher 220.9 (89.9) 173.8 (86.1)
age_first_sex (%) <19 162.6 (66.2) 151.4 (75.2) 0.338
20-24 54.6 (22.2) 37.6 (18.7)
25-34 22.5 ( 9.2) 10.2 ( 5.0)
>35 6.0 ( 2.4) 2.0 ( 1.0)
hiv_pos (%) No 188.2 (78.7) 162.5 (82.0) 0.537
Yes 50.8 (21.3) 35.7 (18.0)
hcv_pos (%) No 192.7 (96.1) 155.9 (96.6) 0.825
Yes 7.8 ( 3.9) 5.5 ( 3.4)
drug_treatment (%) No 236.1 (96.4) 200.5 (99.3) 0.005
Yes 8.9 ( 3.6) 1.4 ( 0.7)
Age Group: A significant difference was observed between age groups and binge drinking behavior (p < 0.001). Younger participants, especially those aged 25-34 (42.3%), were more likely to engage in binge drinking, while those aged >45 were less likely to binge drink (17.6% vs. 52.1% in non-binge drinkers).
Race: There were no significant racial differences between binge drinkers and non-binge drinkers (p = 0.284). However, a slightly higher percentage of Native participants reported binge drinking (12.8%) compared to non-binge drinkers (7.8%).
Income: No significant difference in income levels was found between the two groups, with both binge drinkers and non-binge drinkers having a similar distribution of income (p = 0.877).
US Born: Although more US-born participants reported binge drinking (87.2%) than non-binge drinking (82.5%), the difference was not statistically significant (p = 0.320).
Education: Educational attainment did not significantly differ between binge drinkers and non-binge drinkers (p = 0.317), though those with College or Higher education had slightly lower percentages of binge drinking.
Age at First Sexual Encounter: A higher percentage of binge drinkers reported having their first sexual encounter before the age of 19 (75.2%) compared to non-binge drinkers (66.2%), but the difference was not statistically significant (p = 0.338).
Drug Treatment: Binge drinkers were significantly less likely to have received drug treatment (0.7% vs. 3.6% in non-binge drinkers, p = 0.005), suggesting an inverse relationship between binge drinking and drug treatment.