Time Location Sampling (TLS) Analysis

Author

Ntombizodwa Nyoni

Published

October 23, 2024

1 Data Import and Preparation

1.1 Research Background

Previous studies have shown that heavy episodic drinking (binge drinking) is highly prevalent among men who have sex with men (MSM) and is associated with sexual risk behaviors and HIV seroconversion. This analysis uses data from the 2017 National HIV Behavioral Surveillance (NHBS) in San Francisco to assess the magnitude of binge drinking and explore its correlates among MSM.

1.2 Import Data

Code
# Import data from Excel sheets
venues <- read_excel("TLSmsm.xlsx", sheet = "Venue Universe")
counts <- read_excel("TLSmsm.xlsx", sheet = "Counts")
msm <- read_excel("TLSmsm.xlsx", sheet = "MSMdata")

1.3 Venue Categorization

Code
# Categorize venues
venues$vtype[venues$Category %in% c("Bars", "Social Org", "club")] <- 1
venues$vtype[venues$Category %in% c("Adult Bookstore", "Coffe Shop/Sex Store", "Sex Club")] <- 2
venues$vtype[venues$Category %in% c("Dance Party", "Festival", "Special Event", "Pride Party")] <- 3
venues$vtype[venues$Category %in% c("Hotel", "Gym")] <- 4
venues$vtype[venues$Category %in% c("Park", "Beach", "Street")] <- 5
venues$vtype[venues$Category == "App"] <- 6

# Convert to factor with labels
venues$vtype <- factor(venues$vtype, 
                      levels = c("1", "2", "3", "4", "5", "6"),
                      labels = c("Bar/Club", "Store", "Event", "Hotel/Gym", "Public", "App"))

# Display venue distribution
kable(table(venues$vtype, useNA = "always"),
      caption = "Distribution of Venue Types") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Distribution of Venue Types
Var1 Freq
Bar/Club 33
Store 4
Event 6
Hotel/Gym 3
Public 4
App 7
NA 0

2 Sampling Weight Calculation

Code
# Calculate sampling probability
counts$samplingprob <- counts$recruited / counts$counted

# Merge MSM data with counts
msm2 <- merge(msm, counts, 
              by.x = c("venue", "rdate"), 
              by.y = c("venue", "rdate"), 
              all.x = TRUE, all.y = TRUE)

# Calculate weights
msm2$weight <- 1 / msm2$samplingprob
msm2$normalized_weight <- msm2$weight / mean(msm2$weight)

# Display summary of weights
summary_stats <- data.frame(
  Statistic = c("Min", "Median", "Mean", "Max"),
  Weight = round(c(min(msm2$weight), median(msm2$weight), 
                  mean(msm2$weight), max(msm2$weight)), 2),
  Normalized_Weight = round(c(min(msm2$normalized_weight), 
                            median(msm2$normalized_weight),
                            mean(msm2$normalized_weight), 
                            max(msm2$normalized_weight)), 2)
)

kable(summary_stats, caption = "Summary Statistics of Sampling Weights") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Summary Statistics of Sampling Weights
Statistic Weight Normalized_Weight
Min 2.00 0.16
Median 8.50 0.69
Mean 12.26 1.00
Max 140.00 11.42

3 Binge Drinking Analysis

3.1 Create Binary Variable and Calculate Prevalence

Code
# Create binary variable for binge drinking
msm2$binge_drinking <- ifelse(msm2$alc530d >= 1 & msm2$alc530d <= 70, 1, 0)
msm2$binge_drinking <- factor(msm2$binge_drinking, 
                             levels = c(0, 1), 
                             labels = c("No", "Yes"))

# Calculate crude prevalence
crude_prev <- binom.confint(sum(msm2$binge_drinking == "Yes", na.rm = TRUE), 
                           sum(!is.na(msm2$binge_drinking)),
                           method = "wilson")

# Create survey design object
msm2$UID <- paste(msm2$venue, "_", msm2$rdate)
TLSsvy <- svydesign(ids = ~UID, 
                    weights = ~normalized_weight, 
                    nest = TRUE, 
                    data = msm2)

# Calculate weighted prevalence
weighted_prev <- svyciprop(~I(binge_drinking == "Yes"), 
                          TLSsvy, 
                          method = "logit")

# Create prevalence table
prev_results <- data.frame(
  Measure = c("Crude Prevalence", "Weighted Prevalence"),
  Estimate = c(crude_prev$mean[1], coef(weighted_prev)),
  LowerCI = c(crude_prev$lower[1], attr(weighted_prev, "ci")[1]),
  UpperCI = c(crude_prev$upper[1], attr(weighted_prev, "ci")[2])
)

kable(prev_results,
      col.names = c("Measure", "Estimate", "95% CI Lower", "95% CI Upper"),
      digits = 3,
      caption = "Binge Drinking Prevalence Estimates") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Binge Drinking Prevalence Estimates
Measure Estimate 95% CI Lower 95% CI Upper
Crude Prevalence 0.436 0.390 0.483
I(binge_drinking == "Yes") Weighted Prevalence 0.451 0.369 0.535

4 Demographic Characteristics (Table 1)

Code
# Create demographic variables
msm2$age_group <- cut(msm2$age, 
                      breaks = c(19, 24, 34, 44, Inf),
                      labels = c("19-24", "25-34", "35-44", ">45"),
                      include.lowest = TRUE)

msm2$race_cat <- factor(msm2$race,
                       levels = c("1", "2", "3", "4", "5"),
                       labels = c("White", "Native", "BlackAA", "Asian", "Multiple"))

msm2$income25k <- factor(ifelse(msm2$hhincom <= 6, 0, 1),
                        levels = c(0, 1),
                        labels = c("<25000$Y", ">=25000$Y"))

msm2$educat <- factor(ifelse(msm2$school <= 3, 0, 1),
                     levels = c(0, 1),
                     labels = c("Some grades", "College or Higher"))

msm2$us_born <- factor(ifelse(msm2$country <= 1, 1, 0),
                      levels = c(0, 1),
                      labels = c("No", "Yes"))

msm2$age_first_sex <- cut(msm2$m_mdebut,
                         breaks = c(4, 19, 24, 34, Inf),
                         labels = c("<19", "20-24", "25-34", ">35"),
                         include.lowest = TRUE)

msm2$drug_treatment <- factor(msm2$drugtxr5,
                            levels = c(0, 1),
                            labels = c("No", "Yes"))

msm2$hiv_pos <- factor(msm2$everpos,
                      levels = c(0, 1),
                      labels = c("No", "Yes"))

msm2$hcv_pos <- factor(msm2$hepcever,
                      levels = c(0, 1),
                      labels = c("No", "Yes"))

# Create a survey design object
TLSsvy <- svydesign(ids = ~ UID, weights = ~ normalized_weight, nest = TRUE, data = msm2)

TLS_data <- TLSsvy$variables  # This accesses the original data frame used in the survey design


# Proceed to create Table 1 if variables are present
if(all(c("age_group", "race_cat", "income25k", "us_born", "educat", 
         "age_first_sex", "hiv_pos", "hcv_pos", "drug_treatment") %in% names(TLS_data))) {
  
  # Create Table 1
  table1wght <- svyCreateTableOne(vars = c("age_group", "race_cat", "income25k", 
                                           "us_born", "educat", "age_first_sex", 
                                           "hiv_pos", "hcv_pos", "drug_treatment"), 
                                  data = TLSsvy)
  print(table1wght, contDigits = 1, catDigits = 1, pDigits = 3, showAllLevels = TRUE)
} else {
  cat("One or more specified variables are missing from the dataset.\n")
}
                    
                     level             Overall      
  n                                    504.0        
  age_group (%)      19-24              33.5 ( 6.6) 
                     25-34             160.0 (31.8) 
                     35-44             108.8 (21.6) 
                     >45               201.7 (40.0) 
  race_cat (%)       White             439.2 (87.1) 
                     Native             48.1 ( 9.6) 
                     BlackAA             3.8 ( 0.7) 
                     Asian               3.3 ( 0.6) 
                     Multiple            9.6 ( 1.9) 
  income25k (%)      <25000$Y          101.7 (20.2) 
                     >=25000$Y         401.7 (79.8) 
  us_born (%)        No                 73.7 (14.6) 
                     Yes               429.8 (85.4) 
  educat (%)         Some grades        64.6 (12.8) 
                     College or Higher 439.4 (87.2) 
  age_first_sex (%)  <19               358.2 (71.2) 
                     20-24             100.3 (19.9) 
                     25-34              36.6 ( 7.3) 
                     >35                 8.2 ( 1.6) 
  hiv_pos (%)        No                388.6 (78.7) 
                     Yes               105.0 (21.3) 
  hcv_pos (%)        No                389.4 (95.5) 
                     Yes                18.3 ( 4.5) 
  drug_treatment (%) No                484.2 (96.2) 
                     Yes                19.1 ( 3.8) 
Code
# Create Table 1
table1wght <- svyCreateTableOne(
  vars = c("age_group", "race_cat", "income25k", "us_born", 
           "educat", "age_first_sex", "hiv_pos", "hcv_pos", 
           "drug_treatment"),
  data = TLSsvy)

# Convert to data frame and format
table1_df <- print(table1wght, 
                   contDigits = 1, 
                   catDigits = 1, 
                   pDigits = 3, 
                   showAllLevels = TRUE,
                   printToggle = FALSE)

5 Key Findings from Table 1

  • Age Distribution: The largest age group represented was participants aged 25-34 (31.8%), followed by those aged >45 (40.0%). There was also a significant representation of participants aged 35-44 (21.6%).

  • Race: The majority of participants identified as White (87.1%), with smaller proportions identifying as Native (9.6%), Black/African American (0.7%), Asian (0.6%), and Multiple (1.9%).

  • Income: A substantial proportion of participants reported an annual income of >=25,000 USD (79.8%), with a smaller percentage earning <25,000 USD (20.2%).

  • US Born: Most participants were born in the United States (85.4%), while only 14.6% were not.

  • Education: A significant percentage of participants had achieved College or Higher education (87.2%), with a minority reporting Some grades (12.8%).

  • Age at First Sexual Encounter: A majority of participants had their first sexual experience before the age of 19 (71.2%).

  • HIV Status: About 21.3% of participants reported being HIV positive, while the remaining 78.7% were HIV negative.

  • HCV Status: A very small proportion of participants reported being HCV positive (4.5%).

  • Drug Treatment: A large majority (96.2%) of participants had never been treated for drug use.

6 Bivariate Analysis (Table 2)

Code
# Create Table 2
table2wght <- svyCreateTableOne(
  vars = c("age_group", "race_cat", "income25k", "us_born", 
           "educat", "age_first_sex", "hiv_pos", "hcv_pos", 
           "drug_treatment"),
  strata = "binge_drinking",
  data = TLSsvy,
  test = TRUE)

# Print Table 2
print(table2wght, 
      contDigits = 1, 
      catDigits = 1, 
      pDigits = 3, 
      showAllLevels = TRUE)
                    Stratified by binge_drinking
                     level             No            Yes           p      test
  n                                    245.7         201.9                    
  age_group (%)      19-24               8.6 ( 3.5)   23.6 (11.7)  <0.001     
                     25-34              66.4 (27.0)   85.4 (42.3)             
                     35-44              42.7 (17.4)   57.3 (28.4)             
                     >45               128.1 (52.1)   35.6 (17.6)             
  race_cat (%)       White             215.8 (87.8)  170.1 (84.2)   0.284     
                     Native             19.2 ( 7.8)   25.9 (12.8)             
                     BlackAA             3.2 ( 1.3)    0.6 ( 0.3)             
                     Asian               0.0 ( 0.0)    3.3 ( 1.6)             
                     Multiple            7.6 ( 3.1)    2.1 ( 1.0)             
  income25k (%)      <25000$Y           49.4 (20.2)   42.0 (20.8)   0.877     
                     >=25000$Y         195.7 (79.8)  159.9 (79.2)             
  us_born (%)        No                 42.9 (17.5)   25.7 (12.8)   0.320     
                     Yes               202.4 (82.5)  176.1 (87.2)             
  educat (%)         Some grades        24.8 (10.1)   28.1 (13.9)   0.317     
                     College or Higher 220.9 (89.9)  173.8 (86.1)             
  age_first_sex (%)  <19               162.6 (66.2)  151.4 (75.2)   0.338     
                     20-24              54.6 (22.2)   37.6 (18.7)             
                     25-34              22.5 ( 9.2)   10.2 ( 5.0)             
                     >35                 6.0 ( 2.4)    2.0 ( 1.0)             
  hiv_pos (%)        No                188.2 (78.7)  162.5 (82.0)   0.537     
                     Yes                50.8 (21.3)   35.7 (18.0)             
  hcv_pos (%)        No                192.7 (96.1)  155.9 (96.6)   0.825     
                     Yes                 7.8 ( 3.9)    5.5 ( 3.4)             
  drug_treatment (%) No                236.1 (96.4)  200.5 (99.3)   0.005     
                     Yes                 8.9 ( 3.6)    1.4 ( 0.7)             

7 Key Findings from Table 2

  • Age Group: A significant difference was observed between age groups and binge drinking behavior (p < 0.001). Younger participants, especially those aged 25-34 (42.3%), were more likely to engage in binge drinking, while those aged >45 were less likely to binge drink (17.6% vs. 52.1% in non-binge drinkers).

  • Race: There were no significant racial differences between binge drinkers and non-binge drinkers (p = 0.284). However, a slightly higher percentage of Native participants reported binge drinking (12.8%) compared to non-binge drinkers (7.8%).

  • Income: No significant difference in income levels was found between the two groups, with both binge drinkers and non-binge drinkers having a similar distribution of income (p = 0.877).

  • US Born: Although more US-born participants reported binge drinking (87.2%) than non-binge drinking (82.5%), the difference was not statistically significant (p = 0.320).

  • Education: Educational attainment did not significantly differ between binge drinkers and non-binge drinkers (p = 0.317), though those with College or Higher education had slightly lower percentages of binge drinking.

  • Age at First Sexual Encounter: A higher percentage of binge drinkers reported having their first sexual encounter before the age of 19 (75.2%) compared to non-binge drinkers (66.2%), but the difference was not statistically significant (p = 0.338).

  • Drug Treatment: Binge drinkers were significantly less likely to have received drug treatment (0.7% vs. 3.6% in non-binge drinkers, p = 0.005), suggesting an inverse relationship between binge drinking and drug treatment.

8 References

  1. Centers for Disease Control and Prevention. National HIV Behavioral Surveillance (NHBS).
  2. Santos GM, Jin H, Raymond HF. Prevalence and correlates of hazardous alcohol use among a cohort of young men who have sex with men. Drug Alcohol Depend. 2015.