chs2020_public <- read_sas("~/BIOS621/Assignment1/chs2020_public.sas7bdat", NULL)
chs2020 <- transform(chs2020_public,
newrace = recode_factor(newrace, `1`="White/N Afri/MidEastern, non-Hispanic",
`2`="Black, non-Hispanic",
`3`="Hispanic",
`4`="Asian/PI, non-Hispanic",
`5`="Other, non-Hispanic"),
agegroup = recode_factor(agegroup,
`1` = "18-24",
`2` = "25-44",
`3` = "45-64",
`4` = "65+"),
hiv12months20 = recode_factor(hiv12months20,
`2` = "No", # put first to make "No" the reference group
`1` = "Yes",
.default = "NA",
.missing = "NA"
),
condom20 = recode_factor(condom20,
`2` = "No",
`1` = "Yes"#,
#.default = "NA",
#.missing = "NA"
),
strata = as.character(strata))
label(chs2020$condom20) <- "Condom Usage"
label(chs2020$newrace) <- "Race & Ethnicity"
label(chs2020$agegroup) <- "Age Category"
label(chs2020$sexpartner) <- "Number of Sex Partners"
caption = "Table 1. HIV Testing"
table1(~ condom20 + newrace + agegroup + sexpartner | hiv12months20, data=chs2020, caption=caption)
No (N=6122) |
Yes (N=2507) |
NA (N=152) |
Overall (N=8781) |
|
---|---|---|---|---|
Condom Usage | ||||
No | 2389 (39.0%) | 1133 (45.2%) | 48 (31.6%) | 3570 (40.7%) |
Yes | 853 (13.9%) | 653 (26.0%) | 17 (11.2%) | 1523 (17.3%) |
Missing | 2880 (47.0%) | 721 (28.8%) | 87 (57.2%) | 3688 (42.0%) |
Race & Ethnicity | ||||
White/N Afri/MidEastern, non-Hispanic | 2362 (38.6%) | 429 (17.1%) | 68 (44.7%) | 2859 (32.6%) |
Black, non-Hispanic | 1122 (18.3%) | 696 (27.8%) | 19 (12.5%) | 1837 (20.9%) |
Hispanic | 1363 (22.3%) | 1068 (42.6%) | 26 (17.1%) | 2457 (28.0%) |
Asian/PI, non-Hispanic | 1089 (17.8%) | 221 (8.8%) | 30 (19.7%) | 1340 (15.3%) |
Other, non-Hispanic | 186 (3.0%) | 93 (3.7%) | 9 (5.9%) | 288 (3.3%) |
Age Category | ||||
18-24 | 449 (7.3%) | 249 (9.9%) | 8 (5.3%) | 706 (8.0%) |
25-44 | 1882 (30.7%) | 1260 (50.3%) | 47 (30.9%) | 3189 (36.3%) |
45-64 | 2108 (34.4%) | 791 (31.6%) | 52 (34.2%) | 2951 (33.6%) |
65+ | 1670 (27.3%) | 206 (8.2%) | 45 (29.6%) | 1921 (21.9%) |
Missing | 13 (0.2%) | 1 (0.0%) | 0 (0%) | 14 (0.2%) |
Number of Sex Partners | ||||
Mean (SD) | 1.74 (0.735) | 2.16 (0.922) | 1.65 (0.703) | 1.86 (0.817) |
Median [Min, Max] | 2.00 [1.00, 4.00] | 2.00 [1.00, 4.00] | 2.00 [1.00, 4.00] | 2.00 [1.00, 4.00] |
Missing | 652 (10.7%) | 179 (7.1%) | 30 (19.7%) | 861 (9.8%) |
tableone <- tableone::CreateTableOne(data = chs2020, strata="hiv12months20", vars = (c("condom20","newrace","agegroup","sexpartner")), addOverall=TRUE)
kableone(tableone, caption="Additional Table 1: Characteristics of 2020 NYC CHS Survey Respondents Stratified by HIV Testing")
Overall | No | Yes | NA | p | test | |
---|---|---|---|---|---|---|
n | 8781 | 6122 | 2507 | 152 | ||
condom20 = Yes (%) | 1523 (29.9) | 853 (26.3) | 653 (36.6) | 17 (26.2) | <0.001 | |
newrace (%) | <0.001 | |||||
White/N Afri/MidEastern, non-Hispanic | 2859 (32.6) | 2362 (38.6) | 429 (17.1) | 68 (44.7) | ||
Black, non-Hispanic | 1837 (20.9) | 1122 (18.3) | 696 (27.8) | 19 (12.5) | ||
Hispanic | 2457 (28.0) | 1363 (22.3) | 1068 (42.6) | 26 (17.1) | ||
Asian/PI, non-Hispanic | 1340 (15.3) | 1089 (17.8) | 221 ( 8.8) | 30 (19.7) | ||
Other, non-Hispanic | 288 ( 3.3) | 186 ( 3.0) | 93 ( 3.7) | 9 ( 5.9) | ||
agegroup (%) | <0.001 | |||||
18-24 | 706 ( 8.1) | 449 ( 7.3) | 249 ( 9.9) | 8 ( 5.3) | ||
25-44 | 3189 (36.4) | 1882 (30.8) | 1260 (50.3) | 47 (30.9) | ||
45-64 | 2951 (33.7) | 2108 (34.5) | 791 (31.6) | 52 (34.2) | ||
65+ | 1921 (21.9) | 1670 (27.3) | 206 ( 8.2) | 45 (29.6) | ||
sexpartner (mean (SD)) | 1.86 (0.82) | 1.74 (0.74) | 2.16 (0.92) | 1.65 (0.70) | <0.001 |
The 2020 NYC Community Health Survey data set contains responses from 8,781 individuals with the majority of participants belonging in the 25-44 age group (36.3% of respondents), followed by the 45-64 age group (33.6%), 65+ (21.9%), and 18-24 (8.0%).
Out of the five variables we are focused on, the “Condom Usage” variable had the most missing responses with 42% of participants not providing an answer.
According to the 2020 Decennial Census, New York City’s population was 30.9% White (non-Hispanic), 28.7% Hispanic or Latino, 20.2% Black or African American (non-Hispanic), 15.6% Asian, and 3.4% non-Hispanic other. The 2020 NYC CHS has a comparable race and ethnic demographic spread to the city’s entire population.
# Additional table to view frequency distribution between HIV testing and sex partner variables.
expss::cross_cases(chs2020, sexpartner, hiv12months20)
hiv12months20 | |||
---|---|---|---|
No | Yes | NA | |
Number of Sex Partners | |||
1 | 2139 | 492 | 55 |
2 | 2887 | 1317 | 59 |
3 | 193 | 177 | 4 |
4 | 251 | 342 | 4 |
#Total cases | 5470 | 2328 | 122 |
Graphical visualization of the contingency table
mosaicplot(table(chs2020$condom20, chs2020$hiv12months20), main = "HIV Testing and Condom Use", xlab="HIV Testing", ylab="Condom Use")
ggplot(chs2020, aes(reorder(condom20, condom20, length), fill=hiv12months20)) +
geom_bar() +
xlab("Condom Usage") +
ylab("HIV Testing Count") +
labs(title="HIV Testing by Condom Usage",
subtitle="Among 2020 NYC Community Health Survey Participants") +
scale_fill_brewer(palette="Set2")
I want to view the same data grouped by the sexpartner
variable, which corresponds to the number of sexual partners a recipient
had in 12 months: 1 = no partners, 2 = 1
partner, 3 = 2 partners, and 4 = 3 or
more partners. I filtered out sexpartner = 1 because there is no
condom usage data for that subgroup.
data_nopartners <- chs2020 %>%
filter(sexpartner != 1)
ggplot(data_nopartners, aes(reorder(condom20, condom20, length), fill=hiv12months20)) +
geom_bar() +
facet_wrap(data_nopartners$sexpartner) +
xlab("Condom Usage") +
ylab("HIV Testing Count") +
labs(title="HIV Testing by Condom Usage Grouped by Sex Partner Category",
subtitle="Among 2020 NYC Community Health Survey Participants") +
scale_fill_brewer(palette="Set2")
Model 1 to predict HIV testing using condom usage variable only
model1 <- glm(hiv12months20 ~ condom20,
data = chs2020,
family=binomial("logit"))
summary(model1)
##
## Call:
## glm(formula = hiv12months20 ~ condom20, family = binomial("logit"),
## data = chs2020)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.70451 0.03557 -19.806 < 2e-16 ***
## condom20Yes 0.46303 0.06269 7.386 1.51e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6675.6 on 5092 degrees of freedom
## Residual deviance: 6621.4 on 5091 degrees of freedom
## (3688 observations deleted due to missingness)
## AIC: 6625.4
##
## Number of Fisher Scoring iterations: 4
#exp(coef(model1))
Model 2 to predict HIV testing using condom usage and number of sex partner variables
model2 <- glm(hiv12months20 ~ condom20 + sexpartner,
data=chs2020,
family=binomial("logit"))
summary(model2)
##
## Call:
## glm(formula = hiv12months20 ~ condom20 + sexpartner, family = binomial("logit"),
## data = chs2020)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.82794 0.10560 -17.310 < 2e-16 ***
## condom20Yes 0.28159 0.06568 4.287 1.81e-05 ***
## sexpartner 0.50878 0.04480 11.356 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6675.6 on 5092 degrees of freedom
## Residual deviance: 6490.7 on 5090 degrees of freedom
## (3688 observations deleted due to missingness)
## AIC: 6496.7
##
## Number of Fisher Scoring iterations: 4
#exp(coef(model2))
Model 3 to predict HIV testing using condom usage and all other variables
model3 <- glm(hiv12months20 ~ condom20 + agegroup + newrace + sexpartner,
data=chs2020,
family=binomial("logit"))
summary(model3)
##
## Call:
## glm(formula = hiv12months20 ~ condom20 + agegroup + newrace +
## sexpartner, family = binomial("logit"), data = chs2020)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.36933 0.18348 -12.913 < 2e-16 ***
## condom20Yes 0.17329 0.06999 2.476 0.0133 *
## agegroup25-44 0.18142 0.11961 1.517 0.1293
## agegroup45-64 -0.28981 0.12576 -2.304 0.0212 *
## agegroup65+ -1.03538 0.16405 -6.312 2.76e-10 ***
## newraceBlack, non-Hispanic 1.22348 0.08953 13.666 < 2e-16 ***
## newraceHispanic 1.23358 0.08081 15.266 < 2e-16 ***
## newraceAsian/PI, non-Hispanic 0.05373 0.11450 0.469 0.6389
## newraceOther, non-Hispanic 0.76815 0.17500 4.390 1.14e-05 ***
## sexpartner 0.49487 0.04820 10.268 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6673.8 on 5090 degrees of freedom
## Residual deviance: 5982.6 on 5081 degrees of freedom
## (3690 observations deleted due to missingness)
## AIC: 6002.6
##
## Number of Fisher Scoring iterations: 4
exp(coef(model3))
## (Intercept) condom20Yes
## 0.0935437 1.1892156
## agegroup25-44 agegroup45-64
## 1.1989217 0.7484031
## agegroup65+ newraceBlack, non-Hispanic
## 0.3550922 3.3989954
## newraceHispanic newraceAsian/PI, non-Hispanic
## 3.4334836 1.0552000
## newraceOther, non-Hispanic sexpartner
## 2.1557829 1.6402865
broom::tidy(model3, exponentiate=TRUE, conf.int=TRUE)
The outcome variable was HIV testing done (i.e., yes, no), and the predictor variable was condom usage (i.e., yes, no). The hypothesis was that individuals who had HIV testing within the last 12 months would be more likely to use condoms than those with no HIV testing. The logistic regression adjusted for three other variables: race and ethnicity, age group, and number of sex partners. The results of the regression show that HIV testing was more common among those who used condoms. This positive association was also the case for people who were non-Hispanic Black, Hispanic, non-Hispanic Other, and if they had more sexual partners. HIV testing was less common for people who were 45 years or older.
The beta coefficients (estimates) were interpreted such that an estimate greater than 0 meant that the predictor variable had a positive causal relation with the outcome, and a variable with an estimate less than 0 had a negative causal relation–considering both the corresponding magnitude and p-value (<0.05). The exponential of the estimates (e^beta coefficient) provides the odd ratios which allow for easier interpretation. The odds of HIV testing in the group who used condoms are 1.19 times the odds of the group who did not (OR = 1.19), and so forth for the aforementioned racial groups (ORs = 3.4, 3.43, 2.16) and those with more sexual partners (OR = 1.64).
These findings align with the research hypothesis that HIV testing is positively associated with condom use. As the number of sexual partners was also associated with HIV testing, there may be a significant relationship between HIV testing, sexual behavior, and condom use. As the number of sex partners someone had went up, there was an increase in the odds of them having had an HIV test done in the last 12 months. Further analysis can be done to understand whether there is a significant interaction between condom use and the number of partners (condom20 * factor(sexpartner)).
The coefficients for Levi’s sample code were similar to Model 1’s coefficients that only take into account condom usage.
chs.dsgn <-
svydesign(
ids = ~ 1,
strata = ~ strata,
weights = ~ wt21_dual, # match to current year dataset
data = chs2020, # match to current year dataset
nest = TRUE,
na.rm = TRUE
)
fit <- svyglm(hiv12months20 ~ condom20,
design = chs.dsgn,
family="quasibinomial")
summary(fit)
##
## Call:
## svyglm(formula = hiv12months20 ~ condom20, design = chs.dsgn,
## family = "quasibinomial")
##
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~wt21_dual, data = chs2020,
## nest = TRUE, na.rm = TRUE)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.72391 0.05169 -14.0 < 2e-16 ***
## condom20Yes 0.48980 0.09242 5.3 1.21e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 1.068956)
##
## Number of Fisher Scoring iterations: 4