library(Hmisc)
library(data.table)
library(knitr)
library(kableExtra)
library(survey)
Load state survey data
# states_a_m = mdb.get("~/Downloads/SADC_2017_State_A_M.MDB", tables=c('SADCQN'))
# states_n_z = mdb.get("~/Downloads/SADC_2017_State_N_Z.MDB", tables=c('SADCQN'))
# states_combined = rbind(states_a_m, states_n_z)
# fwrite(states_combined, '~/Downloads/SADCQN_2017_State.csv')
states_combined_dt = fread('~/Downloads/SADCQN_2017_State.csv')
state_svy_qn <- svydesign(id = ~PSU, weight = ~weight, strata = ~stratum,
data = states_combined_dt, nest = TRUE)
Will try to replicate New York reported non-condom use split by whether they have had sex with same sex only, or with other sex only / both sexes.
# non-use among those with opposite-sex partners only
svyciprop(~I(qn64 == 2),
subset(state_svy_qn, year == 2017 & sexpart2 == 2 & sitename == "New York (NY)"),
na.rm=TRUE)
## 2.5% 97.5%
## I(qn64 == 2) 0.380 0.329 0.43
Point estimate matches but CI is slightly off. None of the estimation methods built into svyciprop give a matching CI (“logit”, “likelihood”, “asin”, “beta”, “mean”,“xlogit”).
# non-use among those with same-sex partners
svyciprop(~I(qn64 == 2), subset(state_svy_qn, year == 2017 & sexpart2 == 3 & sitename == "New York (NY)"), na.rm=TRUE)
## 2.5% 97.5%
## I(qn64 == 2) 0.596 0.494 0.69
Point estimate matches but have similar issue with CI.
If these work, we’ll have more freedom when creating prediction intervals with ML predictions
library(boot)
##
## Attaching package: 'boot'
## The following object is masked from 'package:survival':
##
## aml
## The following object is masked from 'package:lattice':
##
## melanoma
subset_dt = states_combined_dt[year == 2017 & sexpart2 == 3 & sitename == "New York (NY)", .(qn64, weight)]
bt = boot(
data = subset_dt,
R = 500,
statistic = function(data, ind) weighted.mean(data[ind, qn64==2], data[ind, weight], na.rm=T))
quantile(bt$t, c(.025, 0.5, 0.975))
## 2.5% 50% 97.5%
## 0.4808562 0.5981939 0.7167019
subset_dt = states_combined_dt[year == 2017 & sexpart2 == 2 & sitename == "New York (NY)", .(qn64, weight)]
bt = boot(
data = subset_dt,
R = 500,
statistic = function(data, ind) weighted.mean(data[ind, qn64==2], data[ind, weight], na.rm=T))
quantile(bt$t, c(.025, 0.5, 0.975))
## 2.5% 50% 97.5%
## 0.3330469 0.3790481 0.4260592
These are very close to the website and survey package result! So we can probably use this method to generate prediction intervals based on ML model predicitons.
sitenames = unique(states_combined_dt$sitename)
sitenames = sitenames[order(sitenames)]
get_pct_in_sitename_w_var_val = function(site, var, val, data, yr) {
# break if no data
dt = as.data.table(data$variables)[sitename == site & year == yr]
n_responses = dt[!is.na(get(var)), .N]
if(n_responses == 0) {
return(data.table(
sitename = site,
year = yr,
variable = var,
value = val,
responses = n_responses,
proportion = NA,
lb = NA,
ub = NA
))
}
# otherwise, get results
res = svyciprop(as.formula(sprintf("~I(%s == %d)", var, val)), subset(data, year == yr & sitename == site), na.rm=TRUE)
prop = unname(res[1])
ci = unname(attr(res, "ci"))
return(data.table(
sitename = site,
year = yr,
variable = var,
value = val,
responses = n_responses,
proportion = round(prop,3),
lb = round(ci[1],3),
ub = round(ci[2],3)
))
}
rbindlist(lapply(sitenames, function(x) get_pct_in_sitename_w_var_val(x, var = 'sexid2', val = 2, data = state_svy_qn, yr = 2017))) %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover"))
| sitename | year | variable | value | responses | proportion | lb | ub |
|---|---|---|---|---|---|---|---|
| Alabama (AL) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| Alaska (AK) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| Arizona (AZB) | 2017 | sexid2 | 2 | 2080 | 0.116 | 0.091 | 0.147 |
| Arkansas (AR) | 2017 | sexid2 | 2 | 1566 | 0.142 | 0.104 | 0.191 |
| California (CA) | 2017 | sexid2 | 2 | 1759 | 0.094 | 0.077 | 0.115 |
| Colorado (CO) | 2017 | sexid2 | 2 | 1294 | 0.116 | 0.088 | 0.150 |
| Delaware (DE) | 2017 | sexid2 | 2 | 2658 | 0.108 | 0.092 | 0.126 |
| Florida (FL) | 2017 | sexid2 | 2 | 5938 | 0.103 | 0.096 | 0.112 |
| Hawaii (HI) | 2017 | sexid2 | 2 | 5769 | 0.112 | 0.098 | 0.127 |
| Idaho (ID) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| Illinois (IL) | 2017 | sexid2 | 2 | 4721 | 0.102 | 0.087 | 0.119 |
| Iowa (IA) | 2017 | sexid2 | 2 | 1529 | 0.087 | 0.070 | 0.108 |
| Kansas (KS) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| Kentucky (KY) | 2017 | sexid2 | 2 | 1966 | 0.113 | 0.089 | 0.143 |
| Louisiana (LA) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| Maine (ME) | 2017 | sexid2 | 2 | 9404 | 0.114 | 0.104 | 0.125 |
| Michigan (MI) | 2017 | sexid2 | 2 | 1602 | 0.089 | 0.070 | 0.112 |
| Mississippi (MS) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| Missouri (MO) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| Montana (MT) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| Nebraska (NE) | 2017 | sexid2 | 2 | 1385 | 0.089 | 0.067 | 0.116 |
| Nevada (NV) | 2017 | sexid2 | 2 | 1627 | 0.134 | 0.110 | 0.162 |
| New Hampshire (NH) | 2017 | sexid2 | 2 | 11853 | 0.098 | 0.091 | 0.105 |
| New Jersey (NJ) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| New York (NY) | 2017 | sexid2 | 2 | 11048 | 0.116 | 0.099 | 0.136 |
| North Carolina (NC) | 2017 | sexid2 | 2 | 3049 | 0.107 | 0.090 | 0.127 |
| North Dakota (ND) | 2017 | sexid2 | 2 | 2099 | 0.094 | 0.079 | 0.110 |
| Oklahoma (OK) | 2017 | sexid2 | 2 | 1613 | 0.098 | 0.075 | 0.125 |
| Pennsylvania (PA) | 2017 | sexid2 | 2 | 3594 | 0.093 | 0.081 | 0.107 |
| Rhode Island (RI) | 2017 | sexid2 | 2 | 2189 | 0.111 | 0.089 | 0.138 |
| South Carolina (SC) | 2017 | sexid2 | 2 | 1325 | 0.129 | 0.109 | 0.154 |
| South Dakota (SD) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| Tennessee (TN) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| Utah (UT) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| Virginia (VA) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
| West Virginia (WV) | 2017 | sexid2 | 2 | 1529 | 0.094 | 0.072 | 0.122 |
| Wisconsin (WI) | 2017 | sexid2 | 2 | 2018 | 0.098 | 0.081 | 0.119 |
| Wyoming (WY) | 2017 | sexid2 | 2 | 0 | NA | NA | NA |
rbindlist(lapply(sitenames, function(x) get_pct_in_sitename_w_var_val(x, var = 'sexpart2', val = 3, data = state_svy_qn, yr = 2017))) %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover"))
| sitename | year | variable | value | responses | proportion | lb | ub |
|---|---|---|---|---|---|---|---|
| Alabama (AL) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Alaska (AK) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Arizona (AZB) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Arkansas (AR) | 2017 | sexpart2 | 3 | 1388 | 0.131 | 0.088 | 0.191 |
| California (CA) | 2017 | sexpart2 | 3 | 1633 | 0.074 | 0.056 | 0.097 |
| Colorado (CO) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Delaware (DE) | 2017 | sexpart2 | 3 | 2665 | 0.074 | 0.062 | 0.089 |
| Florida (FL) | 2017 | sexpart2 | 3 | 5671 | 0.082 | 0.074 | 0.091 |
| Hawaii (HI) | 2017 | sexpart2 | 3 | 5335 | 0.075 | 0.065 | 0.088 |
| Idaho (ID) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Illinois (IL) | 2017 | sexpart2 | 3 | 4341 | 0.083 | 0.071 | 0.098 |
| Iowa (IA) | 2017 | sexpart2 | 3 | 1448 | 0.064 | 0.052 | 0.078 |
| Kansas (KS) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Kentucky (KY) | 2017 | sexpart2 | 3 | 1814 | 0.076 | 0.060 | 0.095 |
| Louisiana (LA) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Maine (ME) | 2017 | sexpart2 | 3 | 8599 | 0.087 | 0.079 | 0.097 |
| Michigan (MI) | 2017 | sexpart2 | 3 | 1517 | 0.079 | 0.060 | 0.104 |
| Mississippi (MS) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Missouri (MO) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Montana (MT) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Nebraska (NE) | 2017 | sexpart2 | 3 | 1324 | 0.056 | 0.043 | 0.074 |
| Nevada (NV) | 2017 | sexpart2 | 3 | 1546 | 0.093 | 0.077 | 0.112 |
| New Hampshire (NH) | 2017 | sexpart2 | 3 | 11428 | 0.056 | 0.051 | 0.061 |
| New Jersey (NJ) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| New York (NY) | 2017 | sexpart2 | 3 | 9373 | 0.079 | 0.069 | 0.091 |
| North Carolina (NC) | 2017 | sexpart2 | 3 | 2900 | 0.089 | 0.075 | 0.105 |
| North Dakota (ND) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Oklahoma (OK) | 2017 | sexpart2 | 3 | 1541 | 0.072 | 0.054 | 0.096 |
| Pennsylvania (PA) | 2017 | sexpart2 | 3 | 3415 | 0.066 | 0.055 | 0.079 |
| Rhode Island (RI) | 2017 | sexpart2 | 3 | 2003 | 0.079 | 0.060 | 0.104 |
| South Carolina (SC) | 2017 | sexpart2 | 3 | 1212 | 0.104 | 0.081 | 0.132 |
| South Dakota (SD) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Tennessee (TN) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Utah (UT) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| Virginia (VA) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
| West Virginia (WV) | 2017 | sexpart2 | 3 | 1433 | 0.076 | 0.059 | 0.097 |
| Wisconsin (WI) | 2017 | sexpart2 | 3 | 1941 | 0.061 | 0.051 | 0.073 |
| Wyoming (WY) | 2017 | sexpart2 | 3 | 0 | NA | NA | NA |
rbindlist(lapply(sitenames, function(x) get_pct_in_sitename_w_var_val(x, var = 'qntransgender', val = 1, data = state_svy_qn, yr = 2017))) %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover"))
| sitename | year | variable | value | responses | proportion | lb | ub |
|---|---|---|---|---|---|---|---|
| Alabama (AL) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Alaska (AK) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Arizona (AZB) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Arkansas (AR) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| California (CA) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Colorado (CO) | 2017 | qntransgender | 1 | 1303 | 0.012 | 0.006 | 0.021 |
| Delaware (DE) | 2017 | qntransgender | 1 | 2876 | 0.014 | 0.009 | 0.022 |
| Florida (FL) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Hawaii (HI) | 2017 | qntransgender | 1 | 5740 | 0.031 | 0.024 | 0.040 |
| Idaho (ID) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Illinois (IL) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Iowa (IA) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Kansas (KS) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Kentucky (KY) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Louisiana (LA) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Maine (ME) | 2017 | qntransgender | 1 | 9330 | 0.017 | 0.014 | 0.021 |
| Michigan (MI) | 2017 | qntransgender | 1 | 1614 | 0.013 | 0.008 | 0.022 |
| Mississippi (MS) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Missouri (MO) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Montana (MT) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Nebraska (NE) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Nevada (NV) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| New Hampshire (NH) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| New Jersey (NJ) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| New York (NY) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| North Carolina (NC) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| North Dakota (ND) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Oklahoma (OK) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Pennsylvania (PA) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Rhode Island (RI) | 2017 | qntransgender | 1 | 2204 | 0.023 | 0.014 | 0.038 |
| South Carolina (SC) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| South Dakota (SD) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Tennessee (TN) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Utah (UT) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Virginia (VA) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| West Virginia (WV) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
| Wisconsin (WI) | 2017 | qntransgender | 1 | 1871 | 0.022 | 0.015 | 0.033 |
| Wyoming (WY) | 2017 | qntransgender | 1 | 0 | NA | NA | NA |
sort(states_combined_dt[year<2017 & !is.na(sexid2), unique(sitename)])
## [1] "Arizona (AZB)" "Arkansas (AR)" "California (CA)"
## [4] "Delaware (DE)" "Florida (FL)" "Hawaii (HI)"
## [7] "Illinois (IL)" "Kentucky (KY)" "Maine (ME)"
## [10] "Michigan (MI)" "Nevada (NV)" "New Hampshire (NH)"
## [13] "New York (NY)" "North Carolina (NC)" "North Dakota (ND)"
## [16] "Oklahoma (OK)" "Pennsylvania (PA)" "Rhode Island (RI)"
## [19] "West Virginia (WV)" "Wisconsin (WI)" "Wyoming (WY)"
sort(states_combined_dt[year<2017 & !is.na(sexpart2), unique(sitename)])
## [1] "Arkansas (AR)" "California (CA)" "Delaware (DE)"
## [4] "Florida (FL)" "Hawaii (HI)" "Illinois (IL)"
## [7] "Kentucky (KY)" "Maine (ME)" "Michigan (MI)"
## [10] "Nevada (NV)" "New Hampshire (NH)" "New Jersey (NJ)"
## [13] "New York (NY)" "North Carolina (NC)" "North Dakota (ND)"
## [16] "Oklahoma (OK)" "Pennsylvania (PA)" "Rhode Island (RI)"
## [19] "West Virginia (WV)" "Wisconsin (WI)" "Wyoming (WY)"
states_combined_dt[!is.na(qntransgender), .N, by='year']
## year N
## 1: 2017 24938
Nope, only have transgender indentity data starting in 2017.