library(Hmisc)
library(data.table)
library(knitr)
library(kableExtra)
library(survey)

Load state survey data

# states_a_m = mdb.get("~/Downloads/SADC_2017_State_A_M.MDB", tables=c('SADCQN'))
# states_n_z = mdb.get("~/Downloads/SADC_2017_State_N_Z.MDB", tables=c('SADCQN'))
# states_combined = rbind(states_a_m, states_n_z)
# fwrite(states_combined, '~/Downloads/SADCQN_2017_State.csv')

states_combined_dt = fread('~/Downloads/SADCQN_2017_State.csv')
state_svy_qn <- svydesign(id = ~PSU, weight = ~weight, strata = ~stratum,
                               data = states_combined_dt, nest = TRUE)

Verify correct use of data

Will try to replicate New York reported non-condom use split by whether they have had sex with same sex only, or with other sex only / both sexes.

Screenshot of link below https://nccd.cdc.gov/Youthonline/App/Results.aspx?TT=C&OUT=0&SID=HS&QID=H64&LID=NY&YID=2017&LID2=&YID2=&COL=P&ROW1=N&ROW2=N&HT=QQ&LCT=LL&FS=S1&FR=R1&FG=G1&FA=A1&FI=I1&FP=P1&FSL=S1&FRL=R1&FGL=G1&FAL=A1&FIL=I1&FPL=P1&PV=&TST=False&C1=&C2=&QP=G&DP=1&VA=CI&CS=Y&SYID=&EYID=&SC=DEFAULT&SO=ASC

# non-use among those with opposite-sex partners only
svyciprop(~I(qn64 == 2), 
          subset(state_svy_qn, year == 2017 & sexpart2 == 2 & sitename == "New York (NY)"), 
          na.rm=TRUE)
##                     2.5% 97.5%
## I(qn64 == 2) 0.380 0.329  0.43

Point estimate matches but CI is slightly off. None of the estimation methods built into svyciprop give a matching CI (“logit”, “likelihood”, “asin”, “beta”, “mean”,“xlogit”).

# non-use among those with same-sex partners
svyciprop(~I(qn64 == 2), subset(state_svy_qn, year == 2017 & sexpart2 == 3 & sitename == "New York (NY)"), na.rm=TRUE)
##                     2.5% 97.5%
## I(qn64 == 2) 0.596 0.494  0.69

Point estimate matches but have similar issue with CI.

Test bootstrap intervals

If these work, we’ll have more freedom when creating prediction intervals with ML predictions

library(boot)
## 
## Attaching package: 'boot'
## The following object is masked from 'package:survival':
## 
##     aml
## The following object is masked from 'package:lattice':
## 
##     melanoma
subset_dt = states_combined_dt[year == 2017 & sexpart2 == 3 & sitename == "New York (NY)", .(qn64, weight)]
bt = boot(
  data = subset_dt,
  R = 500,
  statistic = function(data, ind) weighted.mean(data[ind, qn64==2], data[ind, weight], na.rm=T))
quantile(bt$t, c(.025, 0.5, 0.975))
##      2.5%       50%     97.5% 
## 0.4808562 0.5981939 0.7167019
subset_dt = states_combined_dt[year == 2017 & sexpart2 == 2 & sitename == "New York (NY)", .(qn64, weight)]
bt = boot(
  data = subset_dt,
  R = 500,
  statistic = function(data, ind) weighted.mean(data[ind, qn64==2], data[ind, weight], na.rm=T))
quantile(bt$t, c(.025, 0.5, 0.975))
##      2.5%       50%     97.5% 
## 0.3330469 0.3790481 0.4260592

These are very close to the website and survey package result! So we can probably use this method to generate prediction intervals based on ML model predicitons.

Get sexual minority estimates by district and create tables

Self-reported gay or bisexual proportion by district

sitenames = unique(states_combined_dt$sitename)
sitenames = sitenames[order(sitenames)]

get_pct_in_sitename_w_var_val = function(site, var, val, data, yr) {
  
  # break if no data
  dt = as.data.table(data$variables)[sitename == site & year == yr]
  n_responses = dt[!is.na(get(var)), .N] 
  if(n_responses == 0) {
    return(data.table(
      sitename = site,
      year = yr,
      variable = var,
      value = val,
      responses = n_responses,
      proportion = NA,
      lb = NA,
      ub = NA
    ))
  }
  
  # otherwise, get results
  res = svyciprop(as.formula(sprintf("~I(%s == %d)", var, val)), subset(data, year == yr & sitename == site), na.rm=TRUE)
  prop = unname(res[1])
  ci = unname(attr(res, "ci"))
  return(data.table(
    sitename = site,
    year = yr,
    variable = var,
    value = val,
    responses = n_responses,
    proportion = round(prop,3),
    lb = round(ci[1],3),
    ub = round(ci[2],3)
  ))
}


rbindlist(lapply(sitenames, function(x) get_pct_in_sitename_w_var_val(x, var = 'sexid2', val = 2, data = state_svy_qn, yr = 2017))) %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover"))
sitename year variable value responses proportion lb ub
Alabama (AL) 2017 sexid2 2 0 NA NA NA
Alaska (AK) 2017 sexid2 2 0 NA NA NA
Arizona (AZB) 2017 sexid2 2 2080 0.116 0.091 0.147
Arkansas (AR) 2017 sexid2 2 1566 0.142 0.104 0.191
California (CA) 2017 sexid2 2 1759 0.094 0.077 0.115
Colorado (CO) 2017 sexid2 2 1294 0.116 0.088 0.150
Delaware (DE) 2017 sexid2 2 2658 0.108 0.092 0.126
Florida (FL) 2017 sexid2 2 5938 0.103 0.096 0.112
Hawaii (HI) 2017 sexid2 2 5769 0.112 0.098 0.127
Idaho (ID) 2017 sexid2 2 0 NA NA NA
Illinois (IL) 2017 sexid2 2 4721 0.102 0.087 0.119
Iowa (IA) 2017 sexid2 2 1529 0.087 0.070 0.108
Kansas (KS) 2017 sexid2 2 0 NA NA NA
Kentucky (KY) 2017 sexid2 2 1966 0.113 0.089 0.143
Louisiana (LA) 2017 sexid2 2 0 NA NA NA
Maine (ME) 2017 sexid2 2 9404 0.114 0.104 0.125
Michigan (MI) 2017 sexid2 2 1602 0.089 0.070 0.112
Mississippi (MS) 2017 sexid2 2 0 NA NA NA
Missouri (MO) 2017 sexid2 2 0 NA NA NA
Montana (MT) 2017 sexid2 2 0 NA NA NA
Nebraska (NE) 2017 sexid2 2 1385 0.089 0.067 0.116
Nevada (NV) 2017 sexid2 2 1627 0.134 0.110 0.162
New Hampshire (NH) 2017 sexid2 2 11853 0.098 0.091 0.105
New Jersey (NJ) 2017 sexid2 2 0 NA NA NA
New York (NY) 2017 sexid2 2 11048 0.116 0.099 0.136
North Carolina (NC) 2017 sexid2 2 3049 0.107 0.090 0.127
North Dakota (ND) 2017 sexid2 2 2099 0.094 0.079 0.110
Oklahoma (OK) 2017 sexid2 2 1613 0.098 0.075 0.125
Pennsylvania (PA) 2017 sexid2 2 3594 0.093 0.081 0.107
Rhode Island (RI) 2017 sexid2 2 2189 0.111 0.089 0.138
South Carolina (SC) 2017 sexid2 2 1325 0.129 0.109 0.154
South Dakota (SD) 2017 sexid2 2 0 NA NA NA
Tennessee (TN) 2017 sexid2 2 0 NA NA NA
Utah (UT) 2017 sexid2 2 0 NA NA NA
Virginia (VA) 2017 sexid2 2 0 NA NA NA
West Virginia (WV) 2017 sexid2 2 1529 0.094 0.072 0.122
Wisconsin (WI) 2017 sexid2 2 2018 0.098 0.081 0.119
Wyoming (WY) 2017 sexid2 2 0 NA NA NA

Self-reported proportion having sex with someone of the same sex by district

rbindlist(lapply(sitenames, function(x) get_pct_in_sitename_w_var_val(x, var = 'sexpart2', val = 3, data = state_svy_qn, yr = 2017))) %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover"))
sitename year variable value responses proportion lb ub
Alabama (AL) 2017 sexpart2 3 0 NA NA NA
Alaska (AK) 2017 sexpart2 3 0 NA NA NA
Arizona (AZB) 2017 sexpart2 3 0 NA NA NA
Arkansas (AR) 2017 sexpart2 3 1388 0.131 0.088 0.191
California (CA) 2017 sexpart2 3 1633 0.074 0.056 0.097
Colorado (CO) 2017 sexpart2 3 0 NA NA NA
Delaware (DE) 2017 sexpart2 3 2665 0.074 0.062 0.089
Florida (FL) 2017 sexpart2 3 5671 0.082 0.074 0.091
Hawaii (HI) 2017 sexpart2 3 5335 0.075 0.065 0.088
Idaho (ID) 2017 sexpart2 3 0 NA NA NA
Illinois (IL) 2017 sexpart2 3 4341 0.083 0.071 0.098
Iowa (IA) 2017 sexpart2 3 1448 0.064 0.052 0.078
Kansas (KS) 2017 sexpart2 3 0 NA NA NA
Kentucky (KY) 2017 sexpart2 3 1814 0.076 0.060 0.095
Louisiana (LA) 2017 sexpart2 3 0 NA NA NA
Maine (ME) 2017 sexpart2 3 8599 0.087 0.079 0.097
Michigan (MI) 2017 sexpart2 3 1517 0.079 0.060 0.104
Mississippi (MS) 2017 sexpart2 3 0 NA NA NA
Missouri (MO) 2017 sexpart2 3 0 NA NA NA
Montana (MT) 2017 sexpart2 3 0 NA NA NA
Nebraska (NE) 2017 sexpart2 3 1324 0.056 0.043 0.074
Nevada (NV) 2017 sexpart2 3 1546 0.093 0.077 0.112
New Hampshire (NH) 2017 sexpart2 3 11428 0.056 0.051 0.061
New Jersey (NJ) 2017 sexpart2 3 0 NA NA NA
New York (NY) 2017 sexpart2 3 9373 0.079 0.069 0.091
North Carolina (NC) 2017 sexpart2 3 2900 0.089 0.075 0.105
North Dakota (ND) 2017 sexpart2 3 0 NA NA NA
Oklahoma (OK) 2017 sexpart2 3 1541 0.072 0.054 0.096
Pennsylvania (PA) 2017 sexpart2 3 3415 0.066 0.055 0.079
Rhode Island (RI) 2017 sexpart2 3 2003 0.079 0.060 0.104
South Carolina (SC) 2017 sexpart2 3 1212 0.104 0.081 0.132
South Dakota (SD) 2017 sexpart2 3 0 NA NA NA
Tennessee (TN) 2017 sexpart2 3 0 NA NA NA
Utah (UT) 2017 sexpart2 3 0 NA NA NA
Virginia (VA) 2017 sexpart2 3 0 NA NA NA
West Virginia (WV) 2017 sexpart2 3 1433 0.076 0.059 0.097
Wisconsin (WI) 2017 sexpart2 3 1941 0.061 0.051 0.073
Wyoming (WY) 2017 sexpart2 3 0 NA NA NA

Self-reported transgender identity by district

rbindlist(lapply(sitenames, function(x) get_pct_in_sitename_w_var_val(x, var = 'qntransgender', val = 1, data = state_svy_qn, yr = 2017))) %>% kable() %>% kable_styling(bootstrap_options = c("striped", "hover"))
sitename year variable value responses proportion lb ub
Alabama (AL) 2017 qntransgender 1 0 NA NA NA
Alaska (AK) 2017 qntransgender 1 0 NA NA NA
Arizona (AZB) 2017 qntransgender 1 0 NA NA NA
Arkansas (AR) 2017 qntransgender 1 0 NA NA NA
California (CA) 2017 qntransgender 1 0 NA NA NA
Colorado (CO) 2017 qntransgender 1 1303 0.012 0.006 0.021
Delaware (DE) 2017 qntransgender 1 2876 0.014 0.009 0.022
Florida (FL) 2017 qntransgender 1 0 NA NA NA
Hawaii (HI) 2017 qntransgender 1 5740 0.031 0.024 0.040
Idaho (ID) 2017 qntransgender 1 0 NA NA NA
Illinois (IL) 2017 qntransgender 1 0 NA NA NA
Iowa (IA) 2017 qntransgender 1 0 NA NA NA
Kansas (KS) 2017 qntransgender 1 0 NA NA NA
Kentucky (KY) 2017 qntransgender 1 0 NA NA NA
Louisiana (LA) 2017 qntransgender 1 0 NA NA NA
Maine (ME) 2017 qntransgender 1 9330 0.017 0.014 0.021
Michigan (MI) 2017 qntransgender 1 1614 0.013 0.008 0.022
Mississippi (MS) 2017 qntransgender 1 0 NA NA NA
Missouri (MO) 2017 qntransgender 1 0 NA NA NA
Montana (MT) 2017 qntransgender 1 0 NA NA NA
Nebraska (NE) 2017 qntransgender 1 0 NA NA NA
Nevada (NV) 2017 qntransgender 1 0 NA NA NA
New Hampshire (NH) 2017 qntransgender 1 0 NA NA NA
New Jersey (NJ) 2017 qntransgender 1 0 NA NA NA
New York (NY) 2017 qntransgender 1 0 NA NA NA
North Carolina (NC) 2017 qntransgender 1 0 NA NA NA
North Dakota (ND) 2017 qntransgender 1 0 NA NA NA
Oklahoma (OK) 2017 qntransgender 1 0 NA NA NA
Pennsylvania (PA) 2017 qntransgender 1 0 NA NA NA
Rhode Island (RI) 2017 qntransgender 1 2204 0.023 0.014 0.038
South Carolina (SC) 2017 qntransgender 1 0 NA NA NA
South Dakota (SD) 2017 qntransgender 1 0 NA NA NA
Tennessee (TN) 2017 qntransgender 1 0 NA NA NA
Utah (UT) 2017 qntransgender 1 0 NA NA NA
Virginia (VA) 2017 qntransgender 1 0 NA NA NA
West Virginia (WV) 2017 qntransgender 1 0 NA NA NA
Wisconsin (WI) 2017 qntransgender 1 1871 0.022 0.015 0.033
Wyoming (WY) 2017 qntransgender 1 0 NA NA NA

States with self-reported gay or bisexual data before 2017

sort(states_combined_dt[year<2017 & !is.na(sexid2), unique(sitename)])
##  [1] "Arizona (AZB)"       "Arkansas (AR)"       "California (CA)"    
##  [4] "Delaware (DE)"       "Florida (FL)"        "Hawaii (HI)"        
##  [7] "Illinois (IL)"       "Kentucky (KY)"       "Maine (ME)"         
## [10] "Michigan (MI)"       "Nevada (NV)"         "New Hampshire (NH)" 
## [13] "New York (NY)"       "North Carolina (NC)" "North Dakota (ND)"  
## [16] "Oklahoma (OK)"       "Pennsylvania (PA)"   "Rhode Island (RI)"  
## [19] "West Virginia (WV)"  "Wisconsin (WI)"      "Wyoming (WY)"

States with same-sex sexual contacts data before 2017

sort(states_combined_dt[year<2017 & !is.na(sexpart2), unique(sitename)])
##  [1] "Arkansas (AR)"       "California (CA)"     "Delaware (DE)"      
##  [4] "Florida (FL)"        "Hawaii (HI)"         "Illinois (IL)"      
##  [7] "Kentucky (KY)"       "Maine (ME)"          "Michigan (MI)"      
## [10] "Nevada (NV)"         "New Hampshire (NH)"  "New Jersey (NJ)"    
## [13] "New York (NY)"       "North Carolina (NC)" "North Dakota (ND)"  
## [16] "Oklahoma (OK)"       "Pennsylvania (PA)"   "Rhode Island (RI)"  
## [19] "West Virginia (WV)"  "Wisconsin (WI)"      "Wyoming (WY)"

Transgender identity data before 2017?

states_combined_dt[!is.na(qntransgender), .N, by='year']
##    year     N
## 1: 2017 24938

Nope, only have transgender indentity data starting in 2017.