RANDS During COVID-19 Health Insurance Analysis

RANDS Survey Question

HICOV: Are you covered by any kind of health insurance or some other kind of health care plan?

This analysis provides estimates of the proportion of individuals who reported being covered by any kind of health insurance or other kind of health care plan, along with the associated standard error, and 95% confidence intervals. Missing and refused responses are excluded, however participants who answered “don’t know” are not excluded from the denominator.

Define a function to calculate weighted proportions, SE’s, unweighted counts, and effective sample size

The beta method for svyciprop uses the Clopper-Pearson method with an effective sample size based on the estimated variance of the proportion (Korn and Graubard, 1998) to compute confidence intervals. svyciprop documentation

getSummary <- function(varformula, byformula, design){
  c <- svyby(varformula, byformula, design, unwtd.count ) #unweighted counts
  p <- svyby(varformula, byformula, design, svyciprop, method="beta", na.rm=T)  #weighted proportion and SE
  i <- confint(p)                                       #confidence intervals for proportion
  v <- svyby(varformula, byformula, design, svyvar, na.rm=T)     #calculates estimate of population variance
  
  outSum <- left_join(dplyr::select(c,-se), p) 
  outSum <- cbind(outSum, i)
    
  outSum <- dplyr::rename(outSum, SE = 'se.as.numeric(I(HICOV == 1))', 
                                  Percent = 'I(HICOV == 1)',
                                  LCL = "2.5 %",
                                  UCL = "97.5 %")
  
  outSum <- left_join(outSum, dplyr::select(v, -se) %>% rename(PopVar='I(HICOV == 1)'))
  outSum$N_effective<-outSum$PopVar/outSum$SE^2
  outSum$PopVar <-NULL
  
  outSum$DF <- degf(design)
  
  outSum
}

Set global options for how to deal with stratum with a single PSU

The adjust option centers single psu’s at the population mean for variance computation rather than dropping PSU for variance calculation. This option produces standard errors equivalent to the MISSUNIT option in SUDAAN.

options(survey.lonely.psu = 'adjust')

Import RANDS during COVID datasets

chnl<-odbcDriverConnect("DSN=GRASP_MSSQL;UID=ppk8;DATABASE=OMH")
rands1 <- sqlFetch(chnl, "dbo.RANDS_COVID_1")
rands2 <- sqlFetch(chnl, "dbo.RANDS_COVID_2")
rands3 <- sqlFetch(chnl, "dbo.RANDS_COVID_3")

rands1$round<-1
rands2$round<-2
rands3$round<-3

Combine rounds to clean and format data then split into list of data frames

KeepColumns<-c('round', 'CaseID', 'RACETHNICITY', 'AGE', 'HICOV', 'S_VPSU', 'S_VSTRAT', 'WEIGHT_CALIBRATED')
AllRands<-rbind.fill(rands1[, KeepColumns], rands2[, KeepColumns], rands3[, KeepColumns])

#Set WS, Refused to missing
AllRands[AllRands == '98']<-NA
AllRands[AllRands == '99']<-NA

#Create age group variable
AllRands$AgeGroup<-cut(AllRands$AGE, breaks=c(0, 65, 999), right=F, labels=c('Under 65', '65+'), ordered=T)

#Label race/ethnicity variable
AllRands$RACETHNICITY<-factor(AllRands$RACETHNICITY, levels=c(1:4), labels=c('White, non-Hispanic', 'Black, non-Hispanic', 'Other, non-Hispanic', 'Hispanic'))

AllRands<-split(AllRands, AllRands$round)

Estimate percents by RACETHNICITY for each round

Produces estimates for all participants and for participants by age group

Result1 <- data.frame(matrix(nrow = 0, ncol = 9))

for (i in 1:length(AllRands)){
  #specify survey design
  AllDesign <-  svydesign(id = ~S_VPSU,
                        strata  = ~S_VSTRAT,
                        weights = ~WEIGHT_CALIBRATED,
                        nest    = TRUE,
                        data    = AllRands[[i]])
  
    #Save results to table
    temp<-getSummary(~I(HICOV == 1), ~RACETHNICITY, AllDesign)
    temp$round<-i
    Result1<-rbind(Result1, temp)
}

## Joining, by = "RACETHNICITY"
## Joining, by = "RACETHNICITY"
## Joining, by = "RACETHNICITY"
## Joining, by = "RACETHNICITY"
## Joining, by = "RACETHNICITY"
## Joining, by = "RACETHNICITY"

Result2 <- data.frame(matrix(nrow = 0, ncol = 8))

for (i in 1:length(AllRands)){
  #specify survey design
  AllDesign <-  svydesign(id = ~S_VPSU,
                          strata  = ~S_VSTRAT,
                          weights = ~WEIGHT_CALIBRATED,
                          nest    = TRUE,
                          data    = AllRands[[i]])
  
  #Save results to table
  temp<-getSummary(~I(HICOV == 1), ~RACETHNICITY+AgeGroup, AllDesign)
  temp$round<-i
  Result2<-rbind(Result2, temp)
}

## Joining, by = c("RACETHNICITY", "AgeGroup")
## Joining, by = c("RACETHNICITY", "AgeGroup")
## Joining, by = c("RACETHNICITY", "AgeGroup")
## Joining, by = c("RACETHNICITY", "AgeGroup")

## Warning in qbeta(alpha/2, n.eff * rval, n.eff * (1 - rval) + 1): qbeta(a, *) =:
## x0 with |pbeta(x0,*) - alpha| = 0.025 is not accurate

## Joining, by = c("RACETHNICITY", "AgeGroup")
## Joining, by = c("RACETHNICITY", "AgeGroup")

Combined results table and flag estimates that required suppression or review

Result1$AgeGroup<-'All'
Result<-rbind(Result1, Result2)
rm(Result1, Result2)

Result$CIWidth<-Result$UCL - Result$LCL

Result$SampleSizeFlag<-ifelse(Result$N_effective<30, T, F)

Result$SmallCIWidthFlag<-ifelse(Result$CIWidth > 0 & Result$CIWidth<= 0.05 & 
                                Result$Percent != 0 & Result$Percent != 1 &
                                Result$DF >=8, F, 
                                ifelse(Result$CIWidth > 0.05 & Result$CIWidth<0.30, NA,  T))

Result$LargeCIWidthFlag<-ifelse(Result$CIWidth>= 0.30, T, F)

Result$RelativeCIWidthFlag<-ifelse(Result$CIWidth>0.05 & Result$CIWidth < 0.30 &
                                   (Result$CIWidth/Result$Percent < 1.3 & Result$DF >= 8), F,
                                    ifelse(Result$CIWidth <=0.05 | Result$CIWidth >= 0.30, NA, T))

#setwd('C:/Users/ppk8/OneDrive - CDC/OMH Project/GitLab Repo/RANDS')
#write.csv(Result, 'HICOV Estimates by RACETHNICITY and AgeGroup.csv', row.names = F)

Some quick visualizations of health insurance estimates by round, race-ethnicity, and age group

## Warning: Removed 1 rows containing missing values (geom_segment).

## Warning: Removed 3 rows containing missing values (geom_segment).

## Warning: Removed 2 rows containing missing values (geom_segment).

Linear regression, testing for mean difference in HICOV between Round 1 and Round 3.

Note: Comparisons between Round 1 and Round 2 data are omitted because longitudinal weights are across available in the public-use dataset. The CaseID has been scrambled to protect confidentiality and therefore, data cannot be linked across rounds.

From the NCHS Telemedicine Webpage: Statistical testing between Round 1 and 3 estimates and Round 2 and 3 estimates was performed using a survey regression approach. Data from the comparison rounds were parameterized using a reference cell model to estimate the mean difference and the standard error of the difference of the two estimates while accounting for the covariance of the estimates in clustered samples.

Specify survey desgin

AllRands<-do.call("rbind", AllRands) #combines lists of dataframes back into single dataframe

#Round is factored to prevent the model from treating it as a continuous variable
AllRands$round<-factor(AllRands$round)

R1And3Design <-  svydesign(id = ~S_VPSU,
                        strata  = ~S_VSTRAT,
                        weights = ~WEIGHT_CALIBRATED,
                        nest    = TRUE,
                        data    = AllRands[AllRands$round %in% c(1, 3), ])

Survey-weighted linear regression of HICOV=1 by round

m1<-svyglm(I(HICOV == 1)~round, design=R1And3Design)
summary(m1)

## 
## Call:
## svyglm(formula = I(HICOV == 1) ~ round, design = R1And3Design)
## 
## Survey design:
## svydesign(id = ~S_VPSU, strata = ~S_VSTRAT, weights = ~WEIGHT_CALIBRATED, 
##     nest = TRUE, data = AllRands[AllRands$round %in% c(1, 3), 
##         ])
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.9026483  0.0066954  134.82   <2e-16 ***
## round3      0.0034625  0.0096183    0.36   0.7197    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.086655712)
## 
## Number of Fisher Scoring iterations: 2

confint(m1)

##                    2.5 %      97.5 %
## (Intercept)  0.889350626 0.915945948
## round3      -0.015640261 0.022565263

plot(m1)

Survey-weighted linear regression of HICOV=1 adjusted for age group

#Regression model for mean difference
m2<-svyglm(I(HICOV == 1)~round + AgeGroup, design=R1And3Design)
summary(m2)

## 
## Call:
## svyglm(formula = I(HICOV == 1) ~ round + AgeGroup, design = R1And3Design)
## 
## Survey design:
## svydesign(id = ~S_VPSU, strata = ~S_VSTRAT, weights = ~WEIGHT_CALIBRATED, 
##     nest = TRUE, data = AllRands[AllRands$round %in% c(1, 3), 
##         ])
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) 0.9314099  0.0054639 170.4656   <2e-16 ***
## round3      0.0030668  0.0093524   0.3279   0.7437    
## AgeGroup.L  0.0692044  0.0044238  15.6437   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.085077755)
## 
## Number of Fisher Scoring iterations: 2

confint(m2)

##                    2.5 %      97.5 %
## (Intercept)  0.920556547 0.942263345
## round3      -0.015510642 0.021644183
## AgeGroup.L   0.060417122 0.077991694

Survey-weighted linear regression of HICOV=1 allowing different slopes for round by RACETHNICITY

m3<-svyglm(I(HICOV == 1)~round*I(RACETHNICITY) + AgeGroup, design=R1And3Design)

#Effect of round on NH-White respondents
summary(glht(m3, linfct = matrix(c(0, 1, 0, 0, 0, 0, 0, 0, 0), 1)))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: svyglm(formula = I(HICOV == 1) ~ round * I(RACETHNICITY) + AgeGroup, 
##     design = R1And3Design)
## 
## Linear Hypotheses:
##         Estimate Std. Error z value Pr(>|z|)
## 1 == 0 0.0056898  0.0081936  0.6944   0.4874
## (Adjusted p values reported -- single-step method)

#Effect of round on NH-Black respondents
summary(glht(m3, linfct = matrix(c(0, 1, 0, 0, 0, 0, 1, 0, 0), 1)))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: svyglm(formula = I(HICOV == 1) ~ round * I(RACETHNICITY) + AgeGroup, 
##     design = R1And3Design)
## 
## Linear Hypotheses:
##        Estimate Std. Error z value Pr(>|z|)
## 1 == 0 0.003130   0.027618  0.1133   0.9098
## (Adjusted p values reported -- single-step method)

#Effect of round on NH-Other respondents
summary(glht(m3, linfct = matrix(c(0, 1, 0, 0, 0, 0, 0, 1, 0), 1)))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: svyglm(formula = I(HICOV == 1) ~ round * I(RACETHNICITY) + AgeGroup, 
##     design = R1And3Design)
## 
## Linear Hypotheses:
##         Estimate Std. Error z value Pr(>|z|)
## 1 == 0 -0.018992   0.044317 -0.4285   0.6683
## (Adjusted p values reported -- single-step method)

#Effect of round on Hispanic respondents
summary(glht(m3, linfct = matrix(c(0, 1, 0, 0, 0, 0, 0, 0, 1), 1)))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: svyglm(formula = I(HICOV == 1) ~ round * I(RACETHNICITY) + AgeGroup, 
##     design = R1And3Design)
## 
## Linear Hypotheses:
##         Estimate Std. Error z value Pr(>|z|)
## 1 == 0 0.0038731  0.0319417  0.1213   0.9035
## (Adjusted p values reported -- single-step method)

#Wald test for round-racethnicity interaction term in model
regTermTest(m3, "round:I(RACETHNICITY)")

## Wald test for round:I(RACETHNICITY)
##  in svyglm(formula = I(HICOV == 1) ~ round * I(RACETHNICITY) + AgeGroup, 
##     design = R1And3Design)
## F =  0.10143614  on  3  and  85  df: p= 0.958983

Survey-weighted GLM with quasibinomial dist, testing for difference in HICOV log(odds) over rounds

The quasibinomial distribution is used to avoid error with non-integer terms due to weighting, however it gives equivalent results to a binomial distribution model.

Survey-weighted logistic regression

m4<-svyglm(I(HICOV == 1)~round, design=R1And3Design, family=quasibinomial())
summary(m4)

## 
## Call:
## svyglm(formula = I(HICOV == 1) ~ round, design = R1And3Design, 
##     family = quasibinomial())
## 
## Survey design:
## svydesign(id = ~S_VPSU, strata = ~S_VSTRAT, weights = ~WEIGHT_CALIBRATED, 
##     nest = TRUE, data = AllRands[AllRands$round %in% c(1, 3), 
##         ])
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.227003   0.076193 29.2284   <2e-16 ***
## round3      0.040043   0.111363  0.3596     0.72    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 1.0003193)
## 
## Number of Fisher Scoring iterations: 4

exp(coef(m4)) #odds ratio

## (Intercept)      round3 
##   9.2720329   1.0408560

exp(confint(m4))

##                  2.5 %     97.5 %
## (Intercept) 7.96993876 10.7868575
## round3      0.83432398  1.2985138

Survey-weighted logistic regression, adjusted for age group

m5<-svyglm(I(HICOV == 1)~round + AgeGroup, design=R1And3Design, family=quasibinomial())
summary(m5)

## 
## Call:
## svyglm(formula = I(HICOV == 1) ~ round + AgeGroup, design = R1And3Design, 
##     family = quasibinomial())
## 
## Survey design:
## svydesign(id = ~S_VPSU, strata = ~S_VSTRAT, weights = ~WEIGHT_CALIBRATED, 
##     nest = TRUE, data = AllRands[AllRands$round %in% c(1, 3), 
##         ])
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.990503   0.092596 32.2962   <2e-16 ***
## round3      0.036132   0.110310  0.3276    0.744    
## AgeGroup.L  1.381893   0.101300 13.6415   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 0.99923445)
## 
## Number of Fisher Scoring iterations: 6

exp(coef(m5)) #odds ratio

## (Intercept)      round3  AgeGroup.L 
##  19.8956913   1.0367927   3.9824320

exp(confint(m5))

##                   2.5 %     97.5 %
## (Intercept) 16.55308161 23.9132834
## round3       0.83278107  1.2907822
## AgeGroup.L   3.25656137  4.8700953

Survey-weighted logistic regression allowing different slopes for round by RACETHNICITY

m6<-svyglm(I(HICOV == 1)~round*I(RACETHNICITY) + AgeGroup, design=R1And3Design, family=quasibinomial())

#Effect of round on NH-White respondents
summary(glht(m6, linfct = matrix(c(0, 1, 0, 0, 0, 0, 0, 0, 0), 1)))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: svyglm(formula = I(HICOV == 1) ~ round * I(RACETHNICITY) + AgeGroup, 
##     design = R1And3Design, family = quasibinomial())
## 
## Linear Hypotheses:
##        Estimate Std. Error z value Pr(>|z|)
## 1 == 0 0.091471   0.130299   0.702   0.4827
## (Adjusted p values reported -- single-step method)

#Effect of round on NH-Black respondents
summary(glht(m6, linfct = matrix(c(0, 1, 0, 0, 0, 0, 1, 0, 0), 1)))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: svyglm(formula = I(HICOV == 1) ~ round * I(RACETHNICITY) + AgeGroup, 
##     design = R1And3Design, family = quasibinomial())
## 
## Linear Hypotheses:
##        Estimate Std. Error z value Pr(>|z|)
## 1 == 0 0.026206   0.227034  0.1154   0.9081
## (Adjusted p values reported -- single-step method)

#Effect of round on NH-Other respondents
summary(glht(m6, linfct = matrix(c(0, 1, 0, 0, 0, 0, 0, 1, 0), 1)))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: svyglm(formula = I(HICOV == 1) ~ round * I(RACETHNICITY) + AgeGroup, 
##     design = R1And3Design, family = quasibinomial())
## 
## Linear Hypotheses:
##        Estimate Std. Error z value Pr(>|z|)
## 1 == 0 -0.20169    0.45757 -0.4408   0.6594
## (Adjusted p values reported -- single-step method)

#Effect of round on Hispanic respondents
summary(glht(m6, linfct = matrix(c(0, 1, 0, 0, 0, 0, 0, 0, 1), 1)))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: svyglm(formula = I(HICOV == 1) ~ round * I(RACETHNICITY) + AgeGroup, 
##     design = R1And3Design, family = quasibinomial())
## 
## Linear Hypotheses:
##        Estimate Std. Error z value Pr(>|z|)
## 1 == 0 0.028243   0.245971  0.1148   0.9086
## (Adjusted p values reported -- single-step method)

#Wald test for round-racethnicity interaction term in model
regTermTest(m6, "round:I(RACETHNICITY)")

## Wald test for round:I(RACETHNICITY)
##  in svyglm(formula = I(HICOV == 1) ~ round * I(RACETHNICITY) + AgeGroup, 
##     design = R1And3Design, family = quasibinomial())
## F =  0.13825204  on  3  and  85  df: p= 0.936883