• Set up the environment
    • Non-run-specific parameters
    • Run-specific parameters
    • Set file paths
  • Load the data
  • Clean the data
    • Remove missing values
    • Create new education variables
    • Convert types
    • Rename outcomes
    • Rename treatment
  • Create negative outcomes
  • Print summary statistics
  • Write output file

Set up the environment

#####STEP 0-1: Reset environment #####
# Remove all (non-hidden) objects from the global environment. This does not
# detach packages or reset options(); restart R for a truly clean session.
rm(list = ls())
# Echo the source code of every chunk in the knitted report.
knitr::opts_chunk$set(echo = TRUE)
# Use the HTTPS mirror so packages are not fetched over an unencrypted link.
options(repos = structure(c(CRAN = "https://cran.rstudio.com/")))

#####STEP 0-2: Install packages #####
# Packages that are installed (when missing) and attached below.
# "lmtest" used to appear twice in this vector; the duplicate was dropped.
list.of.packages <- c( "grf", "metafor", "splitstackshape", "dplyr", "tidyverse", "foreach", "cowplot",
                       "reshape2", "doParallel", "survival", "readstata13", "ggplot2", "rsample", "DiagrammeR",
                       "e1071", "pscl", "pROC", "caret", "ModelMetrics", "MatchIt", "Hmisc", "scales",
                       "lmtest", "sandwich", "haven", "rpms", "randomForest", "fabricatr", "gridExtra",
                       "VIM", "mice", "missForest", "ivreg", "kableExtra")
# Keep only the names that are not present in the local library.
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
if (length(new.packages) > 0) install.packages(new.packages)

# Attach every package by name; library() errors if one cannot be loaded.
lapply(list.of.packages, library, character.only = TRUE)
## Warning: package 'grf' was built under R version 4.3.3
## Warning: package 'metafor' was built under R version 4.3.3
## Warning: package 'metadat' was built under R version 4.3.3
## Warning: package 'numDeriv' was built under R version 4.3.1
## Warning: package 'splitstackshape' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## Warning: package 'foreach' was built under R version 4.3.3
## Warning: package 'cowplot' was built under R version 4.3.3
## Warning: package 'reshape2' was built under R version 4.3.3
## Warning: package 'doParallel' was built under R version 4.3.3
## Warning: package 'iterators' was built under R version 4.3.3
## Warning: package 'readstata13' was built under R version 4.3.3
## Warning: package 'rsample' was built under R version 4.3.3
## Warning: package 'DiagrammeR' was built under R version 4.3.3
## Warning: package 'e1071' was built under R version 4.3.3
## Warning: package 'pscl' was built under R version 4.3.3
## Warning: package 'pROC' was built under R version 4.3.3
## Warning: package 'caret' was built under R version 4.3.3
## Warning: package 'ModelMetrics' was built under R version 4.3.3
## Warning: package 'MatchIt' was built under R version 4.3.3
## Warning: package 'Hmisc' was built under R version 4.3.3
## Warning: package 'scales' was built under R version 4.3.3
## Warning: package 'lmtest' was built under R version 4.3.3
## Warning: package 'zoo' was built under R version 4.3.3
## Warning: package 'sandwich' was built under R version 4.3.3
## Warning: package 'haven' was built under R version 4.3.3
## Warning: package 'rpms' was built under R version 4.3.3
## Warning: package 'randomForest' was built under R version 4.3.3
## Warning: package 'fabricatr' was built under R version 4.3.3
## Warning: package 'gridExtra' was built under R version 4.3.3
## Warning: package 'VIM' was built under R version 4.3.3
## Warning: package 'colorspace' was built under R version 4.3.3
## Warning: package 'mice' was built under R version 4.3.3
## Warning in check_dep_version(): ABI version mismatch: 
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package
## Warning: package 'missForest' was built under R version 4.3.3
## Warning: package 'ivreg' was built under R version 4.3.3
## Warning: package 'kableExtra' was built under R version 4.3.3
# Report which grf version is installed for this run.
"Version of grf package:" |> paste(packageVersion("grf")) |> print()

#####STEP 0-3: Basic information #####
# Timestamp of this run (auto-printed at top level).
Sys.time()
# Host/OS details and R session details, each kept as its own object.
system_info <- Sys.info()
session_info <- sessionInfo()
# Show both together as one named list (auto-printed at top level).
list(session_info = session_info, system_info = system_info)

Non-run-specific parameters

#####STEP 0-4: Set non-run-specific parameters #####
# Random seed value; it is only defined here, not used within this block.
seedset <- 777
# Use at most 6 threads. parallel::detectCores() returns NA when the number of
# cores cannot be determined; fall back to one thread in that case so that
# min() does not yield NA and the comparison below does not stop with
# "missing value where TRUE/FALSE needed".
detected_cores <- parallel::detectCores()
if (is.na(detected_cores)) {
  detected_cores <- 1L
}
numthreadsset <- min(6, detected_cores)
if (numthreadsset != 6) {
  print("the results of grf vary by num.thread (publication paper used num.thread=6)")
}

cat("number of threads (affects grf results):", numthreadsset, "\n")
## number of threads (affects grf results): 6
# Set printing options
options(digits = 4)

Run-specific parameters

#####STEP 0-5: Set run-specific parameters #####
# 1 selects the As_published output folder; any other value selects Testing.
published_paper_run <- 0 # ANALYST FORM
if (published_paper_run != 1) {
  print("save intermediate files into Cleaned_input_data/Testing/empirical/ folder")
} else {
  # As-published mode: announce the target folder and warn about overwriting.
  print("save intermediate files into Cleaned_input_data/As_published/empirical/ folders")
  warning("Changing this setting to 1 overwrites the input files required for replicating on different platforms.")
}
## [1] "save intermediate files into Cleaned_input_data/Testing/empirical/ folder"

Set file paths

#####STEP 0-6: Set file paths #####
# The paths below are relative. When knitting, choose "Current Working
# Directory" under "Knit Directory" in the "Knit" drop-down menu; this assumes
# the repo has been cloned to your local directory.
# Default to the Testing folder; switch to As_published only when
# published_paper_run equals 1.
processedpath <- "PP_Full_Analysis/Cleaned_input_data/Testing/empirical/"
if (published_paper_run == 1) {
  processedpath <- "PP_Full_Analysis/Cleaned_input_data/As_published/empirical/"
}

# Show which folder was selected.
print(paste("Processed data path:", processedpath))
## [1] "Processed data path: PP_Full_Analysis/Cleaned_input_data/Testing/empirical/"

Load the data

#####STEP 1-1: Read data #####
# The input path is relative. When knitting, choose "Current Working
# Directory" under "Knit Directory" in the "Knit" drop-down menu; this assumes
# the repo has been cloned to your local directory.
d <- read_dta(file = "PP_Full_Analysis/Input_Data/data_for_analysis.dta")

print("Information on raw data:")
## [1] "Information on raw data:"
# Print a column-by-column overview of the raw data.
d |> glimpse()
## Rows: 20,745
## Columns: 559
## $ person_id                  <dbl> 5, 8, 9, 16, 17, 18, 19, 23, 24, 29, 33, 34…
## $ in_inperson_data           <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ sample_inp_resp            <dbl+lbl> 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, …
## $ weight_total_inp           <dbl> 1.1504, 0.8975, 0.0000, 1.0000, 1.2126, 1.0…
## $ dt_release_inp             <date> 2009-12-28, 2010-03-09, NA, 2010-04-12, 20…
## $ dt_completed_inp           <date> 2010-02-06, 2010-04-03, NA, 2010-05-01, 20…
## $ interview_location_inp     <dbl+lbl>  1,  3, NA,  4,  4,  1, NA,  2,  1,  2,…
## $ interviewer_inp            <dbl> 6, 3, NA, 17, 17, 44, NA, 5, 11, 14, NA, NA…
## $ scale_id                   <dbl+lbl> 29, 19, NA,  6,  2, 23, NA, 41, 30, 40,…
## $ stadio_id                  <dbl+lbl> 28, 20, NA,  6,  1, 24, NA, 41, 25, 40,…
## $ omron_id                   <dbl+lbl> 12, 15, NA,  4,  2, 21, NA, 11, 18, 36,…
## $ language_capi_inp          <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  1,  0,…
## $ interpreter_inp            <dbl+lbl>  0,  0, NA,  0,  0, NA, NA,  1,  0,  0,…
## $ gender_inp                 <dbl+lbl>  1,  0, NA,  1,  0,  0, NA,  1,  0,  0,…
## $ age_inp                    <dbl> 60, 41, NA, 39, 52, 51, NA, 32, 34, 23, NA,…
## $ health_last12_inp          <dbl+lbl>  5,  3, NA,  4,  4,  4, NA,  6,  3,  1,…
## $ health_change_inp          <dbl+lbl>  2,  1, NA,  2,  2,  1, NA,  2,  2,  1,…
## $ sf4_inp                    <dbl+lbl>  2,  3, NA,  2,  5,  2, NA,  2,  3,  4,…
## $ ast_dx_pre_lottery         <dbl+lbl>  0,  1, NA,  0,  0,  0, NA,  0,  0,  1,…
## $ dia_dx_pre_lottery         <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ hbp_dx_pre_lottery         <dbl+lbl>  0,  0, NA,  0,  1,  0, NA,  0,  0,  0,…
## $ chl_dx_pre_lottery         <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ ami_dx_pre_lottery         <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ chf_dx_pre_lottery         <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ emp_dx_pre_lottery         <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ kid_dx_pre_lottery         <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ cancer_dx_pre_lottery      <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ dep_dx_pre_lottery         <dbl+lbl>  0,  0, NA,  0,  1,  0, NA,  0,  0,  1,…
## $ dia_dx_post_lottery        <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ hbp_dx_post_lottery        <dbl+lbl>  0,  1, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ chl_dx_post_lottery        <dbl+lbl>  0,  1, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ dep_dx_post_lottery        <dbl+lbl>  0,  0, NA, NA,  0,  0, NA,  0,  0,  0,…
## $ happiness_inp              <dbl+lbl>  0,  1, NA,  1,  2,  1, NA,  1,  0,  2,…
## $ phqtot_inp                 <dbl> 1, 9, NA, 2, 13, 2, NA, 3, 2, 14, NA, NA, N…
## $ pcs8_score                 <dbl> 55.33, 20.08, NA, 50.22, 44.19, 46.48, NA, …
## $ mcs8_score                 <dbl> 45.38, 53.05, NA, 50.81, 47.71, 42.73, NA, …
## $ usual_clinic_inp           <dbl+lbl> NA,  0, NA,  1,  0,  0, NA,  1,  0,  0,…
## $ needmet_med_inp            <dbl+lbl>  1,  0, NA,  1,  0,  1, NA,  1,  1,  0,…
## $ needmet_ment_inp           <dbl+lbl>  1,  1, NA,  1,  1,  1, NA,  1,  1,  0,…
## $ needmet_rx_inp             <dbl+lbl>  1,  0, NA,  1,  1,  1, NA,  1,  1,  0,…
## $ med_qual_inp               <dbl+lbl>  0,  2, NA,  2,  5,  0, NA,  5,  0,  1,…
## $ chl_chk_inp                <dbl+lbl>  0,  1, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ pap_chk_inp                <dbl+lbl>  0, NA, NA,  1, NA, NA, NA,  0, NA, NA,…
## $ mam_chk_inp                <dbl+lbl>  0, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ fobt_chk_inp               <dbl+lbl>  0, NA, NA, NA,  0,  0, NA, NA, NA, NA,…
## $ col_chk_inp                <dbl+lbl>  0, NA, NA, NA,  0,  0, NA, NA, NA, NA,…
## $ psa_chk_inp                <dbl+lbl> NA, NA, NA, NA,  0,  0, NA, NA, NA, NA,…
## $ did_flu_inp                <dbl+lbl>  0, NA, NA, NA,  0,  0, NA, NA, NA, NA,…
## $ smk_curr_inp               <dbl+lbl>  2,  0, NA,  2,  0,  0, NA,  2,  2,  2,…
## $ cvd_risk_point             <dbl> 0.137, 0.112, NA, 0.033, 0.253, 0.156, NA, …
## $ doc_num_incl_probe_inp     <dbl> 0, 6, NA, 12, 0, 0, NA, 5, 0, 5, NA, NA, NA…
## $ doc_any_incl_probe_inp     <dbl+lbl>  0,  1, NA,  1,  0,  0, NA,  1,  0,  1,…
## $ ed_num_incl_probe_inp      <dbl> 0, 2, NA, 1, 1, 0, NA, 0, 0, 10, NA, NA, NA…
## $ ed_any_incl_probe_inp      <dbl+lbl>  0,  1, NA,  1,  1,  0, NA,  0,  0,  1,…
## $ surg_num_incl_probe_inp    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ surg_any_incl_probe_inp    <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  1,…
## $ hosp_num_incl_probe_inp    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ hosp_any_incl_probe_inp    <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ tot_med_spend_other_inp    <dbl> 0, 170, NA, 0, 456, 0, NA, 0, 0, 265, NA, N…
## $ any_oop_spending           <dbl+lbl>  0,  1, NA,  0,  1,  0, NA,  0,  0,  1,…
## $ catastrophic_exp_inp       <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ ins_any_inp                <dbl+lbl>  1,  0, NA,  1,  0,  0, NA,  1,  0,  0,…
## $ ins_ohp_inp                <dbl+lbl>  0,  0, NA,  1,  0,  0, NA,  1,  0,  0,…
## $ ins_private_inp            <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ owe_inp                    <dbl+lbl>  0,  1, NA,  0,  1,  0, NA,  1,  0,  1,…
## $ borrow_inp                 <dbl+lbl>  0,  1, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ edu_inp                    <dbl+lbl>  2,  2, NA,  2,  1,  3, NA,  1,  1,  1,…
## $ hispanic_inp               <dbl+lbl>  1,  0, NA,  0,  0,  0, NA,  1,  1,  0,…
## $ race_white_inp             <dbl+lbl>  0,  1, NA,  1,  1,  0, NA,  0,  1,  0,…
## $ race_black_inp             <dbl+lbl>  0,  0, NA,  0,  0,  1, NA,  0,  0,  1,…
## $ race_nwother_inp           <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  1,…
## $ a1c_inp                    <dbl> 5.037, 5.201, NA, 5.854, 5.364, 5.527, NA, …
## $ hdl_inp                    <dbl> 48.33, 51.33, NA, 38.58, 51.33, 28.08, NA, …
## $ chl_inp                    <dbl> 241.0, 229.9, NA, 229.9, 235.4, 177.7, NA, …
## $ bmi_inp                    <dbl> 26.66, 35.23, NA, 37.12, 24.81, 27.02, NA, …
## $ bp_sar_inp                 <dbl> 144, 134, NA, 126, 168, 119, NA, 98, 108, 1…
## $ bp_dar_inp                 <dbl> 81, 82, NA, 94, 110, 79, NA, 59, 63, 76, NA…
## $ has_bp_inp                 <dbl+lbl>  1,  1, NA,  1,  1,  1, NA,  1,  1,  1,…
## $ has_waist_inp              <dbl+lbl>  1,  1, NA,  1,  1,  1, NA,  1,  1,  1,…
## $ has_hght_wght_inp          <dbl+lbl>  1,  1, NA,  1,  1,  1, NA,  1,  1,  1,…
## $ has_dbs_inp                <dbl+lbl>  1,  1, NA,  1,  1,  1, NA,  1,  1,  1,…
## $ has_all_dbs_inp            <dbl+lbl>  1,  1, NA,  1,  1,  1, NA,  1,  1,  1,…
## $ rx_any_mod_inp             <dbl+lbl>  0,  1, NA,  1,  1,  0, NA,  0,  0,  1,…
## $ rx_num_mod_inp             <dbl> 0, 2, NA, 2, NA, 0, NA, 0, 0, 3, NA, NA, NA…
## $ hbp_diure_med_inp          <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ antihyperlip_med_inp       <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ diabetes_med_inp           <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ antidep_med_inp            <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0,…
## $ meds_miss_comp             <dbl+lbl>  0,  0, NA,  0,  1,  0, NA,  0,  0,  0,…
## $ household_id               <dbl> 100005, 102094, 100009, 140688, 100017, 100…
## $ treatment                  <dbl+lbl> 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, …
## $ draw_lottery               <dbl+lbl> 7, 8, 1, 2, 6, 4, 8, 3, 6, 6, 6, 4, 3, …
## $ numhh_list                 <dbl+lbl> 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 2, …
## $ have_phone_list            <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ english_list               <dbl+lbl> 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, …
## $ first_day_list             <dbl+lbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ pobox_list                 <dbl+lbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ self_list                  <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, …
## $ sample_12m_resp            <dbl+lbl> 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, …
## $ weight_12m                 <dbl> 1.000, 1.000, 1.000, 2.824, 0.000, 0.000, 0…
## $ ohp_all_ever_admin         <dbl+lbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ ohp_all_ever_inperson      <dbl+lbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ ohp_all_ever_survey        <dbl+lbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ ohp_all_end_inperson       <dbl+lbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ ohp_all_mo_inperson        <dbl+lbl>  0,  0,  0, 15,  0,  0,  0, 25,  0,  0,…
## $ ohp_std_ever_inperson      <dbl+lbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ age_decile_inp             <dbl> 10, 5, NA, 5, 8, 8, NA, 3, 4, 1, NA, NA, NA…
## $ age_decile_dum_inp2        <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ age_decile_dum_inp3        <dbl> 0, 0, NA, 0, 0, 0, NA, 1, 0, 0, NA, NA, NA,…
## $ age_decile_dum_inp4        <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 1, 0, NA, NA, NA,…
## $ age_decile_dum_inp5        <dbl> 0, 1, NA, 1, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ age_decile_dum_inp6        <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ age_decile_dum_inp7        <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ age_decile_dum_inp8        <dbl> 0, 0, NA, 0, 1, 1, NA, 0, 0, 0, NA, NA, NA,…
## $ age_decile_dum_inp9        <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ age_decile_dum_inp10       <dbl> 1, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ older                      <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ younger                    <dbl> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0…
## $ age_19_34_inp              <dbl> 0, 0, NA, 0, 0, 0, NA, 1, 1, 1, NA, NA, NA,…
## $ age_35_49_inp              <dbl> 0, 1, NA, 1, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ age_50_64_inp              <dbl> 1, 0, NA, 0, 1, 1, NA, 0, 0, 0, NA, NA, NA,…
## $ int_loc_cat_inp_1          <dbl> 1, 0, NA, 0, 0, 1, NA, 0, 1, 0, NA, NA, NA,…
## $ int_loc_cat_inp_2          <dbl> 0, 0, NA, 0, 0, 0, NA, 1, 0, 1, NA, NA, NA,…
## $ int_loc_cat_inp_3          <dbl> 0, 1, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ int_loc_cat_inp_4          <dbl> 0, 0, NA, 1, 1, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ interview_season           <dbl+lbl>  4,  1, NA,  1,  4,  3, NA,  2,  1,  1,…
## $ interview_season_1         <dbl> 0, 1, NA, 1, 0, 0, NA, 0, 1, 1, NA, NA, NA,…
## $ interview_season_2         <dbl> 0, 0, NA, 0, 0, 0, NA, 1, 0, 0, NA, NA, NA,…
## $ interview_season_3         <dbl> 0, 0, NA, 0, 0, 1, NA, 0, 0, 0, NA, NA, NA,…
## $ interview_season_4         <dbl> 1, 0, NA, 0, 1, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ interview_weekend          <dbl> 1, 1, NA, 1, 0, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ itvr_1                     <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_2                     <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_3                     <dbl> 0, 1, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_4                     <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_5                     <dbl> 0, 0, NA, 0, 0, 0, NA, 1, 0, 0, NA, NA, NA,…
## $ itvr_6                     <dbl> 1, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_7                     <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_8                     <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_9                     <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_10                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_11                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 1, 0, NA, NA, NA,…
## $ itvr_12                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_13                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_14                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ itvr_15                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_16                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_17                    <dbl> 0, 0, NA, 1, 1, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_18                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_19                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_20                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_21                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_22                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_23                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_24                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_25                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_26                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_27                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_28                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_29                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_30                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_31                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_32                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_33                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_34                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_35                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_36                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_37                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_38                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_39                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_40                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_41                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_42                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_43                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_44                    <dbl> 0, 0, NA, 0, 0, 1, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_45                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_46                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_47                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_48                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvr_49                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_1                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_2                    <dbl> 0, 0, NA, 0, 1, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_3                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_4                    <dbl> 0, 0, NA, 1, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_5                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_6                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_7                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_8                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_9                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_10                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_11                   <dbl> 0, 0, NA, 0, 0, 0, NA, 1, 0, 0, NA, NA, NA,…
## $ omron_12                   <dbl> 1, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_13                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_14                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_15                   <dbl> 0, 1, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_16                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_17                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_18                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 1, 0, NA, NA, NA,…
## $ omron_19                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_20                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_21                   <dbl> 0, 0, NA, 0, 0, 1, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_22                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_23                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_24                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_25                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_26                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_27                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_28                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_29                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_30                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_31                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_32                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_33                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_34                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_35                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_36                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ omron_37                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_38                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_39                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_40                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ omron_41                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_1                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_2                    <dbl> 0, 0, NA, 0, 1, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_3                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_4                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_5                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_6                    <dbl> 0, 0, NA, 1, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_7                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_8                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_9                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_10                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_11                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_12                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_13                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_14                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_15                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_16                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_17                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_18                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_19                   <dbl> 0, 1, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_20                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_21                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_22                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_23                   <dbl> 0, 0, NA, 0, 0, 1, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_24                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_25                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_26                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_27                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_28                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_29                   <dbl> 1, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_30                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 1, 0, NA, NA, NA,…
## $ scale_31                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_32                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_33                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_34                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_35                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_36                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_37                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_38                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_39                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_40                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ scale_41                   <dbl> 0, 0, NA, 0, 0, 0, NA, 1, 0, 0, NA, NA, NA,…
## $ scale_42                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ scale_43                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_1                   <dbl> 0, 0, NA, 0, 1, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_2                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_3                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_4                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_5                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_6                   <dbl> 0, 0, NA, 1, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_7                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_8                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_9                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_10                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_11                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_12                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_13                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_14                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_15                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_16                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_17                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_18                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_19                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_20                  <dbl> 0, 1, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_21                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_22                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_23                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_24                  <dbl> 0, 0, NA, 0, 0, 1, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_25                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 1, 0, NA, NA, NA,…
## $ stadio_26                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_27                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_28                  <dbl> 1, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_29                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_30                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_31                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_32                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_33                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_34                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_35                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_36                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_37                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_38                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_39                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_40                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ stadio_41                  <dbl> 0, 0, NA, 0, 0, 0, NA, 1, 0, 0, NA, NA, NA,…
## $ stadio_42                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ stadio_43                  <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ itvw_english_inp           <dbl> 1, 1, NA, 1, 1, 1, NA, 0, 0, 1, NA, NA, NA,…
## $ valid_meds_inp             <dbl> 1, 1, NA, 1, 0, 1, NA, 1, 1, 1, NA, NA, NA,…
## $ pain_low_inp               <dbl> 1, 0, NA, 1, 0, 1, NA, 1, 0, 0, NA, NA, NA,…
## $ health_last12_good         <dbl+lbl>  1,  0, NA,  1,  1,  1, NA,  1,  0,  0,…
## $ health_last12_notbad       <dbl+lbl>  1,  1, NA,  1,  1,  1, NA,  1,  1,  0,…
## $ health_change_noworse      <dbl+lbl>  1,  0, NA,  1,  1,  0, NA,  1,  1,  0,…
## $ obese                      <dbl> 0, 1, NA, 1, 0, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ bp_prehyper                <dbl> 1, 1, NA, 1, 1, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ bp_hyper                   <dbl> 1, 0, NA, 1, 1, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ a1c_dia                    <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ a1c_pre_dia                <dbl> 0, 0, NA, 1, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ chl_high                   <dbl> 1, 1, NA, 1, 1, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ chl_h                      <dbl> 1, 0, NA, 0, 0, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ hdl_low                    <dbl> 0, 0, NA, 1, 0, 1, NA, 1, 1, 0, NA, NA, NA,…
## $ hdl_high                   <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ phqtot_high                <dbl> 0, 0, NA, 0, 1, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ phq_prob                   <dbl> 0.002, 0.129, NA, 0.002, 0.580, 0.002, NA, …
## $ any_dx_pre_lottery         <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ med_qual_bin_inp           <dbl+lbl> NA,  0, NA,  0,  1, NA, NA,  1, NA,  0,…
## $ smk_curr_bin_inp           <dbl+lbl>  0,  1, NA,  0,  1,  1, NA,  0,  0,  0,…
## $ poshappiness_bin_inp       <dbl+lbl>  1,  1, NA,  1,  0,  1, NA,  1,  1,  0,…
## $ mam50_chk_inp              <dbl> 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ tr_tot_med_spend_other_inp <dbl> 0, 170, NA, 0, 456, 0, NA, 0, 0, 265, NA, N…
## $ doc_num_mod_inp            <dbl> 0, 6, NA, 12, 0, 0, NA, 5, 0, 5, NA, NA, NA…
## $ ed_num_mod_inp             <dbl> 0, 2, NA, 1, 1, 0, NA, 0, 0, 10, NA, NA, NA…
## $ surg_num_mod_inp           <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ hosp_num_mod_inp_2         <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA,…
## $ response_time              <dbl> 40, 25, NA, 19, 16, 23, NA, 9, 17, 18, NA, …
## $ has_anthro_inp             <dbl> 1, 1, NA, 1, 1, 1, NA, 1, 1, 1, NA, NA, NA,…
## $ any_oop_inp                <dbl> 0, 1, NA, 0, 1, 0, NA, 0, 0, 1, NA, NA, NA,…
## $ tr_tot_spend_inp           <dbl> 0, 170, NA, 0, 456, 0, NA, 0, 0, 265, NA, N…
## $ tot_spend_inp              <dbl> 0, 170, NA, 0, 456, 0, NA, 0, 0, 265, NA, N…
## $ constant                   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ nnnnumhh_li_2              <dbl> 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1…
## $ nnnnumhh_li_3              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ llldraw_lot_2              <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ llldraw_lot_3              <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0…
## $ llldraw_lot_4              <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ llldraw_lot_5              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ llldraw_lot_6              <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ llldraw_lot_7              <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ llldraw_lot_8              <dbl> 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ sampbase2_inp              <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ sample_0m                  <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ wave_survey0m              <dbl+lbl> 7, 3, 3, 6, 4, 4, 8, 3, 1, 5, 6, 4, 3, …
## $ dt_mail_0m                 <date> 2008-09-07, 2008-07-14, 2008-07-14, 2008-0…
## $ returned_0m                <dbl+lbl> 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, …
## $ dt_returned_0m             <date> NA, 2008-08-05, 2008-08-29, NA, 2008-09-10…
## $ ret_mode_0m                <chr> "", "Mail", "Phone", "", "Phone", "", "", "…
## $ surv_lang_0m               <chr> "", "English", "English", "", "English", ""…
## $ in_survey_0m               <dbl+lbl> 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, …
## $ app_received_0m            <dbl+lbl> NA,  1,  1, NA,  1, NA, NA,  1, NA,  0,…
## $ app_sentin_0m              <dbl+lbl> NA,  1,  0, NA,  0, NA, NA,  1, NA, NA,…
## $ app_prob_inc_0m            <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA, NA,…
## $ app_prob_ins_0m            <dbl+lbl> NA, NA,  1, NA,  0, NA, NA, NA, NA, NA,…
## $ app_prob_fin_0m            <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA, NA,…
## $ app_prob_not_0m            <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA, NA,…
## $ app_prob_hass_0m           <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA, NA,…
## $ app_prob_find_0m           <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA, NA,…
## $ app_prob_ofind_0m          <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA, NA,…
## $ app_prob_some_0m           <dbl+lbl> NA, NA,  0, NA,  1, NA, NA, NA, NA, NA,…
## $ app_prob_dont_0m           <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA, NA,…
## $ app_approved_det_0m        <dbl+lbl> NA,  2, NA, NA, NA, NA, NA,  1, NA, NA,…
## $ app_deny_high_0m           <dbl+lbl> NA,  0, NA, NA, NA, NA, NA,  0, NA, NA,…
## $ app_deny_long_0m           <dbl+lbl> NA,  0, NA, NA, NA, NA, NA,  0, NA, NA,…
## $ app_deny_late_0m           <dbl+lbl> NA,  0, NA, NA, NA, NA, NA,  0, NA, NA,…
## $ app_deny_pap_0m            <dbl+lbl> NA,  0, NA, NA, NA, NA, NA,  0, NA, NA,…
## $ app_deny_opap_0m           <dbl+lbl> NA,  0, NA, NA, NA, NA, NA,  0, NA, NA,…
## $ app_deny_oth_0m            <dbl+lbl> NA,  0, NA, NA, NA, NA, NA,  0, NA, NA,…
## $ app_deny_dont_0m           <dbl+lbl> NA,  1, NA, NA, NA, NA, NA,  0, NA, NA,…
## $ ins_ohp_0m                 <dbl+lbl> NA,  0,  0, NA, NA, NA, NA,  1, NA, NA,…
## $ ins_medicare_0m            <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  0, NA,  0,…
## $ ins_employer_0m            <dbl+lbl> NA,  0,  1, NA,  0, NA, NA,  0, NA,  0,…
## $ ins_privpay_0m             <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  0, NA,  0,…
## $ ins_othcov_0m              <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  0, NA,  0,…
## $ ins_noins_0m               <dbl+lbl> NA,  1,  0, NA,  0, NA, NA,  0, NA,  0,…
## $ ins_months_0m              <dbl+lbl> NA,  0,  3, NA,  0, NA, NA,  0, NA,  0,…
## $ usual_place_0m             <dbl+lbl> NA,  1,  1, NA,  0, NA, NA,  1, NA,  0,…
## $ usual_care_0m              <dbl+lbl> NA,  2,  1, NA, NA, NA, NA,  4, NA, NA,…
## $ usual_clinic_0m            <dbl+lbl> NA,  1,  1, NA,  0, NA, NA,  0, NA,  0,…
## $ need_med_0m                <dbl+lbl> NA,  1,  1, NA,  1, NA, NA,  1, NA,  1,…
## $ needmet_qn_med_0m          <dbl+lbl> NA,  1,  2, NA,  2, NA, NA,  1, NA,  2,…
## $ needmet_med_0m             <dbl+lbl> NA,  1,  0, NA,  0, NA, NA,  1, NA,  0,…
## $ reason_care_cost_0m        <dbl+lbl> NA, NA,  0, NA,  1, NA, NA, NA, NA,  1,…
## $ reason_care_ins_0m         <dbl+lbl> NA, NA,  1, NA,  0, NA, NA, NA, NA,  1,…
## $ reason_care_doc_0m         <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA,  1,…
## $ reason_care_owe_0m         <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA,  1,…
## $ reason_care_apt_0m         <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA,  1,…
## $ reason_care_closed_0m      <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA,  1,…
## $ reason_care_nodoc_0m       <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA,  1,…
## $ reason_care_other_0m       <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA,  1,…
## $ reason_care_dont_0m        <dbl+lbl> NA, NA,  0, NA,  0, NA, NA, NA, NA,  0,…
## $ need_rx_0m                 <dbl+lbl> NA,  1,  1, NA,  0, NA, NA,  0, NA,  0,…
## $ needmet_qn_rx_0m           <dbl+lbl> NA,  1,  1, NA, NA, NA, NA, NA, NA, NA,…
## $ needmet_rx_0m              <dbl+lbl> NA,  1,  1, NA,  1, NA, NA,  1, NA,  1,…
## $ reason_rx_cost_0m          <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ reason_rx_ins_0m           <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ reason_rx_doc_0m           <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ reason_rx_get_0m           <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ reason_rx_pharm_0m         <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ reason_rx_other_0m         <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ reason_rx_dont_0m          <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ rx_num_mod_0m              <dbl> NA, 2, 1, NA, 0, NA, NA, 0, NA, 0, NA, NA, …
## $ rx_any_0m                  <dbl+lbl> NA,  1,  1, NA,  0, NA, NA,  0, NA,  0,…
## $ need_dent_0m               <dbl+lbl> NA,  1,  1, NA,  0, NA, NA,  1, NA,  1,…
## $ needmet_qn_dent_0m         <dbl+lbl> NA,  1,  2, NA, NA, NA, NA,  1, NA,  2,…
## $ needmet_dent_0m            <dbl+lbl> NA,  1,  0, NA,  1, NA, NA,  1, NA,  0,…
## $ doc_any_0m                 <dbl+lbl> NA,  1,  1, NA,  0, NA, NA,  1, NA,  0,…
## $ doc_num_mod_0m             <dbl> NA, 3, 1, NA, 0, NA, NA, 1, NA, 0, NA, NA, …
## $ er_any_0m                  <dbl+lbl> NA,  0,  0, NA,  1, NA, NA,  0, NA,  1,…
## $ er_num_mod_0m              <dbl> NA, 0, 0, NA, 1, NA, NA, 0, NA, 4, NA, NA, …
## $ er_noner_0m                <dbl+lbl> NA,  0,  0, NA, NA, NA, NA,  0, NA, NA,…
## $ reason_er_need_0m          <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,  0,…
## $ reason_er_closed_0m        <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,  0,…
## $ reason_er_apt_0m           <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,  0,…
## $ reason_er_doc_0m           <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,  0,…
## $ reason_er_copay_0m         <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,  0,…
## $ reason_er_go_0m            <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,  0,…
## $ reason_er_other_0m         <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,  0,…
## $ reason_er_rx_0m            <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,  0,…
## $ reason_er_dont_0m          <dbl+lbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,  0,…
## $ hosp_any_0m                <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  0, NA,  0,…
## $ hosp_num_mod_0m            <dbl> NA, 0, 0, NA, 0, NA, NA, 0, NA, 0, NA, NA, …
## $ total_hosp_0m              <dbl> NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ cost_rx_oop_0m             <dbl> NA, 30, 10, NA, NA, NA, NA, 0, NA, NA, NA, …
## $ cost_tot_oop_0m            <dbl> NA, 30.0, 190.0, NA, 0.0, NA, NA, 0.0, NA, …
## $ cost_any_oop_0m            <dbl+lbl> NA,  1,  1, NA,  0, NA, NA,  0, NA,  1,…
## $ cost_borrow_0m             <dbl+lbl> NA,  1,  0, NA,  0, NA, NA,  0, NA,  0,…
## $ cost_any_owe_0m            <dbl+lbl> NA,  1,  0, NA,  0, NA, NA,  0, NA,  1,…
## $ cost_tot_owe_0m            <dbl> NA, 30, 0, NA, 0, NA, NA, 0, NA, 5000, NA, …
## $ cost_refused_0m            <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  0, NA,  0,…
## $ health_gen_0m              <dbl+lbl> NA,  4,  2, NA, NA, NA, NA,  3, NA,  5,…
## $ health_gen_bin_0m          <dbl+lbl> NA,  0,  1, NA, NA, NA, NA,  0, NA,  0,…
## $ baddays_phys_0m            <dbl> NA, 0, 0, NA, 0, NA, NA, 0, NA, 30, NA, NA,…
## $ baddays_ment_0m            <dbl> NA, NA, 5, NA, 30, NA, NA, 0, NA, 30, NA, N…
## $ baddays_tot_0m             <dbl> NA, 0, 5, NA, 0, NA, NA, 0, NA, 15, NA, NA,…
## $ health_chg_0m              <dbl+lbl> NA,  2,  2, NA,  3, NA, NA,  2, NA,  1,…
## $ health_chg_bin_0m          <dbl+lbl> NA,  0,  0, NA,  1, NA, NA,  0, NA,  0,…
## $ dia_dx_0m                  <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  0, NA,  0,…
## $ ast_dx_0m                  <dbl+lbl> NA,  1,  0, NA,  0, NA, NA,  0, NA,  1,…
## $ hbp_dx_0m                  <dbl+lbl> NA,  1,  0, NA,  1, NA, NA,  0, NA,  0,…
## $ emp_dx_0m                  <dbl+lbl> NA,  1,  0, NA,  0, NA, NA,  0, NA,  0,…
## $ chf_dx_0m                  <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  0, NA,  0,…
## $ dep_dx_0m                  <dbl+lbl> NA,  1,  1, NA,  1, NA, NA,  0, NA,  1,…
## $ female_0m                  <dbl+lbl> NA,  0,  0, NA,  1, NA, NA,  1, NA,  1,…
## $ birthyear_0m               <dbl> NA, 1968, 1977, NA, 1957, NA, NA, 1977, NA,…
## $ employ_0m                  <dbl+lbl> NA,  0,  0, NA,  1, NA, NA,  1, NA,  1,…
## $ employ_det_0m              <dbl+lbl> NA,  3,  3, NA,  2, NA, NA,  1, NA,  2,…
## $ hhinc_cat_0m               <dbl+lbl> NA,  1,  8, NA,  7, NA, NA,  2, NA,  1,…
## $ race_hisp_0m               <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  1, NA,  1,…
## $ race_white_0m              <dbl+lbl> NA,  1,  1, NA,  1, NA, NA,  0, NA,  0,…
## $ race_black_0m              <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  0, NA,  1,…
## $ race_amerindian_0m         <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  0, NA,  1,…
## $ race_asian_0m              <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  0, NA,  0,…
## $ race_pacific_0m            <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  0, NA,  0,…
## $ race_other_qn_0m           <dbl+lbl> NA,  0,  0, NA,  0, NA, NA,  1, NA,  1,…
## $ employ_hrs_0m              <dbl+lbl> NA,  1,  1, NA,  2, NA, NA, NA, NA,  4,…
## $ edu_0m                     <dbl+lbl> NA,  1,  3, NA,  1, NA, NA,  1, NA,  2,…
## $ living_arrange_0m          <dbl+lbl> NA,  2,  4, NA,  1, NA, NA,  2, NA,  3,…
## $ hhsize_0m                  <dbl> NA, 2, 2, NA, NA, NA, NA, 8, NA, 2, NA, NA,…
## $ hhinc_pctfpl_0m            <dbl> NA, 0.000, 111.531, NA, NA, NA, NA, 3.377, …
## $ num19_0m                   <dbl> NA, 0, 1, NA, NA, NA, NA, 6, NA, 0, NA, NA,…
## $ num19_hi_0m                <dbl+lbl> NA,  3,  1, NA, NA, NA, NA,  1, NA, NA,…
## $ cost_tot_oop_correct_0m    <dbl> NA, 180.0, 240.0, NA, 0.0, NA, NA, 0.0, NA,…
## $ cost_medical_oop_0m        <dbl> NA, 0.0, 180.0, NA, 0.0, NA, NA, 0.0, NA, 2…
## $ sample_ed                  <dbl> 1, 1, 1, 1, NA, 1, NA, 1, 1, 1, NA, 1, 1, 1…
## $ any_visit_pre_ed           <dbl+lbl>  0,  0,  1,  1, NA,  1, NA,  0,  0,  1,…
## $ any_visit_ed               <dbl+lbl>  0,  1,  0,  1, NA,  0, NA,  0,  0,  1,…
## $ num_visit_pre_cens_ed      <dbl> 0, 0, 1, 1, NA, 2, NA, 0, 0, 7, NA, 0, 0, 0…
## $ num_visit_cens_ed          <dbl> 0, 2, 0, 5, NA, 0, NA, 0, 0, 5, NA, 0, 0, 0…
## $ any_hosp_pre_ed            <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ any_hosp_ed                <dbl+lbl>  0,  0,  0,  1, NA,  0, NA,  0,  0,  0,…
## $ num_hosp_pre_cens_ed       <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ num_hosp_cens_ed           <dbl> 0, 0, 0, 1, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ any_out_pre_ed             <dbl+lbl>  0,  0,  1,  1, NA,  1, NA,  0,  0,  1,…
## $ any_out_ed                 <dbl+lbl>  0,  1,  0,  1, NA,  0, NA,  0,  0,  1,…
## $ num_out_pre_cens_ed        <dbl> 0, 0, 1, 1, NA, 2, NA, 0, 0, 7, NA, 0, 0, 0…
## $ num_out_cens_ed            <dbl> 0, 2, 0, 4, NA, 0, NA, 0, 0, 5, NA, 0, 0, 0…
## $ any_on_pre_ed              <dbl+lbl>  0,  0,  0,  0, NA,  1, NA,  0,  0,  1,…
## $ any_on_ed                  <dbl+lbl>  0,  1,  0,  1, NA,  0, NA,  0,  0,  1,…
## $ num_on_pre_cens_ed         <dbl> 0, 0, 0, 0, NA, 1, NA, 0, 0, 1, NA, 0, 0, 0…
## $ num_on_cens_ed             <dbl> 0, 2, 0, 3, NA, 0, NA, 0, 0, 3, NA, 0, 0, 0…
## $ any_off_pre_ed             <dbl+lbl>  0,  0,  1,  1, NA,  1, NA,  0,  0,  1,…
## $ any_off_ed                 <dbl+lbl>  0,  0,  0,  1, NA,  0, NA,  0,  0,  1,…
## $ num_off_pre_cens_ed        <dbl> 0, 0, 1, 1, NA, 1, NA, 0, 0, 6, NA, 0, 0, 0…
## $ num_off_cens_ed            <dbl> 0, 0, 0, 2, NA, 0, NA, 0, 0, 2, NA, 0, 0, 0…
## $ num_edcnnp_pre_ed          <dbl> 0.0000, 0.0000, 0.5000, 0.3303, NA, 0.0000,…
## $ num_edcnnp_ed              <dbl> 0.0000, 0.3303, 0.0000, 0.5417, NA, 0.0000,…
## $ num_edcnpa_pre_ed          <dbl> 0.00000, 0.00000, 0.00000, 0.00000, NA, 0.2…
## $ num_edcnpa_ed              <dbl> 0.0000, 0.9811, 0.0000, 0.1774, NA, 0.0000,…
## $ num_epct_pre_ed            <dbl> 0.0000, 0.0000, 0.5000, 0.6697, NA, 0.8612,…
## $ num_epct_ed                <dbl> 0.0000, 0.6886, 0.0000, 1.5726, NA, 0.0000,…
## $ num_ne_pre_ed              <dbl> 0.0000, 0.0000, 0.0000, 0.0000, NA, 0.8974,…
## $ num_ne_ed                  <dbl> 0.000, 0.000, 0.000, 1.708, NA, 0.000, NA, …
## $ num_unclas_pre_ed          <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 1, NA, 0, 0, 0…
## $ num_unclas_ed              <dbl> 0, 0, 0, 1, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ any_acsc_pre_ed            <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ any_acsc_ed                <dbl+lbl>  0,  1,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ num_acsc_pre_cens_ed       <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ num_acsc_cens_ed           <dbl> 0, 1, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ any_chron_pre_ed           <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  1,…
## $ any_chron_ed               <dbl+lbl>  0,  1,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ num_chron_pre_cens_ed      <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 1, NA, 0, 0, 0…
## $ num_chron_cens_ed          <dbl> 0, 1, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ any_inj_pre_ed             <dbl+lbl>  0,  0,  1,  0, NA,  0, NA,  0,  0,  0,…
## $ any_inj_ed                 <dbl+lbl>  0,  0,  0,  1, NA,  0, NA,  0,  0,  1,…
## $ num_inj_pre_cens_ed        <dbl> 0, 0, 1, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ num_inj_cens_ed            <dbl> 0, 1, 0, 3, NA, 0, NA, 0, 0, 6, NA, 0, 0, 0…
## $ any_skin_pre_ed            <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ any_skin_ed                <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  1,…
## $ num_skin_pre_cens_ed       <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ num_skin_cens_ed           <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 1, NA, 0, 0, 0…
## $ any_abdo_pre_ed            <dbl+lbl>  0,  0,  0,  1, NA,  0, NA,  0,  0,  0,…
## $ any_abdo_ed                <dbl+lbl>  0,  1,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ num_abdo_pre_cens_ed       <dbl> 0, 0, 0, 1, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ num_abdo_cens_ed           <dbl> 0, 1, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ any_back_pre_ed            <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ any_back_ed                <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ num_back_pre_cens_ed       <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ num_back_cens_ed           <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ any_heart_pre_ed           <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ any_heart_ed               <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ num_heart_pre_cens_ed      <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ num_heart_cens_ed          <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ any_head_pre_ed            <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ any_head_ed                <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ num_head_pre_cens_ed       <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ num_head_cens_ed           <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ any_depres_pre_ed          <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ any_depres_ed              <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ num_depres_pre_cens_ed     <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ num_depres_cens_ed         <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ any_psysub_pre_ed          <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ any_psysub_ed              <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0,…
## $ num_psysub_pre_cens_ed     <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ num_psysub_cens_ed         <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 0, NA, 0, 0, 0…
## $ any_mail_match_ed          <dbl> 0, 0, 0, NA, NA, NA, NA, 0, NA, 1, NA, NA, …
## $ num_mail_match_ed          <dbl> 0, 0, 0, NA, NA, NA, NA, 0, NA, 2, NA, NA, …
## $ any_inp_match_ed           <dbl> 0, 1, NA, 1, NA, 0, NA, 0, 0, 1, NA, NA, NA…
## $ num_inp_match_ed           <dbl> 0, 1, NA, 2, NA, 0, NA, 0, 0, 7, NA, NA, NA…
## $ charg_tot_pre_ed           <dbl> 0.0, 0.0, 789.5, 1888.2, NA, 1715.3, NA, 0.…
## $ charg_tot_ed               <dbl> 0, 2751, 0, 15233, NA, 0, NA, 0, 0, 8436, N…
## $ ed_charg_tot_pre_ed        <dbl> 0.0, 0.0, 789.5, 1888.2, NA, 1006.3, NA, 0.…
## $ ed_charg_tot_ed            <dbl> 0, 2751, 0, 7101, NA, 0, NA, 0, 0, 7067, NA…
## $ any_hiun_pre_ed            <dbl+lbl>  0,  0,  1,  1, NA,  0, NA,  0,  0,  1,…
## $ any_hiun_ed                <dbl+lbl>  0,  1,  0,  1, NA,  0, NA,  0,  0,  1,…
## $ num_hiun_pre_cens_ed       <dbl> 0, 0, 1, 1, NA, 0, NA, 0, 0, 2, NA, 0, 0, 0…
## $ num_hiun_cens_ed           <dbl> 0, 2, 0, 5, NA, 0, NA, 0, 0, 3, NA, 0, 0, 0…
## $ any_loun_pre_ed            <dbl+lbl>  0,  0,  0,  0, NA,  1, NA,  0,  0,  1,…
## $ any_loun_ed                <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  1,…
## $ num_loun_pre_cens_ed       <dbl> 0, 0, 0, 0, NA, 2, NA, 0, 0, 5, NA, 0, 0, 0…
## $ num_loun_cens_ed           <dbl> 0, 0, 0, 0, NA, 0, NA, 0, 0, 2, NA, 0, 0, 0…
# Names of the columns to keep from d, in three groups. The order inside each
# vector is the order the study lists them in.

# Columns treated as covariates; this list also carries the person_id and
# household_id identifiers and the treatment column.
covariates <- c(
  "person_id", "treatment", "ohp_all_ever_inperson", "numhh_list",
  "gender_inp", "age_inp", "hispanic_inp", "race_white_inp",
  "race_black_inp", "race_nwother_inp", "ast_dx_pre_lottery",
  "dia_dx_pre_lottery", "hbp_dx_pre_lottery", "chl_dx_pre_lottery",
  "ami_dx_pre_lottery", "chf_dx_pre_lottery", "emp_dx_pre_lottery",
  "kid_dx_pre_lottery", "cancer_dx_pre_lottery", "dep_dx_pre_lottery",
  "charg_tot_pre_ed", "ed_charg_tot_pre_ed", "num_visit_pre_cens_ed",
  "any_depres_pre_ed", "household_id", "edu_inp"
)

# Weight column
weights <- "weight_total_inp"

# Outcome columns
outcomes <- c(
  "bp_sar_inp", "bp_dar_inp", "hbp_dx_post_lottery", "hbp_diure_med_inp",
  "chl_inp", "hdl_inp", "chl_dx_post_lottery", "antihyperlip_med_inp",
  "a1c_inp", "dia_dx_post_lottery", "diabetes_med_inp", "bmi_inp",
  "phqtot_inp", "dep_dx_post_lottery", "antidep_med_inp", "cvd_risk_point",
  "owe_inp", "borrow_inp", "catastrophic_exp_inp", "doc_num_mod_inp",
  "charg_tot_ed", "ed_charg_tot_ed", "doc_any_incl_probe_inp",
  "ed_any_incl_probe_inp", "hosp_num_mod_inp_2", "hosp_any_incl_probe_inp",
  "rx_num_mod_inp", "rx_any_mod_inp", "any_oop_spending", "ed_num_mod_inp"
)

# Collect only the covariates, weights, and outcomes that we care about for
# this study. The retained columns keep the order they have in d.
vars.of.interest <- c(covariates,  # All covariates
                      weights,     # Weights
                      outcomes)    # All outcomes

# The %in% match below silently ignores any requested name that d does not
# contain (e.g. a misspelling), so report such names explicitly.
vars.not.found <- setdiff(vars.of.interest, colnames(d))
if (length(vars.not.found) > 0) {
  warning("Requested variables not found in d: ",
          paste(vars.not.found, collapse = ", "), call. = FALSE)
}

# drop = FALSE keeps a data frame even if only a single column matches
d.gf <- d[, colnames(d) %in% vars.of.interest, drop = FALSE]

# Confirm the subset: glimpse() lists every retained column of d.gf with its
# type and first few values
print("Information on data restricted to variables of interest:")
## [1] "Information on data restricted to variables of interest:"
glimpse(d.gf)
## Rows: 20,745
## Columns: 57
## $ person_id               <dbl> 5, 8, 9, 16, 17, 18, 19, 23, 24, 29, 33, 34, 3…
## $ weight_total_inp        <dbl> 1.1504, 0.8975, 0.0000, 1.0000, 1.2126, 1.0000…
## $ gender_inp              <dbl+lbl>  1,  0, NA,  1,  0,  0, NA,  1,  0,  0, NA…
## $ age_inp                 <dbl> 60, 41, NA, 39, 52, 51, NA, 32, 34, 23, NA, NA…
## $ ast_dx_pre_lottery      <dbl+lbl>  0,  1, NA,  0,  0,  0, NA,  0,  0,  1, NA…
## $ dia_dx_pre_lottery      <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ hbp_dx_pre_lottery      <dbl+lbl>  0,  0, NA,  0,  1,  0, NA,  0,  0,  0, NA…
## $ chl_dx_pre_lottery      <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ ami_dx_pre_lottery      <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ chf_dx_pre_lottery      <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ emp_dx_pre_lottery      <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ kid_dx_pre_lottery      <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ cancer_dx_pre_lottery   <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ dep_dx_pre_lottery      <dbl+lbl>  0,  0, NA,  0,  1,  0, NA,  0,  0,  1, NA…
## $ dia_dx_post_lottery     <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ hbp_dx_post_lottery     <dbl+lbl>  0,  1, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ chl_dx_post_lottery     <dbl+lbl>  0,  1, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ dep_dx_post_lottery     <dbl+lbl>  0,  0, NA, NA,  0,  0, NA,  0,  0,  0, NA…
## $ phqtot_inp              <dbl> 1, 9, NA, 2, 13, 2, NA, 3, 2, 14, NA, NA, NA, …
## $ cvd_risk_point          <dbl> 0.137, 0.112, NA, 0.033, 0.253, 0.156, NA, 0.0…
## $ doc_any_incl_probe_inp  <dbl+lbl>  0,  1, NA,  1,  0,  0, NA,  1,  0,  1, NA…
## $ ed_any_incl_probe_inp   <dbl+lbl>  0,  1, NA,  1,  1,  0, NA,  0,  0,  1, NA…
## $ hosp_any_incl_probe_inp <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ any_oop_spending        <dbl+lbl>  0,  1, NA,  0,  1,  0, NA,  0,  0,  1, NA…
## $ catastrophic_exp_inp    <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ owe_inp                 <dbl+lbl>  0,  1, NA,  0,  1,  0, NA,  1,  0,  1, NA…
## $ borrow_inp              <dbl+lbl>  0,  1, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ edu_inp                 <dbl+lbl>  2,  2, NA,  2,  1,  3, NA,  1,  1,  1, NA…
## $ hispanic_inp            <dbl+lbl>  1,  0, NA,  0,  0,  0, NA,  1,  1,  0, NA…
## $ race_white_inp          <dbl+lbl>  0,  1, NA,  1,  1,  0, NA,  0,  1,  0, NA…
## $ race_black_inp          <dbl+lbl>  0,  0, NA,  0,  0,  1, NA,  0,  0,  1, NA…
## $ race_nwother_inp        <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  1, NA…
## $ a1c_inp                 <dbl> 5.037, 5.201, NA, 5.854, 5.364, 5.527, NA, 5.0…
## $ hdl_inp                 <dbl> 48.33, 51.33, NA, 38.58, 51.33, 28.08, NA, 31.…
## $ chl_inp                 <dbl> 241.0, 229.9, NA, 229.9, 235.4, 177.7, NA, 173…
## $ bmi_inp                 <dbl> 26.66, 35.23, NA, 37.12, 24.81, 27.02, NA, 26.…
## $ bp_sar_inp              <dbl> 144, 134, NA, 126, 168, 119, NA, 98, 108, 125,…
## $ bp_dar_inp              <dbl> 81, 82, NA, 94, 110, 79, NA, 59, 63, 76, NA, N…
## $ rx_any_mod_inp          <dbl+lbl>  0,  1, NA,  1,  1,  0, NA,  0,  0,  1, NA…
## $ rx_num_mod_inp          <dbl> 0, 2, NA, 2, NA, 0, NA, 0, 0, 3, NA, NA, NA, 0…
## $ hbp_diure_med_inp       <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ antihyperlip_med_inp    <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ diabetes_med_inp        <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ antidep_med_inp         <dbl+lbl>  0,  0, NA,  0,  0,  0, NA,  0,  0,  0, NA…
## $ household_id            <dbl> 100005, 102094, 100009, 140688, 100017, 100018…
## $ treatment               <dbl+lbl> 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, …
## $ numhh_list              <dbl+lbl> 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, …
## $ ohp_all_ever_inperson   <dbl+lbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ doc_num_mod_inp         <dbl> 0, 6, NA, 12, 0, 0, NA, 5, 0, 5, NA, NA, NA, 1…
## $ ed_num_mod_inp          <dbl> 0, 2, NA, 1, 1, 0, NA, 0, 0, 10, NA, NA, NA, 2…
## $ hosp_num_mod_inp_2      <dbl> 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, NA, NA, 0,…
## $ num_visit_pre_cens_ed   <dbl> 0, 0, 1, 1, NA, 2, NA, 0, 0, 7, NA, 0, 0, 0, 0…
## $ any_depres_pre_ed       <dbl+lbl>  0,  0,  0,  0, NA,  0, NA,  0,  0,  0, NA…
## $ charg_tot_pre_ed        <dbl> 0.0, 0.0, 789.5, 1888.2, NA, 1715.3, NA, 0.0, …
## $ charg_tot_ed            <dbl> 0, 2751, 0, 15233, NA, 0, NA, 0, 0, 8436, NA, …
## $ ed_charg_tot_pre_ed     <dbl> 0.0, 0.0, 789.5, 1888.2, NA, 1006.3, NA, 0.0, …
## $ ed_charg_tot_ed         <dbl> 0, 2751, 0, 7101, NA, 0, NA, 0, 0, 7067, NA, 0…
# Count the NA entries in each column of the reduced data set
print("Missing values in columns:")
## [1] "Missing values in columns:"
d.gf %>% is.na() %>% colSums()
##               person_id        weight_total_inp              gender_inp 
##                       0                       0                    8516 
##                 age_inp      ast_dx_pre_lottery      dia_dx_pre_lottery 
##                    8517                    8516                    8516 
##      hbp_dx_pre_lottery      chl_dx_pre_lottery      ami_dx_pre_lottery 
##                    8516                    8516                    8516 
##      chf_dx_pre_lottery      emp_dx_pre_lottery      kid_dx_pre_lottery 
##                    8516                    8516                    8516 
##   cancer_dx_pre_lottery      dep_dx_pre_lottery     dia_dx_post_lottery 
##                    8516                    8516                    8559 
##     hbp_dx_post_lottery     chl_dx_post_lottery     dep_dx_post_lottery 
##                    8800                    8851                    8650 
##              phqtot_inp          cvd_risk_point  doc_any_incl_probe_inp 
##                    8584                   11324                    8540 
##   ed_any_incl_probe_inp hosp_any_incl_probe_inp        any_oop_spending 
##                    8541                    8540                    8551 
##    catastrophic_exp_inp                 owe_inp              borrow_inp 
##                    8950                    8637                    8533 
##                 edu_inp            hispanic_inp          race_white_inp 
##                    8527                    8545                    8555 
##          race_black_inp        race_nwother_inp                 a1c_inp 
##                    8555                    8555                    8605 
##                 hdl_inp                 chl_inp                 bmi_inp 
##                    8573                    8571                    8570 
##              bp_sar_inp              bp_dar_inp          rx_any_mod_inp 
##                    8557                    8557                    8519 
##          rx_num_mod_inp       hbp_diure_med_inp    antihyperlip_med_inp 
##                    8833                    8516                    8516 
##        diabetes_med_inp         antidep_med_inp            household_id 
##                    8516                    8516                       0 
##               treatment              numhh_list   ohp_all_ever_inperson 
##                       0                       0                       0 
##         doc_num_mod_inp          ed_num_mod_inp      hosp_num_mod_inp_2 
##                    8587                    8570                    8570 
##   num_visit_pre_cens_ed       any_depres_pre_ed        charg_tot_pre_ed 
##                    3661                    3653                    3659 
##            charg_tot_ed     ed_charg_tot_pre_ed         ed_charg_tot_ed 
##                    3663                    3664                    3667

Clean the data

Remove missing values

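Remove observations with missing values for age_inp, gender_inp, treatment, and edu_inp, and observations for which all of the race variables (hispanic_inp, race_white_inp, race_black_inp, race_nwother_inp) are missing. In addition, gender_inp is restricted to the values 0 and 1.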

#####STEP 1-2: Remove missing values #####
# Initialize newd as d.gf; the row filters in this step are applied to newd,
# so d.gf itself keeps every row
newd <- d.gf

# Print dimensions of newd (rows, columns) before any filtering
print("Dimensions of newd:")
## [1] "Dimensions of newd:"
dim(newd)
## [1] 20745    57
# Baseline distribution of age_inp; summary() also reports the NA count
print("Summary stats of age_inp before removing missing:")
## [1] "Summary stats of age_inp before removing missing:"
summary(newd$age_inp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      19      31      41      41      50      71    8517
# Keep only the records that have a recorded age_inp
newd <- newd[complete.cases(newd[["age_inp"]]), ]

# Confirm the age filter: new row count, and an age_inp summary that should no
# longer show an NA's column
print("Dimensions after removing missing age_inp:")
## [1] "Dimensions after removing missing age_inp:"
dim(newd)
## [1] 12228    57
print("Summary stats of age_inp after removing missing:")
## [1] "Summary stats of age_inp after removing missing:"
summary(newd$age_inp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    19.0    31.0    41.0    40.8    50.0    71.0
# Tabulate the gender_inp codes before filtering; useNA = "ifany" adds an <NA>
# column only when missing values are present
print("Summary stats of gender_inp before removing transgender individuals:")
## [1] "Summary stats of gender_inp before removing transgender individuals:"
table(newd$gender_inp, useNA = "ifany")
## 
##    0    1    2 
## 5312 6915    1
# Filter gender_inp to keep only 0 and 1.
# which() is deliberate: for a missing gender_inp the `==` comparisons give NA,
# and an NA in a logical row index returns an all-NA row rather than dropping
# the record. which() keeps only the positions that are TRUE, so records with a
# missing gender_inp are removed together with those coded anything but 0 or 1.
newd <- newd[which(newd$gender_inp == 1 | newd$gender_inp == 0), ]

# Confirm the gender filter: new row count and the gender_inp codes that remain
print("Dimensions after removing transgender individuals:")
## [1] "Dimensions after removing transgender individuals:"
dim(newd)
## [1] 12227    57
print("Summary stats of gender_inp after removing transgender individuals:")
## [1] "Summary stats of gender_inp after removing transgender individuals:"
table(newd$gender_inp, useNA = "ifany")
## 
##    0    1 
## 5312 6915
print("Summary stats of race variables before removing observations missing all race variables:")
## [1] "Summary stats of race variables before removing observations missing all race variables:"
print("hispanic_inp:")
## [1] "hispanic_inp:"
table(newd$hispanic_inp, useNA = "ifany")
## 
##     0     1  <NA> 
## 10001  2197    29
print("race_white_inp:")
## [1] "race_white_inp:"
table(newd$race_white_inp, useNA = "ifany")
## 
##    0    1 <NA> 
## 3800 8388   39
print("race_black_inp:")
## [1] "race_black_inp:"
table(newd$race_black_inp, useNA = "ifany")
## 
##     0     1  <NA> 
## 10929  1259    39
print("race_nwother_inp:")
## [1] "race_nwother_inp:"
table(newd$race_nwother_inp, useNA = "ifany")
## 
##     0     1  <NA> 
## 10430  1758    39
# Keep a row when at least one of the four race/ethnicity indicators is
# observed; rows where all four are NA are dropped.
newd <- newd[!is.na(newd$hispanic_inp) |
               !is.na(newd$race_white_inp) |
               !is.na(newd$race_black_inp) |
               !is.na(newd$race_nwother_inp), ]

print("Dimensions after removing observations missing all race variables:")
## [1] "Dimensions after removing observations missing all race variables:"
dim(newd)
## [1] 12211    57
print("Summary stats of race variables after removing observations missing all race variables:")
## [1] "Summary stats of race variables after removing observations missing all race variables:"
print("hispanic_inp:")
## [1] "hispanic_inp:"
table(newd$hispanic_inp, useNA = "ifany")
## 
##     0     1  <NA> 
## 10001  2197    13
print("race_white_inp:")
## [1] "race_white_inp:"
table(newd$race_white_inp, useNA = "ifany")
## 
##    0    1 <NA> 
## 3800 8388   23
print("race_black_inp:")
## [1] "race_black_inp:"
table(newd$race_black_inp, useNA = "ifany")
## 
##     0     1  <NA> 
## 10929  1259    23
print("race_nwother_inp:")
## [1] "race_nwother_inp:"
table(newd$race_nwother_inp, useNA = "ifany")
## 
##     0     1  <NA> 
## 10430  1758    23
# Recode every remaining NA in the four race/ethnicity indicators as 0.
# Rows reaching this point have at least one of the four indicators observed.
newd$hispanic_inp <- replace(
  newd$hispanic_inp, is.na(newd$hispanic_inp), 0
)
newd$race_white_inp <- replace(
  newd$race_white_inp, is.na(newd$race_white_inp), 0
)
newd$race_black_inp <- replace(
  newd$race_black_inp, is.na(newd$race_black_inp), 0
)
newd$race_nwother_inp <- replace(
  newd$race_nwother_inp, is.na(newd$race_nwother_inp), 0
)

print("Dimensions after replacing missing race variables with 0:")
## [1] "Dimensions after replacing missing race variables with 0:"
dim(newd)
## [1] 12211    57
print("Summary stats of race variables after replacing missing race variables with 0:")
## [1] "Summary stats of race variables after replacing missing race variables with 0:"
print("hispanic_inp:")
## [1] "hispanic_inp:"
table(newd$hispanic_inp, useNA = "ifany")
## 
##     0     1 
## 10014  2197
print("race_white_inp:")
## [1] "race_white_inp:"
table(newd$race_white_inp, useNA = "ifany")
## 
##    0    1 
## 3823 8388
print("race_black_inp:")
## [1] "race_black_inp:"
table(newd$race_black_inp, useNA = "ifany")
## 
##     0     1 
## 10952  1259
print("race_nwother_inp:")
## [1] "race_nwother_inp:"
table(newd$race_nwother_inp, useNA = "ifany")
## 
##     0     1 
## 10453  1758
print("Summary stats of treatment before removing missing:")
## [1] "Summary stats of treatment before removing missing:"
table(newd$treatment, useNA = "ifany")
## 
##    0    1 
## 5836 6375
# Keep only the rows where treatment is not NA
newd <- newd[!is.na(newd[["treatment"]]), ]

print("Dimensions after removing missing treatment:")
## [1] "Dimensions after removing missing treatment:"
dim(newd)
## [1] 12211    57
print("Summary stats of treatment after removing missing:")
## [1] "Summary stats of treatment after removing missing:"
table(newd$treatment, useNA = "ifany")
## 
##    0    1 
## 5836 6375
print("Summary stats of edu_inp before removing missing:")
## [1] "Summary stats of edu_inp before removing missing:"
table(newd$edu_inp, useNA = "ifany")
## 
##    1    2    3    4 <NA> 
## 2503 5545 2772 1388    3
# Keep only the rows where edu_inp is not NA
newd <- newd[!is.na(newd[["edu_inp"]]), ]

print("Dimensions after removing missing edu_inp:")
## [1] "Dimensions after removing missing edu_inp:"
dim(newd)
## [1] 12208    57
print("Summary stats of edu_inp after removing missing:")
## [1] "Summary stats of edu_inp after removing missing:"
table(newd$edu_inp, useNA = "ifany")
## 
##    1    2    3    4 
## 2503 5545 2772 1388
# Check for missing values in all columns
colSums(is.na(newd))
##               person_id        weight_total_inp              gender_inp 
##                       0                       0                       0 
##                 age_inp      ast_dx_pre_lottery      dia_dx_pre_lottery 
##                       0                       0                       0 
##      hbp_dx_pre_lottery      chl_dx_pre_lottery      ami_dx_pre_lottery 
##                       0                       0                       0 
##      chf_dx_pre_lottery      emp_dx_pre_lottery      kid_dx_pre_lottery 
##                       0                       0                       0 
##   cancer_dx_pre_lottery      dep_dx_pre_lottery     dia_dx_post_lottery 
##                       0                       0                      41 
##     hbp_dx_post_lottery     chl_dx_post_lottery     dep_dx_post_lottery 
##                     281                     332                     131 
##              phqtot_inp          cvd_risk_point  doc_any_incl_probe_inp 
##                      64                    2801                      17 
##   ed_any_incl_probe_inp hosp_any_incl_probe_inp        any_oop_spending 
##                      18                      17                      28 
##    catastrophic_exp_inp                 owe_inp              borrow_inp 
##                     424                     114                      10 
##                 edu_inp            hispanic_inp          race_white_inp 
##                       0                       0                       0 
##          race_black_inp        race_nwother_inp                 a1c_inp 
##                       0                       0                      89 
##                 hdl_inp                 chl_inp                 bmi_inp 
##                      57                      55                      54 
##              bp_sar_inp              bp_dar_inp          rx_any_mod_inp 
##                      41                      41                       2 
##          rx_num_mod_inp       hbp_diure_med_inp    antihyperlip_med_inp 
##                     311                       0                       0 
##        diabetes_med_inp         antidep_med_inp            household_id 
##                       0                       0                       0 
##               treatment              numhh_list   ohp_all_ever_inperson 
##                       0                       0                       0 
##         doc_num_mod_inp          ed_num_mod_inp      hosp_num_mod_inp_2 
##                      64                      47                      47 
##   num_visit_pre_cens_ed       any_depres_pre_ed        charg_tot_pre_ed 
##                    2055                    2050                    2053 
##            charg_tot_ed     ed_charg_tot_pre_ed         ed_charg_tot_ed 
##                    2055                    2058                    2056

Create new education variables

Create lessHS and HSorGED variables. Individuals equal to 0 for both of these variables are “3” or “4” for edu_inp.

#####STEP 1-3: Create new education variables #####
# Build two 0/1 education indicators from edu_inp:
#   lessHS  is 1 when edu_inp is 1, otherwise 0
#   HSorGED is 1 when edu_inp is 2, otherwise 0
# %in% returns FALSE (never NA) for a missing edu_inp, so such a row would
# get 0 in both indicators.
newd$lessHS <- as.numeric(newd$edu_inp %in% 1)
newd$HSorGED <- as.numeric(newd$edu_inp %in% 2)

# Print summary statistics of new education variables
# Label for the lessHS table below (lessHS is 1 only when edu_inp is 1)
print("Summary statistics for less than HS:")
## [1] "Summary statistics for less than HS:"
table(newd$lessHS, useNA = "ifany")
## 
##    0    1 
## 9705 2503
# Label for the HSorGED table below (HSorGED is 1 only when edu_inp is 2)
print("Summary statistics for HS diploma or GED:")
## [1] "Summary statistics for HS diploma or GED:"
table(newd$HSorGED, useNA = "ifany")
## 
##    0    1 
## 6663 5545
# New variables created
new_vars<-c("lessHS","HSorGED")

# Old variables to drop
old_vars <- c("edu_inp")

# Print dimensions of newd after creating new variables
print("Dimensions of newd after creating new variables:")
## [1] "Dimensions of newd after creating new variables:"
dim(newd)
## [1] 12208    59
# Drop the old variables.
newd_adjusted <- newd[, !(names(newd) %in% c(old_vars))]

# Print dimensions of newd_adjusted after dropping old variables
print("Dimensions of newd_adjusted after dropping edu_inp:")
## [1] "Dimensions of newd_adjusted after dropping edu_inp:"
dim(newd_adjusted)
## [1] 12208    58
# Print types of newd_adjusted
print("Information on newd_adjusted:")
## [1] "Information on newd_adjusted:"
glimpse(newd_adjusted)
## Rows: 12,208
## Columns: 58
## $ person_id               <dbl> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68, …
## $ weight_total_inp        <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.0033…
## $ gender_inp              <dbl+lbl> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, …
## $ age_inp                 <dbl> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 25…
## $ ast_dx_pre_lottery      <dbl+lbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, …
## $ dia_dx_pre_lottery      <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hbp_dx_pre_lottery      <dbl+lbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, …
## $ chl_dx_pre_lottery      <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ami_dx_pre_lottery      <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ chf_dx_pre_lottery      <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ emp_dx_pre_lottery      <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ kid_dx_pre_lottery      <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cancer_dx_pre_lottery   <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ dep_dx_pre_lottery      <dbl+lbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, …
## $ dia_dx_post_lottery     <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hbp_dx_post_lottery     <dbl+lbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ chl_dx_post_lottery     <dbl+lbl>  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0…
## $ dep_dx_post_lottery     <dbl+lbl>  0,  0, NA,  0,  0,  0,  0,  0,  0,  0,  0…
## $ phqtot_inp              <dbl> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8, 7, 3, 2, 0, 2…
## $ cvd_risk_point          <dbl> 0.1370, 0.1120, 0.0330, 0.2530, 0.1560, 0.0120…
## $ doc_any_incl_probe_inp  <dbl+lbl> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, …
## $ ed_any_incl_probe_inp   <dbl+lbl> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, …
## $ hosp_any_incl_probe_inp <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ any_oop_spending        <dbl+lbl> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, …
## $ catastrophic_exp_inp    <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ owe_inp                 <dbl+lbl> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, …
## $ borrow_inp              <dbl+lbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, …
## $ hispanic_inp            <dbl+lbl> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ race_white_inp          <dbl+lbl> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, …
## $ race_black_inp          <dbl+lbl> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, …
## $ race_nwother_inp        <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ a1c_inp                 <dbl> 5.037, 5.201, 5.854, 5.364, 5.527, 5.037, 5.44…
## $ hdl_inp                 <dbl> 48.33, 51.33, 38.58, 51.33, 28.08, 31.08, 25.8…
## $ chl_inp                 <dbl> 241.0, 229.9, 229.9, 235.4, 177.7, 173.8, 152.…
## $ bmi_inp                 <dbl> 26.66, 35.23, 37.12, 24.81, 27.02, 26.26, 27.7…
## $ bp_sar_inp              <dbl> 144, 134, 126, 168, 119, 98, 108, 125, 100, 10…
## $ bp_dar_inp              <dbl> 81, 82, 94, 110, 79, 59, 63, 76, 77, 63, 84, 6…
## $ rx_any_mod_inp          <dbl+lbl> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, …
## $ rx_num_mod_inp          <dbl> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, 4, 0, 4, 2, 1, …
## $ hbp_diure_med_inp       <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, …
## $ antihyperlip_med_inp    <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ diabetes_med_inp        <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ antidep_med_inp         <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ household_id            <dbl> 100005, 102094, 140688, 100017, 100018, 115253…
## $ treatment               <dbl+lbl> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, …
## $ numhh_list              <dbl+lbl> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, …
## $ ohp_all_ever_inperson   <dbl+lbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, …
## $ doc_num_mod_inp         <dbl> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12, 6, 0, 3, 0, 10…
## $ ed_num_mod_inp          <dbl> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, 0, 2, 0, 0, 0, …
## $ hosp_num_mod_inp_2      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2…
## $ num_visit_pre_cens_ed   <dbl> 0, 0, 1, NA, 2, 0, 0, 7, 0, NA, 0, 0, 1, 0, 0,…
## $ any_depres_pre_ed       <dbl+lbl>  0,  0,  0, NA,  0,  0,  0,  0,  0, NA,  0…
## $ charg_tot_pre_ed        <dbl> 0.0, 0.0, 1888.2, NA, 1715.3, 0.0, 0.0, 5743.9…
## $ charg_tot_ed            <dbl> 0, 2751, 15233, NA, 0, 0, 0, 8436, 0, NA, 0, 0…
## $ ed_charg_tot_pre_ed     <dbl> 0.0, 0.0, 1888.2, NA, 1006.3, 0.0, 0.0, 4542.4…
## $ ed_charg_tot_ed         <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0.0, 0.0, 7067.0…
## $ lessHS                  <dbl> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1…
## $ HSorGED                 <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0…
# Check for missing values
print("Missing values in columns:")
## [1] "Missing values in columns:"
colSums(is.na(newd_adjusted))
##               person_id        weight_total_inp              gender_inp 
##                       0                       0                       0 
##                 age_inp      ast_dx_pre_lottery      dia_dx_pre_lottery 
##                       0                       0                       0 
##      hbp_dx_pre_lottery      chl_dx_pre_lottery      ami_dx_pre_lottery 
##                       0                       0                       0 
##      chf_dx_pre_lottery      emp_dx_pre_lottery      kid_dx_pre_lottery 
##                       0                       0                       0 
##   cancer_dx_pre_lottery      dep_dx_pre_lottery     dia_dx_post_lottery 
##                       0                       0                      41 
##     hbp_dx_post_lottery     chl_dx_post_lottery     dep_dx_post_lottery 
##                     281                     332                     131 
##              phqtot_inp          cvd_risk_point  doc_any_incl_probe_inp 
##                      64                    2801                      17 
##   ed_any_incl_probe_inp hosp_any_incl_probe_inp        any_oop_spending 
##                      18                      17                      28 
##    catastrophic_exp_inp                 owe_inp              borrow_inp 
##                     424                     114                      10 
##            hispanic_inp          race_white_inp          race_black_inp 
##                       0                       0                       0 
##        race_nwother_inp                 a1c_inp                 hdl_inp 
##                       0                      89                      57 
##                 chl_inp                 bmi_inp              bp_sar_inp 
##                      55                      54                      41 
##              bp_dar_inp          rx_any_mod_inp          rx_num_mod_inp 
##                      41                       2                     311 
##       hbp_diure_med_inp    antihyperlip_med_inp        diabetes_med_inp 
##                       0                       0                       0 
##         antidep_med_inp            household_id               treatment 
##                       0                       0                       0 
##              numhh_list   ohp_all_ever_inperson         doc_num_mod_inp 
##                       0                       0                      64 
##          ed_num_mod_inp      hosp_num_mod_inp_2   num_visit_pre_cens_ed 
##                      47                      47                    2055 
##       any_depres_pre_ed        charg_tot_pre_ed            charg_tot_ed 
##                    2050                    2053                    2055 
##     ed_charg_tot_pre_ed         ed_charg_tot_ed                  lessHS 
##                    2058                    2056                       0 
##                 HSorGED 
##                       0

Convert types

Convert all columns to numeric, then convert numhh_list to factor.

#####STEP 1-4: Convert types #####
# Convert every column to numeric, one column at a time.
# lapply() is used instead of apply() because apply() first coerces the whole
# data frame to a single matrix via as.matrix(); if any column is non-numeric
# that matrix is character and the numeric columns pass through format(),
# which can round them. Converting column by column avoids that round trip.
# as.character() is kept so that a factor column would be converted from its
# level labels rather than from its internal integer codes.
i <- seq_len(ncol(newd_adjusted))
newd_adjusted[i] <- lapply(
  newd_adjusted[i],
  function(x) as.numeric(as.character(x))
)

# Convert numhh_list to factor
newd_adjusted$numhh_list <- as.factor(newd_adjusted$numhh_list)

# Check the structure after type conversion
print("Information on newd_adjusted after converting to numeric (factor for numhh_list):")
## [1] "Information on newd_adjusted after converting to numeric (factor for numhh_list):"
glimpse(newd_adjusted)
## Rows: 12,208
## Columns: 58
## $ person_id               <dbl> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68, …
## $ weight_total_inp        <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.0033…
## $ gender_inp              <dbl> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1…
## $ age_inp                 <dbl> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 25…
## $ ast_dx_pre_lottery      <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0…
## $ dia_dx_pre_lottery      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hbp_dx_pre_lottery      <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0…
## $ chl_dx_pre_lottery      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ami_dx_pre_lottery      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ chf_dx_pre_lottery      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ emp_dx_pre_lottery      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ kid_dx_pre_lottery      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cancer_dx_pre_lottery   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1…
## $ dep_dx_pre_lottery      <dbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1…
## $ dia_dx_post_lottery     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hbp_dx_post_lottery     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ chl_dx_post_lottery     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ dep_dx_post_lottery     <dbl> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ phqtot_inp              <dbl> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8, 7, 3, 2, 0, 2…
## $ cvd_risk_point          <dbl> 0.1370, 0.1120, 0.0330, 0.2530, 0.1560, 0.0120…
## $ doc_any_incl_probe_inp  <dbl> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1…
## $ ed_any_incl_probe_inp   <dbl> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0…
## $ hosp_any_incl_probe_inp <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ any_oop_spending        <dbl> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1…
## $ catastrophic_exp_inp    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ owe_inp                 <dbl> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1…
## $ borrow_inp              <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0…
## $ hispanic_inp            <dbl> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ race_white_inp          <dbl> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0…
## $ race_black_inp          <dbl> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0…
## $ race_nwother_inp        <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ a1c_inp                 <dbl> 5.037, 5.201, 5.854, 5.364, 5.527, 5.037, 5.44…
## $ hdl_inp                 <dbl> 48.33, 51.33, 38.58, 51.33, 28.08, 31.08, 25.8…
## $ chl_inp                 <dbl> 241.0, 229.9, 229.9, 235.4, 177.7, 173.8, 152.…
## $ bmi_inp                 <dbl> 26.66, 35.23, 37.12, 24.81, 27.02, 26.26, 27.7…
## $ bp_sar_inp              <dbl> 144, 134, 126, 168, 119, 98, 108, 125, 100, 10…
## $ bp_dar_inp              <dbl> 81, 82, 94, 110, 79, 59, 63, 76, 77, 63, 84, 6…
## $ rx_any_mod_inp          <dbl> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1…
## $ rx_num_mod_inp          <dbl> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, 4, 0, 4, 2, 1, …
## $ hbp_diure_med_inp       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0…
## $ antihyperlip_med_inp    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ diabetes_med_inp        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ antidep_med_inp         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1…
## $ household_id            <dbl> 100005, 102094, 140688, 100017, 100018, 115253…
## $ treatment               <dbl> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0…
## $ numhh_list              <fct> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ ohp_all_ever_inperson   <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1…
## $ doc_num_mod_inp         <dbl> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12, 6, 0, 3, 0, 10…
## $ ed_num_mod_inp          <dbl> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, 0, 2, 0, 0, 0, …
## $ hosp_num_mod_inp_2      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2…
## $ num_visit_pre_cens_ed   <dbl> 0, 0, 1, NA, 2, 0, 0, 7, 0, NA, 0, 0, 1, 0, 0,…
## $ any_depres_pre_ed       <dbl> 0, 0, 0, NA, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0,…
## $ charg_tot_pre_ed        <dbl> 0.0, 0.0, 1888.2, NA, 1715.3, 0.0, 0.0, 5743.9…
## $ charg_tot_ed            <dbl> 0, 2751, 15233, NA, 0, 0, 0, 8436, 0, NA, 0, 0…
## $ ed_charg_tot_pre_ed     <dbl> 0.0, 0.0, 1888.2, NA, 1006.3, 0.0, 0.0, 4542.4…
## $ ed_charg_tot_ed         <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0.0, 0.0, 7067.0…
## $ lessHS                  <dbl> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1…
## $ HSorGED                 <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0…
# Check for missing values
print("Missing values in columns:")
## [1] "Missing values in columns:"
colSums(is.na(newd_adjusted))
##               person_id        weight_total_inp              gender_inp 
##                       0                       0                       0 
##                 age_inp      ast_dx_pre_lottery      dia_dx_pre_lottery 
##                       0                       0                       0 
##      hbp_dx_pre_lottery      chl_dx_pre_lottery      ami_dx_pre_lottery 
##                       0                       0                       0 
##      chf_dx_pre_lottery      emp_dx_pre_lottery      kid_dx_pre_lottery 
##                       0                       0                       0 
##   cancer_dx_pre_lottery      dep_dx_pre_lottery     dia_dx_post_lottery 
##                       0                       0                      41 
##     hbp_dx_post_lottery     chl_dx_post_lottery     dep_dx_post_lottery 
##                     281                     332                     131 
##              phqtot_inp          cvd_risk_point  doc_any_incl_probe_inp 
##                      64                    2801                      17 
##   ed_any_incl_probe_inp hosp_any_incl_probe_inp        any_oop_spending 
##                      18                      17                      28 
##    catastrophic_exp_inp                 owe_inp              borrow_inp 
##                     424                     114                      10 
##            hispanic_inp          race_white_inp          race_black_inp 
##                       0                       0                       0 
##        race_nwother_inp                 a1c_inp                 hdl_inp 
##                       0                      89                      57 
##                 chl_inp                 bmi_inp              bp_sar_inp 
##                      55                      54                      41 
##              bp_dar_inp          rx_any_mod_inp          rx_num_mod_inp 
##                      41                       2                     311 
##       hbp_diure_med_inp    antihyperlip_med_inp        diabetes_med_inp 
##                       0                       0                       0 
##         antidep_med_inp            household_id               treatment 
##                       0                       0                       0 
##              numhh_list   ohp_all_ever_inperson         doc_num_mod_inp 
##                       0                       0                      64 
##          ed_num_mod_inp      hosp_num_mod_inp_2   num_visit_pre_cens_ed 
##                      47                      47                    2055 
##       any_depres_pre_ed        charg_tot_pre_ed            charg_tot_ed 
##                    2050                    2053                    2055 
##     ed_charg_tot_pre_ed         ed_charg_tot_ed                  lessHS 
##                    2058                    2056                       0 
##                 HSorGED 
##                       0

Rename outcomes

#####STEP 1-5: Rename outcome variables #####
# Give the outcome columns short analysis names (new_name = old_name).
# rename() keeps each column in its original position.
newd_adjusted <- rename(
  newd_adjusted,
  # Blood pressure
  sbp = bp_sar_inp,
  dbp = bp_dar_inp,
  hypertension = hbp_dx_post_lottery,
  hypertension_med = hbp_diure_med_inp,
  # Cholesterol
  chl_level = chl_inp,
  hdl_level = hdl_inp,
  highcholesterol = chl_dx_post_lottery,
  cholesterol_med = antihyperlip_med_inp,
  # Diabetes
  a1c = a1c_inp,
  diabetes = dia_dx_post_lottery,
  diabetes_med = diabetes_med_inp,
  # BMI, depression, cardiovascular risk
  bmi = bmi_inp,
  phq = phqtot_inp,
  depression = dep_dx_post_lottery,
  depression_med = antidep_med_inp,
  cvd_risk = cvd_risk_point,
  # Financial
  oop_spend = any_oop_spending,
  debt = owe_inp,
  borrow = borrow_inp,
  catastrophic = catastrophic_exp_inp,
  # Utilization
  doc_num = doc_num_mod_inp,
  doc_any = doc_any_incl_probe_inp,
  ed_num = ed_num_mod_inp,
  ed_any = ed_any_incl_probe_inp,
  hosp_num = hosp_num_mod_inp_2,
  hosp_any = hosp_any_incl_probe_inp,
  prescriptions = rx_num_mod_inp,
  prescriptions_any = rx_any_mod_inp
)

# Print the row and column counts of newd_adjusted after renaming
print(sprintf("Dimensions of newd_adjusted: %d rows, %d columns",
             nrow(newd_adjusted), ncol(newd_adjusted)))
## [1] "Dimensions of newd_adjusted: 12208 rows, 58 columns"
print("Column names of newd_adjusted:")
## [1] "Column names of newd_adjusted:"
colnames(newd_adjusted)
##  [1] "person_id"             "weight_total_inp"      "gender_inp"           
##  [4] "age_inp"               "ast_dx_pre_lottery"    "dia_dx_pre_lottery"   
##  [7] "hbp_dx_pre_lottery"    "chl_dx_pre_lottery"    "ami_dx_pre_lottery"   
## [10] "chf_dx_pre_lottery"    "emp_dx_pre_lottery"    "kid_dx_pre_lottery"   
## [13] "cancer_dx_pre_lottery" "dep_dx_pre_lottery"    "diabetes"             
## [16] "hypertension"          "highcholesterol"       "depression"           
## [19] "phq"                   "cvd_risk"              "doc_any"              
## [22] "ed_any"                "hosp_any"              "oop_spend"            
## [25] "catastrophic"          "debt"                  "borrow"               
## [28] "hispanic_inp"          "race_white_inp"        "race_black_inp"       
## [31] "race_nwother_inp"      "a1c"                   "hdl_level"            
## [34] "chl_level"             "bmi"                   "sbp"                  
## [37] "dbp"                   "prescriptions_any"     "prescriptions"        
## [40] "hypertension_med"      "cholesterol_med"       "diabetes_med"         
## [43] "depression_med"        "household_id"          "treatment"            
## [46] "numhh_list"            "ohp_all_ever_inperson" "doc_num"              
## [49] "ed_num"                "hosp_num"              "num_visit_pre_cens_ed"
## [52] "any_depres_pre_ed"     "charg_tot_pre_ed"      "charg_tot_ed"         
## [55] "ed_charg_tot_pre_ed"   "ed_charg_tot_ed"       "lessHS"               
## [58] "HSorGED"

Rename treatment

To avoid confusion in later scripts, rename “treatment” to “eligibility”.

#####STEP 1-6: Rename treatment #####
# Rename the treatment column to eligibility; its position is unchanged.
newd_adjusted <- rename(newd_adjusted, eligibility = treatment)

Create negative outcomes

#####STEP 1-7: Add negative treatment effects #####
# Add a sign-flipped "<name>_neg" copy of selected outcomes. Per the original
# author's note, this was done only for objective measurements and costs
# where lower is always better.
# NOTE(review): hdl_level is negated here as well -- confirm that treating
# lower HDL as "better" is what the downstream scripts intend.
newd_adjusted <- mutate(
  newd_adjusted,
  # Continuous measures: <name>_neg = -<name>
  across(
    c(sbp, dbp, chl_level, hdl_level, a1c, bmi, phq, cvd_risk),
    function(x) -x,
    .names = "{.col}_neg"
  ),
  # 0/1 indicators: <name>_neg = 1 - <name>, so 0 and 1 swap
  across(
    c(debt, borrow, catastrophic),
    function(x) 1 - x,
    .names = "{.col}_neg"
  )
)

# Print summary statistics of original and negative outcome variables for mutated variables 
print("Summary statistics of original and negative outcome variables for mutated variables:")
## [1] "Summary statistics of original and negative outcome variables for mutated variables:"
# Blood pressure
print("Systolic blood pressure original/negative:")
## [1] "Systolic blood pressure original/negative:"
summary(newd_adjusted$sbp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##      72     108     117     119     128     229      41
summary(newd_adjusted$sbp_neg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    -229    -128    -117    -119    -108     -72      41
print("Diastolic blood pressure original/negative:")
## [1] "Diastolic blood pressure original/negative:"
summary(newd_adjusted$dbp) 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    39.0    67.0    75.0    75.8    83.0   158.0      41
summary(newd_adjusted$dbp_neg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -158.0   -83.0   -75.0   -75.8   -67.0   -39.0      41
# Cholesterol
print("Total cholesterol original/negative:")
## [1] "Total cholesterol original/negative:"
summary(newd_adjusted$chl_level)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     117     182     203     205     226     488      55
summary(newd_adjusted$chl_level_neg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    -488    -226    -203    -205    -182    -117      55
print("HDL cholesterol original/negative:")
## [1] "HDL cholesterol original/negative:"
summary(newd_adjusted$hdl_level)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   -1.92   38.58   46.08   47.70   55.08  139.08      57
summary(newd_adjusted$hdl_level_neg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -139.08  -55.08  -46.08  -47.70  -38.58    1.92      57
# Other clinical measures
# Same original-vs-negated comparison for A1C, BMI, PHQ-9 and cardiovascular
# risk: each "_neg" summary should be the mirror image of the original, with
# an unchanged NA count.
print("A1C original/negative:")
## [1] "A1C original/negative:"
summary(newd_adjusted$a1c)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    3.73    5.04    5.20    5.33    5.45   11.25      89
summary(newd_adjusted$a1c_neg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -11.25   -5.45   -5.20   -5.33   -5.04   -3.73      89
print("BMI original/negative:")
## [1] "BMI original/negative:"
summary(newd_adjusted$bmi)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    14.3    24.5    28.5    29.9    33.7    93.9      54
summary(newd_adjusted$bmi_neg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   -93.9   -33.7   -28.5   -29.9   -24.5   -14.3      54
# For PHQ-9 the original minimum is 0, so the negated maximum is also 0.
print("PHQ-9 depression score original/negative:")
## [1] "PHQ-9 depression score original/negative:"
summary(newd_adjusted$phq)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    2.00    5.00    6.82   10.00   24.00      64
summary(newd_adjusted$phq_neg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -24.00  -10.00   -5.00   -6.82   -2.00    0.00      64
# NOTE(review): cvd_risk is missing for 2801 records, far more than any other
# measure summarised here. The quantiles are also shown to only one decimal
# (presumably because the NA count is formatted together with them), so the
# spread of this 0-0.3 variable is hard to read from this output.
print("Cardiovascular risk score original/negative:")
## [1] "Cardiovascular risk score original/negative:"
summary(newd_adjusted$cvd_risk)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     0.0     0.0     0.1     0.1     0.1     0.3    2801
summary(newd_adjusted$cvd_risk_neg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    -0.3    -0.1    -0.1    -0.1     0.0     0.0    2801
# Financial measures
# These outcomes are 0/1 indicators, so they are tabulated rather than
# summarised. In the echoed output each "_neg" table has the 0 and 1 counts of
# its original swapped (consistent with a 0/1 flip) and the same <NA> count.
# useNA = "ifany" adds the <NA> column only when missing values are present.
print("Debt original/negative:")
## [1] "Debt original/negative:"
table(newd_adjusted$debt, useNA = "ifany")
## 
##    0    1 <NA> 
## 5468 6626  114
table(newd_adjusted$debt_neg, useNA = "ifany")
## 
##    0    1 <NA> 
## 6626 5468  114
print("Borrowing original/negative:")
## [1] "Borrowing original/negative:"
table(newd_adjusted$borrow, useNA = "ifany")
## 
##    0    1 <NA> 
## 9456 2742   10
table(newd_adjusted$borrow_neg, useNA = "ifany")
## 
##    0    1 <NA> 
## 2742 9456   10
print("Catastrophic expenditure original/negative:")
## [1] "Catastrophic expenditure original/negative:"
table(newd_adjusted$catastrophic, useNA = "ifany")
## 
##     0     1  <NA> 
## 11244   540   424
table(newd_adjusted$catastrophic_neg, useNA = "ifany")
## 
##     0     1  <NA> 
##   540 11244   424
# Print final data structure
# Last look at the cleaned data before it is written out: glimpse() lists
# every column with its type and first few values, so the newly added "_neg"
# columns and any unexpected types can be checked by eye.
print("Final data structure:")
## [1] "Final data structure:"
glimpse(newd_adjusted)
## Rows: 12,208
## Columns: 69
## $ person_id             <dbl> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68, 70…
## $ weight_total_inp      <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.0033, …
## $ gender_inp            <dbl> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, …
## $ age_inp               <dbl> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 25, …
## $ ast_dx_pre_lottery    <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, …
## $ dia_dx_pre_lottery    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hbp_dx_pre_lottery    <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ chl_dx_pre_lottery    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ami_dx_pre_lottery    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ chf_dx_pre_lottery    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ emp_dx_pre_lottery    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ kid_dx_pre_lottery    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cancer_dx_pre_lottery <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, …
## $ dep_dx_pre_lottery    <dbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, …
## $ diabetes              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hypertension          <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ highcholesterol       <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression            <dbl> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ phq                   <dbl> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8, 7, 3, 2, 0, 2, …
## $ cvd_risk              <dbl> 0.1370, 0.1120, 0.0330, 0.2530, 0.1560, 0.0120, …
## $ doc_any               <dbl> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, …
## $ ed_any                <dbl> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ hosp_any              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ oop_spend             <dbl> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, …
## $ catastrophic          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ debt                  <dbl> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, …
## $ borrow                <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, …
## $ hispanic_inp          <dbl> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ race_white_inp        <dbl> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, …
## $ race_black_inp        <dbl> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ race_nwother_inp      <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ a1c                   <dbl> 5.037, 5.201, 5.854, 5.364, 5.527, 5.037, 5.446,…
## $ hdl_level             <dbl> 48.33, 51.33, 38.58, 51.33, 28.08, 31.08, 25.83,…
## $ chl_level             <dbl> 241.0, 229.9, 229.9, 235.4, 177.7, 173.8, 152.7,…
## $ bmi                   <dbl> 26.66, 35.23, 37.12, 24.81, 27.02, 26.26, 27.70,…
## $ sbp                   <dbl> 144, 134, 126, 168, 119, 98, 108, 125, 100, 104,…
## $ dbp                   <dbl> 81, 82, 94, 110, 79, 59, 63, 76, 77, 63, 84, 62,…
## $ prescriptions_any     <dbl> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, …
## $ prescriptions         <dbl> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, 4, 0, 4, 2, 1, 4,…
## $ hypertension_med      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ cholesterol_med       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ diabetes_med          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression_med        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, …
## $ household_id          <dbl> 100005, 102094, 140688, 100017, 100018, 115253, …
## $ eligibility           <dbl> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, …
## $ numhh_list            <fct> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ohp_all_ever_inperson <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, …
## $ doc_num               <dbl> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12, 6, 0, 3, 0, 10, …
## $ ed_num                <dbl> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, 0, 2, 0, 0, 0, 0,…
## $ hosp_num              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, …
## $ num_visit_pre_cens_ed <dbl> 0, 0, 1, NA, 2, 0, 0, 7, 0, NA, 0, 0, 1, 0, 0, 0…
## $ any_depres_pre_ed     <dbl> 0, 0, 0, NA, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0…
## $ charg_tot_pre_ed      <dbl> 0.0, 0.0, 1888.2, NA, 1715.3, 0.0, 0.0, 5743.9, …
## $ charg_tot_ed          <dbl> 0, 2751, 15233, NA, 0, 0, 0, 8436, 0, NA, 0, 0, …
## $ ed_charg_tot_pre_ed   <dbl> 0.0, 0.0, 1888.2, NA, 1006.3, 0.0, 0.0, 4542.4, …
## $ ed_charg_tot_ed       <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0.0, 0.0, 7067.0, …
## $ lessHS                <dbl> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ HSorGED               <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ sbp_neg               <dbl> -144, -134, -126, -168, -119, -98, -108, -125, -…
## $ dbp_neg               <dbl> -81, -82, -94, -110, -79, -59, -63, -76, -77, -6…
## $ chl_level_neg         <dbl> -241.0, -229.9, -229.9, -235.4, -177.7, -173.8, …
## $ hdl_level_neg         <dbl> -48.33, -51.33, -38.58, -51.33, -28.08, -31.08, …
## $ a1c_neg               <dbl> -5.037, -5.201, -5.854, -5.364, -5.527, -5.037, …
## $ bmi_neg               <dbl> -26.66, -35.23, -37.12, -24.81, -27.02, -26.26, …
## $ phq_neg               <dbl> -1, -9, -2, -13, -2, -3, -2, -14, -11, -8, -7, -…
## $ cvd_risk_neg          <dbl> -0.1370, -0.1120, -0.0330, -0.2530, -0.1560, -0.…
## $ debt_neg              <dbl> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, …
## $ borrow_neg            <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, …
## $ catastrophic_neg      <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …

Write output file

#####STEP 1-9: Write output file #####
# Save the cleaned wide-format data to the processed-data location.
# processedpath is concatenated directly with the file name, so it must
# already end in a path separator.
# NOTE(review): write.csv() writes row names by default, which adds an unnamed
# index column to the file; confirm that downstream readers expect it before
# switching to row.names = FALSE.
write.csv(
  x = newd_adjusted,
  file = paste0(processedpath, "1_Cleaned_Wide_Dataset.csv")
)