Load the data
#####STEP 2-1: Load data #####
# Read the cleaned wide-format dataset into `newd_adjusted`.
# The directory and file name are joined with paste0(), i.e. without any
# separator, so `processedpath` presumably already ends with a path
# separator -- TODO confirm where `processedpath` is defined.
newd_adjusted <- read.csv(
  file = paste0(processedpath, "1_Cleaned_Wide_Dataset.csv")
)
# Label for the dataset overview that follows in the rendered output.
print("Information on 1_Cleaned_Wide_Dataset:")
## [1] "Information on 1_Cleaned_Wide_Dataset:"
## Rows: 12,208
## Columns: 70
## $ X                     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ person_id             <int> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68, 70…
## $ weight_total_inp      <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.0033, …
## $ gender_inp            <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, …
## $ age_inp               <int> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 25, …
## $ ast_dx_pre_lottery    <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, …
## $ dia_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hbp_dx_pre_lottery    <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ chl_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ami_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ chf_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ emp_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ kid_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cancer_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, …
## $ dep_dx_pre_lottery    <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, …
## $ diabetes              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hypertension          <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ highcholesterol       <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression            <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ phq                   <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8, 7, 3, 2, 0, 2, …
## $ cvd_risk              <dbl> 0.1370, 0.1120, 0.0330, 0.2530, 0.1560, 0.0120, …
## $ doc_any               <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, …
## $ ed_any                <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ hosp_any              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ oop_spend             <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, …
## $ catastrophic          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ debt                  <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, …
## $ borrow                <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, …
## $ hispanic_inp          <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ race_white_inp        <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, …
## $ race_black_inp        <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ race_nwother_inp      <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ a1c                   <dbl> 5.037, 5.201, 5.854, 5.364, 5.527, 5.037, 5.446,…
## $ hdl_level             <dbl> 48.33, 51.33, 38.58, 51.33, 28.08, 31.08, 25.83,…
## $ chl_level             <dbl> 241.0, 229.9, 229.9, 235.4, 177.7, 173.8, 152.7,…
## $ bmi                   <dbl> 26.66, 35.23, 37.12, 24.81, 27.02, 26.26, 27.70,…
## $ sbp                   <int> 144, 134, 126, 168, 119, 98, 108, 125, 100, 104,…
## $ dbp                   <int> 81, 82, 94, 110, 79, 59, 63, 76, 77, 63, 84, 62,…
## $ prescriptions_any     <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, …
## $ prescriptions         <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, 4, 0, 4, 2, 1, 4,…
## $ hypertension_med      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ cholesterol_med       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ diabetes_med          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression_med        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, …
## $ household_id          <int> 100005, 102094, 140688, 100017, 100018, 115253, …
## $ eligibility           <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, …
## $ numhh_list            <int> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ohp_all_ever_inperson <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, …
## $ doc_num               <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12, 6, 0, 3, 0, 10, …
## $ ed_num                <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, 0, 2, 0, 0, 0, 0,…
## $ hosp_num              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, …
## $ num_visit_pre_cens_ed <int> 0, 0, 1, NA, 2, 0, 0, 7, 0, NA, 0, 0, 1, 0, 0, 0…
## $ any_depres_pre_ed     <int> 0, 0, 0, NA, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0…
## $ charg_tot_pre_ed      <dbl> 0.0, 0.0, 1888.2, NA, 1715.3, 0.0, 0.0, 5743.9, …
## $ charg_tot_ed          <dbl> 0, 2751, 15233, NA, 0, 0, 0, 8436, 0, NA, 0, 0, …
## $ ed_charg_tot_pre_ed   <dbl> 0.0, 0.0, 1888.2, NA, 1006.3, 0.0, 0.0, 4542.4, …
## $ ed_charg_tot_ed       <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0.0, 0.0, 7067.0, …
## $ lessHS                <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ HSorGED               <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ sbp_neg               <int> -144, -134, -126, -168, -119, -98, -108, -125, -…
## $ dbp_neg               <int> -81, -82, -94, -110, -79, -59, -63, -76, -77, -6…
## $ chl_level_neg         <dbl> -241.0, -229.9, -229.9, -235.4, -177.7, -173.8, …
## $ hdl_level_neg         <dbl> -48.33, -51.33, -38.58, -51.33, -28.08, -31.08, …
## $ a1c_neg               <dbl> -5.037, -5.201, -5.854, -5.364, -5.527, -5.037, …
## $ bmi_neg               <dbl> -26.66, -35.23, -37.12, -24.81, -27.02, -26.26, …
## $ phq_neg               <int> -1, -9, -2, -13, -2, -3, -2, -14, -11, -8, -7, -…
## $ cvd_risk_neg          <dbl> -0.1370, -0.1120, -0.0330, -0.2530, -0.1560, -0.…
## $ debt_neg              <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, …
## $ borrow_neg            <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, …
## $ catastrophic_neg      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
# Names of the covariates that are passed to the imputation step. The order
# here determines the column order of `newd_bsl_preimp`.
preimp_cov <- c(
  # numhh_list and the `*_inp` columns
  "numhh_list", "gender_inp", "age_inp",
  "hispanic_inp", "race_white_inp", "race_black_inp", "race_nwother_inp",
  # `*_dx_pre_lottery` columns
  "ast_dx_pre_lottery", "dia_dx_pre_lottery", "hbp_dx_pre_lottery",
  "chl_dx_pre_lottery", "ami_dx_pre_lottery", "chf_dx_pre_lottery",
  "emp_dx_pre_lottery", "kid_dx_pre_lottery", "cancer_dx_pre_lottery",
  "dep_dx_pre_lottery",
  # `*_pre_ed` columns
  "charg_tot_pre_ed", "ed_charg_tot_pre_ed", "num_visit_pre_cens_ed",
  "any_depres_pre_ed",
  # lessHS / HSorGED
  "lessHS", "HSorGED"
)

# Subset of the data that will be imputed.
newd_bsl_preimp <- newd_adjusted[, preimp_cov]

# Every remaining column is kept aside, untouched by the imputation:
# collect the names that are not in `preimp_cov`, then subset with them.
nonimp_cols <- setdiff(names(newd_adjusted), preimp_cov)
newd_bsl_nonimp <- newd_adjusted[, nonimp_cols]

# Label for the overview of the imputation subset in the rendered output.
print("Information on data for imputation:")
## [1] "Information on data for imputation:"
## Rows: 12,208
## Columns: 23
## $ numhh_list            <int> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ gender_inp            <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, …
## $ age_inp               <int> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 25, …
## $ hispanic_inp          <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ race_white_inp        <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, …
## $ race_black_inp        <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ race_nwother_inp      <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ast_dx_pre_lottery    <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, …
## $ dia_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hbp_dx_pre_lottery    <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ chl_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ami_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ chf_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ emp_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ kid_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cancer_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, …
## $ dep_dx_pre_lottery    <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, …
## $ charg_tot_pre_ed      <dbl> 0.0, 0.0, 1888.2, NA, 1715.3, 0.0, 0.0, 5743.9, …
## $ ed_charg_tot_pre_ed   <dbl> 0.0, 0.0, 1888.2, NA, 1006.3, 0.0, 0.0, 4542.4, …
## $ num_visit_pre_cens_ed <int> 0, 0, 1, NA, 2, 0, 0, 7, 0, NA, 0, 0, 1, 0, 0, 0…
## $ any_depres_pre_ed     <int> 0, 0, 0, NA, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0…
## $ lessHS                <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ HSorGED               <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, …
# Report the number of missing values per column in the imputation subset.
print("Missing values in data for imputation:")
## [1] "Missing values in data for imputation:"
# is.na() flags every missing cell; colSums() totals the flags by column.
colSums(is.na(newd_bsl_preimp))
##            numhh_list            gender_inp               age_inp 
##                     0                     0                     0 
##          hispanic_inp        race_white_inp        race_black_inp 
##                     0                     0                     0 
##      race_nwother_inp    ast_dx_pre_lottery    dia_dx_pre_lottery 
##                     0                     0                     0 
##    hbp_dx_pre_lottery    chl_dx_pre_lottery    ami_dx_pre_lottery 
##                     0                     0                     0 
##    chf_dx_pre_lottery    emp_dx_pre_lottery    kid_dx_pre_lottery 
##                     0                     0                     0 
## cancer_dx_pre_lottery    dep_dx_pre_lottery      charg_tot_pre_ed 
##                     0                     0                  2053 
##   ed_charg_tot_pre_ed num_visit_pre_cens_ed     any_depres_pre_ed 
##                  2058                  2055                  2050 
##                lessHS               HSorGED 
##                     0                     0
# Label for the overview of the columns that are left out of imputation.
# NOTE(review): the call that produced the column listing shown after this
# line is not visible here -- confirm it inspects `newd_bsl_nonimp`.
print("Information on data not for imputation:")
## [1] "Information on data not for imputation:"
## Rows: 12,208
## Columns: 47
## $ X                     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ person_id             <int> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68, 70…
## $ weight_total_inp      <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.0033, …
## $ diabetes              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hypertension          <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ highcholesterol       <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression            <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ phq                   <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8, 7, 3, 2, 0, 2, …
## $ cvd_risk              <dbl> 0.1370, 0.1120, 0.0330, 0.2530, 0.1560, 0.0120, …
## $ doc_any               <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, …
## $ ed_any                <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ hosp_any              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ oop_spend             <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, …
## $ catastrophic          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ debt                  <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, …
## $ borrow                <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, …
## $ a1c                   <dbl> 5.037, 5.201, 5.854, 5.364, 5.527, 5.037, 5.446,…
## $ hdl_level             <dbl> 48.33, 51.33, 38.58, 51.33, 28.08, 31.08, 25.83,…
## $ chl_level             <dbl> 241.0, 229.9, 229.9, 235.4, 177.7, 173.8, 152.7,…
## $ bmi                   <dbl> 26.66, 35.23, 37.12, 24.81, 27.02, 26.26, 27.70,…
## $ sbp                   <int> 144, 134, 126, 168, 119, 98, 108, 125, 100, 104,…
## $ dbp                   <int> 81, 82, 94, 110, 79, 59, 63, 76, 77, 63, 84, 62,…
## $ prescriptions_any     <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, …
## $ prescriptions         <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, 4, 0, 4, 2, 1, 4,…
## $ hypertension_med      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ cholesterol_med       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ diabetes_med          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression_med        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, …
## $ household_id          <int> 100005, 102094, 140688, 100017, 100018, 115253, …
## $ eligibility           <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, …
## $ ohp_all_ever_inperson <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, …
## $ doc_num               <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12, 6, 0, 3, 0, 10, …
## $ ed_num                <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, 0, 2, 0, 0, 0, 0,…
## $ hosp_num              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, …
## $ charg_tot_ed          <dbl> 0, 2751, 15233, NA, 0, 0, 0, 8436, 0, NA, 0, 0, …
## $ ed_charg_tot_ed       <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0.0, 0.0, 7067.0, …
## $ sbp_neg               <int> -144, -134, -126, -168, -119, -98, -108, -125, -…
## $ dbp_neg               <int> -81, -82, -94, -110, -79, -59, -63, -76, -77, -6…
## $ chl_level_neg         <dbl> -241.0, -229.9, -229.9, -235.4, -177.7, -173.8, …
## $ hdl_level_neg         <dbl> -48.33, -51.33, -38.58, -51.33, -28.08, -31.08, …
## $ a1c_neg               <dbl> -5.037, -5.201, -5.854, -5.364, -5.527, -5.037, …
## $ bmi_neg               <dbl> -26.66, -35.23, -37.12, -24.81, -27.02, -26.26, …
## $ phq_neg               <int> -1, -9, -2, -13, -2, -3, -2, -14, -11, -8, -7, -…
## $ cvd_risk_neg          <dbl> -0.1370, -0.1120, -0.0330, -0.2530, -0.1560, -0.…
## $ debt_neg              <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, …
## $ borrow_neg            <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, …
## $ catastrophic_neg      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
# Report the number of missing values per column in the non-imputed subset.
print("Missing values in data not for imputation:")
## [1] "Missing values in data not for imputation:"
# is.na() flags every missing cell; colSums() totals the flags by column.
colSums(is.na(newd_bsl_nonimp))
##                     X             person_id      weight_total_inp 
##                     0                     0                     0 
##              diabetes          hypertension       highcholesterol 
##                    41                   281                   332 
##            depression                   phq              cvd_risk 
##                   131                    64                  2801 
##               doc_any                ed_any              hosp_any 
##                    17                    18                    17 
##             oop_spend          catastrophic                  debt 
##                    28                   424                   114 
##                borrow                   a1c             hdl_level 
##                    10                    89                    57 
##             chl_level                   bmi                   sbp 
##                    55                    54                    41 
##                   dbp     prescriptions_any         prescriptions 
##                    41                     2                   311 
##      hypertension_med       cholesterol_med          diabetes_med 
##                     0                     0                     0 
##        depression_med          household_id           eligibility 
##                     0                     0                     0 
## ohp_all_ever_inperson               doc_num                ed_num 
##                     0                    64                    47 
##              hosp_num          charg_tot_ed       ed_charg_tot_ed 
##                    47                  2055                  2056 
##               sbp_neg               dbp_neg         chl_level_neg 
##                    41                    41                    55 
##         hdl_level_neg               a1c_neg               bmi_neg 
##                    57                    89                    54 
##               phq_neg          cvd_risk_neg              debt_neg 
##                    64                  2801                   114 
##            borrow_neg      catastrophic_neg 
##                    10                   424
 
Perform imputation
#####STEP 2-2: Random forest imputation #####
# Impute the missing values of the baseline covariates with missRanger.
#   pmm.k       - predictive mean matching with 5 candidate values
#   num.trees   - 100 trees per forest
#   seed        - taken from `seedset` (defined outside this section)
#   num.threads - taken from `numthreadsset` (defined outside this section)
#   verbose     - 0, i.e. no progress output
newd_preimp <- missRanger(
  data = newd_bsl_preimp,
  pmm.k = 5,
  num.trees = 100,
  num.threads = numthreadsset,
  seed = seedset,
  verbose = 0
)

# `numhh_list` is turned into a factor only after imputation, so the
# imputation itself ran on the column type it had in `newd_bsl_preimp`.
newd_preimp[["numhh_list"]] <- as.factor(newd_preimp[["numhh_list"]])

# Re-attach the columns that were kept out of the imputation; the imputed
# columns come first, followed by the non-imputed ones.
newd_imp <- cbind(newd_preimp, newd_bsl_nonimp)

# Label for the summary statistics that follow in the rendered output.
print("Summary statistics of the imputed data:")
## [1] "Summary statistics of the imputed data:"
##  numhh_list   gender_inp       age_inp      hispanic_inp  race_white_inp 
##  1:9239     Min.   :0.000   Min.   :19.0   Min.   :0.00   Min.   :0.000  
##  2:2951     1st Qu.:0.000   1st Qu.:31.0   1st Qu.:0.00   1st Qu.:0.000  
##  3:  18     Median :1.000   Median :41.0   Median :0.00   Median :1.000  
##             Mean   :0.566   Mean   :40.8   Mean   :0.18   Mean   :0.687  
##             3rd Qu.:1.000   3rd Qu.:50.0   3rd Qu.:0.00   3rd Qu.:1.000  
##             Max.   :1.000   Max.   :71.0   Max.   :1.00   Max.   :1.000  
##  race_black_inp  race_nwother_inp ast_dx_pre_lottery dia_dx_pre_lottery
##  Min.   :0.000   Min.   :0.000    Min.   :0.000      Min.   :0.0000    
##  1st Qu.:0.000   1st Qu.:0.000    1st Qu.:0.000      1st Qu.:0.0000    
##  Median :0.000   Median :0.000    Median :0.000      Median :0.0000    
##  Mean   :0.103   Mean   :0.144    Mean   :0.193      Mean   :0.0711    
##  3rd Qu.:0.000   3rd Qu.:0.000    3rd Qu.:0.000      3rd Qu.:0.0000    
##  Max.   :1.000   Max.   :1.000    Max.   :1.000      Max.   :1.0000    
##  hbp_dx_pre_lottery chl_dx_pre_lottery ami_dx_pre_lottery chf_dx_pre_lottery
##  Min.   :0.000      Min.   :0.000      Min.   :0.0000     Min.   :0.0000    
##  1st Qu.:0.000      1st Qu.:0.000      1st Qu.:0.0000     1st Qu.:0.0000    
##  Median :0.000      Median :0.000      Median :0.0000     Median :0.0000    
##  Mean   :0.182      Mean   :0.127      Mean   :0.0197     Mean   :0.0111    
##  3rd Qu.:0.000      3rd Qu.:0.000      3rd Qu.:0.0000     3rd Qu.:0.0000    
##  Max.   :1.000      Max.   :1.000      Max.   :1.0000     Max.   :1.0000    
##  emp_dx_pre_lottery kid_dx_pre_lottery cancer_dx_pre_lottery dep_dx_pre_lottery
##  Min.   :0.000      Min.   :0.0000     Min.   :0.0000        Min.   :0.000     
##  1st Qu.:0.000      1st Qu.:0.0000     1st Qu.:0.0000        1st Qu.:0.000     
##  Median :0.000      Median :0.0000     Median :0.0000        Median :0.000     
##  Mean   :0.022      Mean   :0.0186     Mean   :0.0428        Mean   :0.341     
##  3rd Qu.:0.000      3rd Qu.:0.0000     3rd Qu.:0.0000        3rd Qu.:1.000     
##  Max.   :1.000      Max.   :1.0000     Max.   :1.0000        Max.   :1.000     
##  charg_tot_pre_ed ed_charg_tot_pre_ed num_visit_pre_cens_ed any_depres_pre_ed
##  Min.   :     0   Min.   :    0       Min.   : 0.000        Min.   :0.0000   
##  1st Qu.:     0   1st Qu.:    0       1st Qu.: 0.000        1st Qu.:0.0000   
##  Median :     0   Median :    0       Median : 0.000        Median :0.0000   
##  Mean   :  2216   Mean   :  913       Mean   : 0.798        Mean   :0.0157   
##  3rd Qu.:   774   3rd Qu.:  626       3rd Qu.: 1.000        3rd Qu.:0.0000   
##  Max.   :180055   Max.   :41246       Max.   :17.000        Max.   :1.0000   
##      lessHS         HSorGED     
##  Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.000   1st Qu.:0.000  
##  Median :0.000   Median :0.000  
##  Mean   :0.205   Mean   :0.454  
##  3rd Qu.:0.000   3rd Qu.:1.000  
##  Max.   :1.000   Max.   :1.000
# Count the missing values that remain, per column, in the imputed subset.
print("Missing values in the imputed data:")
## [1] "Missing values in the imputed data:"
# is.na() flags every missing cell; colSums() totals the flags by column.
colSums(is.na(newd_preimp))
##            numhh_list            gender_inp               age_inp 
##                     0                     0                     0 
##          hispanic_inp        race_white_inp        race_black_inp 
##                     0                     0                     0 
##      race_nwother_inp    ast_dx_pre_lottery    dia_dx_pre_lottery 
##                     0                     0                     0 
##    hbp_dx_pre_lottery    chl_dx_pre_lottery    ami_dx_pre_lottery 
##                     0                     0                     0 
##    chf_dx_pre_lottery    emp_dx_pre_lottery    kid_dx_pre_lottery 
##                     0                     0                     0 
## cancer_dx_pre_lottery    dep_dx_pre_lottery      charg_tot_pre_ed 
##                     0                     0                     0 
##   ed_charg_tot_pre_ed num_visit_pre_cens_ed     any_depres_pre_ed 
##                     0                     0                     0 
##                lessHS               HSorGED 
##                     0                     0
# Label for the overview of the combined data frame `newd_imp`.
# NOTE(review): the call that produced the column listing shown after this
# line is not visible here -- confirm it inspects `newd_imp`.
print("Information on newd_imp:")
## [1] "Information on newd_imp:"
## Rows: 12,208
## Columns: 70
## $ numhh_list            <fct> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ gender_inp            <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, …
## $ age_inp               <int> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 25, …
## $ hispanic_inp          <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ race_white_inp        <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, …
## $ race_black_inp        <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ race_nwother_inp      <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ast_dx_pre_lottery    <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, …
## $ dia_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hbp_dx_pre_lottery    <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ chl_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ami_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ chf_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ emp_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ kid_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cancer_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, …
## $ dep_dx_pre_lottery    <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, …
## $ charg_tot_pre_ed      <dbl> 0.0, 0.0, 1888.2, 0.0, 1715.3, 0.0, 0.0, 5743.9,…
## $ ed_charg_tot_pre_ed   <dbl> 0.0, 0.0, 1888.2, 0.0, 1006.3, 0.0, 0.0, 4542.4,…
## $ num_visit_pre_cens_ed <int> 0, 0, 1, 0, 2, 0, 0, 7, 0, 2, 0, 0, 1, 0, 0, 0, …
## $ any_depres_pre_ed     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ lessHS                <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ HSorGED               <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ X                     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ person_id             <int> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68, 70…
## $ weight_total_inp      <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.0033, …
## $ diabetes              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hypertension          <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ highcholesterol       <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression            <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ phq                   <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8, 7, 3, 2, 0, 2, …
## $ cvd_risk              <dbl> 0.1370, 0.1120, 0.0330, 0.2530, 0.1560, 0.0120, …
## $ doc_any               <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, …
## $ ed_any                <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ hosp_any              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ oop_spend             <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, …
## $ catastrophic          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ debt                  <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, …
## $ borrow                <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, …
## $ a1c                   <dbl> 5.037, 5.201, 5.854, 5.364, 5.527, 5.037, 5.446,…
## $ hdl_level             <dbl> 48.33, 51.33, 38.58, 51.33, 28.08, 31.08, 25.83,…
## $ chl_level             <dbl> 241.0, 229.9, 229.9, 235.4, 177.7, 173.8, 152.7,…
## $ bmi                   <dbl> 26.66, 35.23, 37.12, 24.81, 27.02, 26.26, 27.70,…
## $ sbp                   <int> 144, 134, 126, 168, 119, 98, 108, 125, 100, 104,…
## $ dbp                   <int> 81, 82, 94, 110, 79, 59, 63, 76, 77, 63, 84, 62,…
## $ prescriptions_any     <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, …
## $ prescriptions         <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, 4, 0, 4, 2, 1, 4,…
## $ hypertension_med      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ cholesterol_med       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ diabetes_med          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression_med        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, …
## $ household_id          <int> 100005, 102094, 140688, 100017, 100018, 115253, …
## $ eligibility           <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, …
## $ ohp_all_ever_inperson <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, …
## $ doc_num               <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12, 6, 0, 3, 0, 10, …
## $ ed_num                <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, 0, 2, 0, 0, 0, 0,…
## $ hosp_num              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, …
## $ charg_tot_ed          <dbl> 0, 2751, 15233, NA, 0, 0, 0, 8436, 0, NA, 0, 0, …
## $ ed_charg_tot_ed       <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0.0, 0.0, 7067.0, …
## $ sbp_neg               <int> -144, -134, -126, -168, -119, -98, -108, -125, -…
## $ dbp_neg               <int> -81, -82, -94, -110, -79, -59, -63, -76, -77, -6…
## $ chl_level_neg         <dbl> -241.0, -229.9, -229.9, -235.4, -177.7, -173.8, …
## $ hdl_level_neg         <dbl> -48.33, -51.33, -38.58, -51.33, -28.08, -31.08, …
## $ a1c_neg               <dbl> -5.037, -5.201, -5.854, -5.364, -5.527, -5.037, …
## $ bmi_neg               <dbl> -26.66, -35.23, -37.12, -24.81, -27.02, -26.26, …
## $ phq_neg               <int> -1, -9, -2, -13, -2, -3, -2, -14, -11, -8, -7, -…
## $ cvd_risk_neg          <dbl> -0.1370, -0.1120, -0.0330, -0.2530, -0.1560, -0.…
## $ debt_neg              <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, …
## $ borrow_neg            <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, …
## $ catastrophic_neg      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
# Label for the per-column missing-value counts of `newd_imp`.
# NOTE(review): the call that produced the counts shown after this line is
# not visible here -- presumably colSums(is.na(newd_imp)); confirm.
print("Missing values in newd_imp:")
## [1] "Missing values in newd_imp:"
##            numhh_list            gender_inp               age_inp 
##                     0                     0                     0 
##          hispanic_inp        race_white_inp        race_black_inp 
##                     0                     0                     0 
##      race_nwother_inp    ast_dx_pre_lottery    dia_dx_pre_lottery 
##                     0                     0                     0 
##    hbp_dx_pre_lottery    chl_dx_pre_lottery    ami_dx_pre_lottery 
##                     0                     0                     0 
##    chf_dx_pre_lottery    emp_dx_pre_lottery    kid_dx_pre_lottery 
##                     0                     0                     0 
## cancer_dx_pre_lottery    dep_dx_pre_lottery      charg_tot_pre_ed 
##                     0                     0                     0 
##   ed_charg_tot_pre_ed num_visit_pre_cens_ed     any_depres_pre_ed 
##                     0                     0                     0 
##                lessHS               HSorGED                     X 
##                     0                     0                     0 
##             person_id      weight_total_inp              diabetes 
##                     0                     0                    41 
##          hypertension       highcholesterol            depression 
##                   281                   332                   131 
##                   phq              cvd_risk               doc_any 
##                    64                  2801                    17 
##                ed_any              hosp_any             oop_spend 
##                    18                    17                    28 
##          catastrophic                  debt                borrow 
##                   424                   114                    10 
##                   a1c             hdl_level             chl_level 
##                    89                    57                    55 
##                   bmi                   sbp                   dbp 
##                    54                    41                    41 
##     prescriptions_any         prescriptions      hypertension_med 
##                     2                   311                     0 
##       cholesterol_med          diabetes_med        depression_med 
##                     0                     0                     0 
##          household_id           eligibility ohp_all_ever_inperson 
##                     0                     0                     0 
##               doc_num                ed_num              hosp_num 
##                    64                    47                    47 
##          charg_tot_ed       ed_charg_tot_ed               sbp_neg 
##                  2055                  2056                    41 
##               dbp_neg         chl_level_neg         hdl_level_neg 
##                    41                    55                    57 
##               a1c_neg               bmi_neg               phq_neg 
##                    89                    54                    64 
##          cvd_risk_neg              debt_neg            borrow_neg 
##                  2801                   114                    10 
##      catastrophic_neg 
##                   424