Load the data
#####STEP 2-1: Load data #####
newd_adjusted <- read.csv(paste0(processedpath, "1_Cleaned_Wide_Dataset.csv"))
print("Information on 1_Cleaned_Wide_Dataset:")
## [1] "Information on 1_Cleaned_Wide_Dataset:"
## Rows: 12,208
## Columns: 70
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ person_id <int> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68, 70…
## $ weight_total_inp <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.0033, …
## $ gender_inp <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, …
## $ age_inp <int> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 25, …
## $ ast_dx_pre_lottery <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, …
## $ dia_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hbp_dx_pre_lottery <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ chl_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ami_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ chf_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ emp_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ kid_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cancer_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, …
## $ dep_dx_pre_lottery <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, …
## $ diabetes <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hypertension <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ highcholesterol <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ phq <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8, 7, 3, 2, 0, 2, …
## $ cvd_risk <dbl> 0.1370, 0.1120, 0.0330, 0.2530, 0.1560, 0.0120, …
## $ doc_any <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, …
## $ ed_any <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ hosp_any <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ oop_spend <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, …
## $ catastrophic <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ debt <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, …
## $ borrow <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, …
## $ hispanic_inp <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ race_white_inp <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, …
## $ race_black_inp <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ race_nwother_inp <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ a1c <dbl> 5.037, 5.201, 5.854, 5.364, 5.527, 5.037, 5.446,…
## $ hdl_level <dbl> 48.33, 51.33, 38.58, 51.33, 28.08, 31.08, 25.83,…
## $ chl_level <dbl> 241.0, 229.9, 229.9, 235.4, 177.7, 173.8, 152.7,…
## $ bmi <dbl> 26.66, 35.23, 37.12, 24.81, 27.02, 26.26, 27.70,…
## $ sbp <int> 144, 134, 126, 168, 119, 98, 108, 125, 100, 104,…
## $ dbp <int> 81, 82, 94, 110, 79, 59, 63, 76, 77, 63, 84, 62,…
## $ prescriptions_any <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, …
## $ prescriptions <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, 4, 0, 4, 2, 1, 4,…
## $ hypertension_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ cholesterol_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ diabetes_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, …
## $ household_id <int> 100005, 102094, 140688, 100017, 100018, 115253, …
## $ eligibility <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, …
## $ numhh_list <int> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ohp_all_ever_inperson <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, …
## $ doc_num <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12, 6, 0, 3, 0, 10, …
## $ ed_num <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, 0, 2, 0, 0, 0, 0,…
## $ hosp_num <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, …
## $ num_visit_pre_cens_ed <int> 0, 0, 1, NA, 2, 0, 0, 7, 0, NA, 0, 0, 1, 0, 0, 0…
## $ any_depres_pre_ed <int> 0, 0, 0, NA, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0…
## $ charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, NA, 1715.3, 0.0, 0.0, 5743.9, …
## $ charg_tot_ed <dbl> 0, 2751, 15233, NA, 0, 0, 0, 8436, 0, NA, 0, 0, …
## $ ed_charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, NA, 1006.3, 0.0, 0.0, 4542.4, …
## $ ed_charg_tot_ed <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0.0, 0.0, 7067.0, …
## $ lessHS <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ HSorGED <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ sbp_neg <int> -144, -134, -126, -168, -119, -98, -108, -125, -…
## $ dbp_neg <int> -81, -82, -94, -110, -79, -59, -63, -76, -77, -6…
## $ chl_level_neg <dbl> -241.0, -229.9, -229.9, -235.4, -177.7, -173.8, …
## $ hdl_level_neg <dbl> -48.33, -51.33, -38.58, -51.33, -28.08, -31.08, …
## $ a1c_neg <dbl> -5.037, -5.201, -5.854, -5.364, -5.527, -5.037, …
## $ bmi_neg <dbl> -26.66, -35.23, -37.12, -24.81, -27.02, -26.26, …
## $ phq_neg <int> -1, -9, -2, -13, -2, -3, -2, -14, -11, -8, -7, -…
## $ cvd_risk_neg <dbl> -0.1370, -0.1120, -0.0330, -0.2530, -0.1560, -0.…
## $ debt_neg <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, …
## $ borrow_neg <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, …
## $ catastrophic_neg <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
preimp_cov <- c("numhh_list", "gender_inp", "age_inp",
"hispanic_inp", "race_white_inp", "race_black_inp", "race_nwother_inp", "ast_dx_pre_lottery",
"dia_dx_pre_lottery", "hbp_dx_pre_lottery", "chl_dx_pre_lottery", "ami_dx_pre_lottery",
"chf_dx_pre_lottery", "emp_dx_pre_lottery", "kid_dx_pre_lottery", "cancer_dx_pre_lottery",
"dep_dx_pre_lottery", "charg_tot_pre_ed", "ed_charg_tot_pre_ed", "num_visit_pre_cens_ed", "any_depres_pre_ed",
"lessHS", "HSorGED")
# Data used for imputation
newd_bsl_preimp <- newd_adjusted[, preimp_cov]
# Get all column names from newd_adjusted that are not in preimp_cov
nonimp_cols <- setdiff(colnames(newd_adjusted), preimp_cov)
# Create data frame with non-imputed columns
newd_bsl_nonimp <- newd_adjusted[, nonimp_cols]
print("Information on data for imputation:")
## [1] "Information on data for imputation:"
## Rows: 12,208
## Columns: 23
## $ numhh_list <int> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ gender_inp <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, …
## $ age_inp <int> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 25, …
## $ hispanic_inp <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ race_white_inp <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, …
## $ race_black_inp <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ race_nwother_inp <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ast_dx_pre_lottery <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, …
## $ dia_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hbp_dx_pre_lottery <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ chl_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ami_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ chf_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ emp_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ kid_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cancer_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, …
## $ dep_dx_pre_lottery <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, …
## $ charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, NA, 1715.3, 0.0, 0.0, 5743.9, …
## $ ed_charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, NA, 1006.3, 0.0, 0.0, 4542.4, …
## $ num_visit_pre_cens_ed <int> 0, 0, 1, NA, 2, 0, 0, 7, 0, NA, 0, 0, 1, 0, 0, 0…
## $ any_depres_pre_ed <int> 0, 0, 0, NA, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0…
## $ lessHS <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ HSorGED <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, …
print("Missing values in data for imputation:")
## [1] "Missing values in data for imputation:"
colSums(is.na(newd_bsl_preimp))
## numhh_list gender_inp age_inp
## 0 0 0
## hispanic_inp race_white_inp race_black_inp
## 0 0 0
## race_nwother_inp ast_dx_pre_lottery dia_dx_pre_lottery
## 0 0 0
## hbp_dx_pre_lottery chl_dx_pre_lottery ami_dx_pre_lottery
## 0 0 0
## chf_dx_pre_lottery emp_dx_pre_lottery kid_dx_pre_lottery
## 0 0 0
## cancer_dx_pre_lottery dep_dx_pre_lottery charg_tot_pre_ed
## 0 0 2053
## ed_charg_tot_pre_ed num_visit_pre_cens_ed any_depres_pre_ed
## 2058 2055 2050
## lessHS HSorGED
## 0 0
print("Information on data not for imputation:")
## [1] "Information on data not for imputation:"
## Rows: 12,208
## Columns: 47
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ person_id <int> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68, 70…
## $ weight_total_inp <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.0033, …
## $ diabetes <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hypertension <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ highcholesterol <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ phq <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8, 7, 3, 2, 0, 2, …
## $ cvd_risk <dbl> 0.1370, 0.1120, 0.0330, 0.2530, 0.1560, 0.0120, …
## $ doc_any <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, …
## $ ed_any <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ hosp_any <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ oop_spend <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, …
## $ catastrophic <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ debt <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, …
## $ borrow <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, …
## $ a1c <dbl> 5.037, 5.201, 5.854, 5.364, 5.527, 5.037, 5.446,…
## $ hdl_level <dbl> 48.33, 51.33, 38.58, 51.33, 28.08, 31.08, 25.83,…
## $ chl_level <dbl> 241.0, 229.9, 229.9, 235.4, 177.7, 173.8, 152.7,…
## $ bmi <dbl> 26.66, 35.23, 37.12, 24.81, 27.02, 26.26, 27.70,…
## $ sbp <int> 144, 134, 126, 168, 119, 98, 108, 125, 100, 104,…
## $ dbp <int> 81, 82, 94, 110, 79, 59, 63, 76, 77, 63, 84, 62,…
## $ prescriptions_any <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, …
## $ prescriptions <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, 4, 0, 4, 2, 1, 4,…
## $ hypertension_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ cholesterol_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ diabetes_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, …
## $ household_id <int> 100005, 102094, 140688, 100017, 100018, 115253, …
## $ eligibility <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, …
## $ ohp_all_ever_inperson <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, …
## $ doc_num <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12, 6, 0, 3, 0, 10, …
## $ ed_num <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, 0, 2, 0, 0, 0, 0,…
## $ hosp_num <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, …
## $ charg_tot_ed <dbl> 0, 2751, 15233, NA, 0, 0, 0, 8436, 0, NA, 0, 0, …
## $ ed_charg_tot_ed <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0.0, 0.0, 7067.0, …
## $ sbp_neg <int> -144, -134, -126, -168, -119, -98, -108, -125, -…
## $ dbp_neg <int> -81, -82, -94, -110, -79, -59, -63, -76, -77, -6…
## $ chl_level_neg <dbl> -241.0, -229.9, -229.9, -235.4, -177.7, -173.8, …
## $ hdl_level_neg <dbl> -48.33, -51.33, -38.58, -51.33, -28.08, -31.08, …
## $ a1c_neg <dbl> -5.037, -5.201, -5.854, -5.364, -5.527, -5.037, …
## $ bmi_neg <dbl> -26.66, -35.23, -37.12, -24.81, -27.02, -26.26, …
## $ phq_neg <int> -1, -9, -2, -13, -2, -3, -2, -14, -11, -8, -7, -…
## $ cvd_risk_neg <dbl> -0.1370, -0.1120, -0.0330, -0.2530, -0.1560, -0.…
## $ debt_neg <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, …
## $ borrow_neg <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, …
## $ catastrophic_neg <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
print("Missing values in data not for imputation:")
## [1] "Missing values in data not for imputation:"
colSums(is.na(newd_bsl_nonimp))
## X person_id weight_total_inp
## 0 0 0
## diabetes hypertension highcholesterol
## 41 281 332
## depression phq cvd_risk
## 131 64 2801
## doc_any ed_any hosp_any
## 17 18 17
## oop_spend catastrophic debt
## 28 424 114
## borrow a1c hdl_level
## 10 89 57
## chl_level bmi sbp
## 55 54 41
## dbp prescriptions_any prescriptions
## 41 2 311
## hypertension_med cholesterol_med diabetes_med
## 0 0 0
## depression_med household_id eligibility
## 0 0 0
## ohp_all_ever_inperson doc_num ed_num
## 0 64 47
## hosp_num charg_tot_ed ed_charg_tot_ed
## 47 2055 2056
## sbp_neg dbp_neg chl_level_neg
## 41 41 55
## hdl_level_neg a1c_neg bmi_neg
## 57 89 54
## phq_neg cvd_risk_neg debt_neg
## 64 2801 114
## borrow_neg catastrophic_neg
## 10 424
Perform imputation
#####STEP 2-2: Random forest imputation #####
newd_preimp <- missRanger(newd_bsl_preimp,
num.trees = 100,
pmm.k = 5,
verbose = 0,
seed = seedset,
num.threads = numthreadsset)
# Convert 'numhh_list' to factor
newd_preimp$numhh_list <- as.factor(newd_preimp$numhh_list)
# Combine imputed data and nonimputed data
newd_imp <- cbind(newd_preimp, newd_bsl_nonimp)
print("Summary statistics of the imputed data:")
## [1] "Summary statistics of the imputed data:"
## numhh_list gender_inp age_inp hispanic_inp race_white_inp
## 1:9239 Min. :0.000 Min. :19.0 Min. :0.00 Min. :0.000
## 2:2951 1st Qu.:0.000 1st Qu.:31.0 1st Qu.:0.00 1st Qu.:0.000
## 3: 18 Median :1.000 Median :41.0 Median :0.00 Median :1.000
## Mean :0.566 Mean :40.8 Mean :0.18 Mean :0.687
## 3rd Qu.:1.000 3rd Qu.:50.0 3rd Qu.:0.00 3rd Qu.:1.000
## Max. :1.000 Max. :71.0 Max. :1.00 Max. :1.000
## race_black_inp race_nwother_inp ast_dx_pre_lottery dia_dx_pre_lottery
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.000 Median :0.000 Median :0.000 Median :0.0000
## Mean :0.103 Mean :0.144 Mean :0.193 Mean :0.0711
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.0000
## hbp_dx_pre_lottery chl_dx_pre_lottery ami_dx_pre_lottery chf_dx_pre_lottery
## Min. :0.000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.182 Mean :0.127 Mean :0.0197 Mean :0.0111
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.000 Max. :1.0000 Max. :1.0000
## emp_dx_pre_lottery kid_dx_pre_lottery cancer_dx_pre_lottery dep_dx_pre_lottery
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.022 Mean :0.0186 Mean :0.0428 Mean :0.341
## 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.000
## charg_tot_pre_ed ed_charg_tot_pre_ed num_visit_pre_cens_ed any_depres_pre_ed
## Min. : 0 Min. : 0 Min. : 0.000 Min. :0.0000
## 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0.000 1st Qu.:0.0000
## Median : 0 Median : 0 Median : 0.000 Median :0.0000
## Mean : 2216 Mean : 913 Mean : 0.798 Mean :0.0157
## 3rd Qu.: 774 3rd Qu.: 626 3rd Qu.: 1.000 3rd Qu.:0.0000
## Max. :180055 Max. :41246 Max. :17.000 Max. :1.0000
## lessHS HSorGED
## Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000
## Mean :0.205 Mean :0.454
## 3rd Qu.:0.000 3rd Qu.:1.000
## Max. :1.000 Max. :1.000
print("Missing values in the imputed data:")
## [1] "Missing values in the imputed data:"
colSums(is.na(newd_preimp))
## numhh_list gender_inp age_inp
## 0 0 0
## hispanic_inp race_white_inp race_black_inp
## 0 0 0
## race_nwother_inp ast_dx_pre_lottery dia_dx_pre_lottery
## 0 0 0
## hbp_dx_pre_lottery chl_dx_pre_lottery ami_dx_pre_lottery
## 0 0 0
## chf_dx_pre_lottery emp_dx_pre_lottery kid_dx_pre_lottery
## 0 0 0
## cancer_dx_pre_lottery dep_dx_pre_lottery charg_tot_pre_ed
## 0 0 0
## ed_charg_tot_pre_ed num_visit_pre_cens_ed any_depres_pre_ed
## 0 0 0
## lessHS HSorGED
## 0 0
print("Information on newd_imp:")
## [1] "Information on newd_imp:"
## Rows: 12,208
## Columns: 70
## $ numhh_list <fct> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ gender_inp <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, …
## $ age_inp <int> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 25, …
## $ hispanic_inp <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ race_white_inp <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, …
## $ race_black_inp <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ race_nwother_inp <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ast_dx_pre_lottery <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, …
## $ dia_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hbp_dx_pre_lottery <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ chl_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ami_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ chf_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ emp_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ kid_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cancer_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, …
## $ dep_dx_pre_lottery <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, …
## $ charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, 0.0, 1715.3, 0.0, 0.0, 5743.9,…
## $ ed_charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, 0.0, 1006.3, 0.0, 0.0, 4542.4,…
## $ num_visit_pre_cens_ed <int> 0, 0, 1, 0, 2, 0, 0, 7, 0, 2, 0, 0, 1, 0, 0, 0, …
## $ any_depres_pre_ed <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ lessHS <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ HSorGED <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ person_id <int> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68, 70…
## $ weight_total_inp <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.0033, …
## $ diabetes <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hypertension <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ highcholesterol <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ phq <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8, 7, 3, 2, 0, 2, …
## $ cvd_risk <dbl> 0.1370, 0.1120, 0.0330, 0.2530, 0.1560, 0.0120, …
## $ doc_any <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, …
## $ ed_any <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ hosp_any <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ oop_spend <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, …
## $ catastrophic <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ debt <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, …
## $ borrow <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, …
## $ a1c <dbl> 5.037, 5.201, 5.854, 5.364, 5.527, 5.037, 5.446,…
## $ hdl_level <dbl> 48.33, 51.33, 38.58, 51.33, 28.08, 31.08, 25.83,…
## $ chl_level <dbl> 241.0, 229.9, 229.9, 235.4, 177.7, 173.8, 152.7,…
## $ bmi <dbl> 26.66, 35.23, 37.12, 24.81, 27.02, 26.26, 27.70,…
## $ sbp <int> 144, 134, 126, 168, 119, 98, 108, 125, 100, 104,…
## $ dbp <int> 81, 82, 94, 110, 79, 59, 63, 76, 77, 63, 84, 62,…
## $ prescriptions_any <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, …
## $ prescriptions <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, 4, 0, 4, 2, 1, 4,…
## $ hypertension_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ cholesterol_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ diabetes_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, …
## $ household_id <int> 100005, 102094, 140688, 100017, 100018, 115253, …
## $ eligibility <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, …
## $ ohp_all_ever_inperson <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, …
## $ doc_num <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12, 6, 0, 3, 0, 10, …
## $ ed_num <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, 0, 2, 0, 0, 0, 0,…
## $ hosp_num <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, …
## $ charg_tot_ed <dbl> 0, 2751, 15233, NA, 0, 0, 0, 8436, 0, NA, 0, 0, …
## $ ed_charg_tot_ed <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0.0, 0.0, 7067.0, …
## $ sbp_neg <int> -144, -134, -126, -168, -119, -98, -108, -125, -…
## $ dbp_neg <int> -81, -82, -94, -110, -79, -59, -63, -76, -77, -6…
## $ chl_level_neg <dbl> -241.0, -229.9, -229.9, -235.4, -177.7, -173.8, …
## $ hdl_level_neg <dbl> -48.33, -51.33, -38.58, -51.33, -28.08, -31.08, …
## $ a1c_neg <dbl> -5.037, -5.201, -5.854, -5.364, -5.527, -5.037, …
## $ bmi_neg <dbl> -26.66, -35.23, -37.12, -24.81, -27.02, -26.26, …
## $ phq_neg <int> -1, -9, -2, -13, -2, -3, -2, -14, -11, -8, -7, -…
## $ cvd_risk_neg <dbl> -0.1370, -0.1120, -0.0330, -0.2530, -0.1560, -0.…
## $ debt_neg <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, …
## $ borrow_neg <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, …
## $ catastrophic_neg <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
print("Missing values in newd_imp:")
## [1] "Missing values in newd_imp:"
## numhh_list gender_inp age_inp
## 0 0 0
## hispanic_inp race_white_inp race_black_inp
## 0 0 0
## race_nwother_inp ast_dx_pre_lottery dia_dx_pre_lottery
## 0 0 0
## hbp_dx_pre_lottery chl_dx_pre_lottery ami_dx_pre_lottery
## 0 0 0
## chf_dx_pre_lottery emp_dx_pre_lottery kid_dx_pre_lottery
## 0 0 0
## cancer_dx_pre_lottery dep_dx_pre_lottery charg_tot_pre_ed
## 0 0 0
## ed_charg_tot_pre_ed num_visit_pre_cens_ed any_depres_pre_ed
## 0 0 0
## lessHS HSorGED X
## 0 0 0
## person_id weight_total_inp diabetes
## 0 0 41
## hypertension highcholesterol depression
## 281 332 131
## phq cvd_risk doc_any
## 64 2801 17
## ed_any hosp_any oop_spend
## 18 17 28
## catastrophic debt borrow
## 424 114 10
## a1c hdl_level chl_level
## 89 57 55
## bmi sbp dbp
## 54 41 41
## prescriptions_any prescriptions hypertension_med
## 2 311 0
## cholesterol_med diabetes_med depression_med
## 0 0 0
## household_id eligibility ohp_all_ever_inperson
## 0 0 0
## doc_num ed_num hosp_num
## 64 47 47
## charg_tot_ed ed_charg_tot_ed sbp_neg
## 2055 2056 41
## dbp_neg chl_level_neg hdl_level_neg
## 41 55 57
## a1c_neg bmi_neg phq_neg
## 89 54 64
## cvd_risk_neg debt_neg borrow_neg
## 2801 114 10
## catastrophic_neg
## 424