Humana Case Competation

Data Import & Reshape

varnames <- c("ID", paste0(month.abb, "-01-2016"), paste0(month.abb, "-01-2017"), 
    paste0(month.abb, "-01-2018"), paste0(month.abb, "-01-2019"))
Demongraphics <- read_xlsx("Demongrahics.xlsx")
Claims <- read_xlsx("Claims.xlsx")
ER_Utilization <- read_xlsx("ER Utilization.xlsx")
Hospital_Admissions <- read_xlsx("Hospital Admissions.xlsx")
PCP_Visits <- read_xlsx("PCP Visits.xlsx")
Rx_Utilization <- read_xlsx("Rx Utilization.xlsx")
# Rename & Reshape====================================================
names(Claims) <- varnames
names(ER_Utilization) <- varnames
names(Hospital_Admissions) <- varnames
names(PCP_Visits) <- varnames
names(Rx_Utilization) <- varnames

Claims <- Claims %>% pivot_longer(`Jan-01-2016`:`Dec-01-2019`, names_to = "Date") %>% 
    rename(claim = value) %>% mutate(Date = mdy(Date), month = month(Date), year = year(Date))
ER_Utilization <- ER_Utilization %>% pivot_longer(`Jan-01-2016`:`Dec-01-2019`, names_to = "Date") %>% 
    rename(ER_utilization = value) %>% mutate(Date = mdy(Date), month = month(Date), 
    year = year(Date))
Hospital_Admissions <- Hospital_Admissions %>% pivot_longer(`Jan-01-2016`:`Dec-01-2019`, 
    names_to = "Date") %>% rename(Hospital_admissions = value) %>% mutate(Date = mdy(Date), 
    month = month(Date), year = year(Date))
PCP_Visits <- PCP_Visits %>% pivot_longer(`Jan-01-2016`:`Dec-01-2019`, names_to = "Date") %>% 
    rename(PCP_visits = value) %>% mutate(Date = mdy(Date), month = month(Date), 
    year = year(Date))
Rx_Utilization <- Rx_Utilization %>% pivot_longer(`Jan-01-2016`:`Dec-01-2019`, names_to = "Date") %>% 
    rename(Rx_utilization = value) %>% mutate(Date = mdy(Date), month = month(Date), 
    year = year(Date))
# Merge ==============================================================
merge_data <- Claims %>% left_join(ER_Utilization, by = c("ID", "Date", "year", "month")) %>% 
    left_join(Hospital_Admissions, by = c("ID", "Date", "year", "month")) %>% left_join(PCP_Visits, 
    by = c("ID", "Date", "year", "month")) %>% left_join(Rx_Utilization, by = c("ID", 
    "Date", "year", "month")) %>% select(ID, Date, month, year, everything())
# Control Group =====================================================
Control_Demongraphics <- Demongraphics %>% filter((Outreach_Transportation == 0) & 
    (Outreach_FinancialAssistance == 0) & (Outreach_Loneliness == 0) & (Outreach_FoodInsecurity == 
    0))
Train_Demongraphics <- Demongraphics %>% filter(!ID %in% Control_Demongraphics$ID)
Train_group <- merge_data %>% filter(!ID %in% Control_Demongraphics$ID)
Control_group <- merge_data %>% filter(ID %in% Control_Demongraphics$ID)
# 
write.csv(Control_Demongraphics, "Control_deomongraphics.csv")
write.csv(Train_Demongraphics, "Train_Demongraphics.csv")
write.csv(Train_group, "Train_group.csv")
write.csv(Control_group, "Control_group.csv")
# Data Cleaning Train
Train_Demongraphics <- Train_Demongraphics %>% filter(Low_Income %in% c("N", "Y"), 
    Region != "Unknown", Rural != "Unknown", Gender %in% c("F", "M"))
Train_group <- Train_group %>% filter(ID %in% Train_Demongraphics$ID)
## Control
Control_Demongraphics <- Control_Demongraphics %>% filter(Region != "Unknown", Rural != 
    "Unknown")
Control_group <- Control_group %>% filter(ID %in% Control_Demongraphics$ID)

EDA

Net

2020-02-29