Load Required Libraries
library(synthpop)
library(tidyverse)
library(missRanger)
library(FactoMineR)
# Seed the random number generator once, up front, so that repeated runs of
# this script draw the same random numbers.
set.seed(seed = 54321)
Load Data
# Restore the objects stored in the .RData file into the workspace; the code
# that follows reads `data_for_rf`, which is expected to come from this file.
load("data_for_rf.RData")
Impute Missing Values
# Keep the first 32 columns, discard rows where 'pri' or 'min_Alb' is missing,
# and coerce 'age' to numeric.
data <- select(data_for_rf, 1:32)
data <- drop_na(data, pri, min_Alb)
data <- mutate(data, age = as.numeric(age))
# Set 'pri' and 'min_Alb' aside as plain vectors; they are excluded from the
# imputation step and re-attached afterwards.
pri <- data[["pri"]]
min_Alb <- data[["min_Alb"]]
# Everything except 'pri' and 'min_Alb' goes into the imputation input.
data.0 <- select(data, -pri, -min_Alb)
# Fill in the remaining missing values with missRanger (default settings).
data.impute.0 <- missRanger(data = data.0)
##
## Missing value imputation by random forests
##
## Variables to impute: lab_count, rx_count, BMI, min_pH, min_Hgb, max_Lactate, min_dbp, min_hgb_A1C, max_HR, min_HR, min_O2_sat, min_sbp, min_mbp, min_Abpm, min_Abps, min_Abpd, max_WBC, max_temp, max_PEEP, min_pO2, min_BE, max_pCO2, min_Braden_Activity, min_Braden_Friction_Shear, min_Braden_Mobility, min_Braden_Moisture, min_Braden_Nutrition, min_Braden_Sensory_Perception
## Variables used to impute: lab_count, rx_count, BMI, min_pH, age, min_Hgb, max_Lactate, min_dbp, previous_visits, min_hgb_A1C, max_HR, min_HR, min_O2_sat, min_sbp, min_mbp, min_Abpm, min_Abps, min_Abpd, max_WBC, max_temp, max_PEEP, min_pO2, min_BE, max_pCO2, min_Braden_Activity, min_Braden_Friction_Shear, min_Braden_Mobility, min_Braden_Moisture, min_Braden_Nutrition, min_Braden_Sensory_Perception
## iter 1: ............................
## iter 2: ............................
## iter 3: ............................
## iter 4: ............................
# Re-attach 'pri' and 'min_Alb' to the imputed columns, then store 'pri' as a
# factor.
data.impute <- mutate(
  cbind(data.impute.0, pri, min_Alb),
  pri = as.factor(pri)
)
Generate Synthetic Data
# Run synthpop's syn() on the imputed dataset with its default settings.
synth.MIMIC <- syn(data = data.impute)
##
## Synthesis
## -----------
## lab_count rx_count BMI min_pH age min_Hgb max_Lactate min_dbp previous_visits min_hgb_A1C
## max_HR min_HR min_O2_sat min_sbp min_mbp min_Abpm min_Abps min_Abpd max_WBC max_temp
## max_PEEP min_pO2 min_BE max_pCO2 min_Braden_Activity min_Braden_Friction_Shear min_Braden_Mobility min_Braden_Moisture min_Braden_Nutrition min_Braden_Sensory_Perception
## pri min_Alb
# Pull the synthesised dataset out of the `syn` element of the syn() result.
synth_MIMIC <- synth.MIMIC[["syn"]]
Plot Original and Synthetic Data
# Imputed original data: min_Alb against min_HR, with the fitted
# least-squares line drawn dashed (lty = 2).
plot(data.impute$min_HR, data.impute$min_Alb)
abline(lm(min_Alb ~ min_HR, data = data.impute), lty = 2)

# Synthetic data: the same scatter plot and dashed regression line.
plot(synth_MIMIC$min_HR, synth_MIMIC$min_Alb)
abline(lm(min_Alb ~ min_HR, data = synth_MIMIC), lty = 2)

Principal Component Analysis
# Drop the factor column 'pri' from both datasets before running the PCA.
pca.data.impute <- select(data.impute, -pri)
pca.synth_MIMIC <- select(synth_MIMIC, -pri)
# Run FactoMineR's PCA on each dataset in turn, with its graphs enabled.
pca_result.mimic <- PCA(X = pca.data.impute, graph = TRUE)

pca_result.synth <- PCA(X = pca.synth_MIMIC, graph = TRUE)

Export datasets
# Write the imputed dataset and the synthetic dataset to separate .RData
# files, each stored under its own object name.
save(list = "data.impute", file = "data_for_ML.impute.RData")
save(list = "synth_MIMIC", file = "data_for_ML.synth.RData")
XAI with Synth RF
library(ranger) # for random forest models
library(DALEX) # for model explainers
library(modelStudio) # for interactive model explainers studio
# Recode the factor 'pri' as numeric: as.numeric() on a factor returns its
# level codes (1, 2, ...), so subtracting 1 gives 0/1 for a two-level factor.
synth_MIMIC_num <- synth_MIMIC %>%
  mutate(pri = as.numeric(pri) - 1)
# Train a random forest with ranger using every other column as a predictor.
# Because 'pri' is numeric here, ranger fits a regression forest.
model_rf <- ranger(pri ~ ., data = synth_MIMIC_num)
# Create a DALEX explainer for the random forest model.
# `data` must hold the predictors only: the target is supplied through `y`,
# and leaving 'pri' inside `data` would make DALEX treat it as one more
# feature when computing explanations. This also matches how the XGBoost
# explainer in this file is built.
explainer_rf <- DALEX::explain(
  model_rf,
  data = synth_MIMIC_num[, names(synth_MIMIC_num) != "pri", drop = FALSE],
  y = synth_MIMIC_num$pri,
  label = "MIMIC --> Synth"
)
## Preparation of a new explainer is initiated
## -> model label : MIMIC --> Synth
## -> data : 3473 rows 32 cols
## -> target variable : 3473 values
## -> predict function : yhat.ranger will be used ( default )
## -> predicted values : No value for predict function target column. ( default )
## -> model_info : package ranger , ver. 0.14.1 , task regression ( default )
## -> predicted values : numerical, min = 0 , mean = 0.09058859 , max = 0.787
## -> residual function : difference between y and yhat ( default )
## -> residuals : numerical, min = -0.267 , mean = -0.004495873 , max = 0.5463667
## A new explainer has been created!
# Use modelStudio to create an interactive studio with model explainers
# modelStudio(explainer_rf)
https://rpubs.com/andystats/XAI_mimic4_synth
XAI with Synth XGBoost
library(xgboost) # for XGBoost models
library(DALEX) # for model explainers
library(modelStudio) # for interactive model explainers studio
# Recode the factor 'pri' as numeric: as.numeric() on a factor returns its
# level codes (1, 2, ...), so subtracting 1 gives 0/1 for a two-level factor.
synth_MIMIC_num <- mutate(synth_MIMIC, pri = as.numeric(pri) - 1)
# Build the XGBoost input: every column except 'pri' converted to a numeric
# matrix, with 'pri' supplied separately as the label.
is_predictor <- names(synth_MIMIC_num) != "pri"
data_matrix <- xgb.DMatrix(
  data.matrix(synth_MIMIC_num[, is_predictor]),
  label = synth_MIMIC_num$pri
)
# Booster settings passed to xgb.train() below.
xgb_params <- list(
  objective = "binary:logistic",
  eval_metric = "logloss",
  max_depth = 6,
  eta = 0.3
)
# Fit the booster for 100 rounds on the matrix built above.
model_xgb <- xgb.train(
  params = xgb_params,
  data = data_matrix,
  nrounds = 100
)
# Prediction wrapper handed to DALEX for the XGBoost model.
#   model   - the fitted booster to predict with.
#   newdata - predictor data; it is converted with data.matrix() and wrapped
#             in an xgb.DMatrix before prediction.
# Returns whatever predict() yields for that matrix.
xgb_predict <- function(model, newdata) {
  dmat <- xgb.DMatrix(data.matrix(newdata))
  predict(model, newdata = dmat)
}
# Wrap the XGBoost model in a DALEX explainer. `data` carries every column
# except 'pri', the target goes in through `y`, and predictions are routed
# through the custom xgb_predict() wrapper.
explainer_xgb <- DALEX::explain(
  model = model_xgb,
  data = synth_MIMIC_num[, names(synth_MIMIC_num) != "pri"],
  y = synth_MIMIC_num$pri,
  predict_function = xgb_predict,
  label = "MIMIC --> Synth (XGBoost)"
)
## Preparation of a new explainer is initiated
## -> model label : MIMIC --> Synth (XGBoost)
## -> data : 3473 rows 31 cols
## -> target variable : 3473 values
## -> predict function : xgb_predict
## -> predicted values : No value for predict function target column. ( default )
## -> model_info : package Model of class: xgb.Booster package unrecognized , ver. Unknown , task regression ( default )
## -> predicted values : numerical, min = 2.325129e-05 , mean = 0.08610184 , max = 0.9923863
## -> residual function : difference between y and yhat ( default )
## -> residuals : numerical, min = -0.09597715 , mean = -9.125908e-06 , max = 0.1123768
## A new explainer has been created!
# Use modelStudio to create an interactive studio with model explainers
# modelStudio(explainer_xgb)
https://rpubs.com/andystats/XAI_mimic4_synth_xgboost