install.packages("readxl")
install.packages("nloptr")
install.packages(c("rpart", "xgboost", "rstanarm", "lme4"))
install.packages("embed")
install.packages("BiocManager", suppressUpdates=TRUE, quiet = TRUE)
BiocManager::install('mixOmics', update = FALSE)
install.packages("ranger")
install.packages("vip")
install.packages("stacks")
install.packages("kernlab")
install.packages("xgboost")
install.packages("kknn")
install.packages("glmnet")
install.packages("plsmod")
install.packages("tictoc")
install.packages("Rtsne")
install.packages("plotly")
install.packages("tidymodels", force = TRUE)
install.packages("tidyverse")
download.file("https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6342642/bin/NIHMS1510763-supplement-Dataset_2.xlsx", destfile = "/tmp/file.xlsx")
df <- read_excel("/tmp/file.xlsx", skip=1)
df_long <- df %>% pivot_longer(-`# Feature / Sample`, names_to="PatientID") %>%
pivot_wider(names_from=`# Feature / Sample`, values_from="value") %>% select(-Fecal.Calprotectin)
PRISM <- df %>% select(starts_with('PRISM')) %>% names()
data_prism <- df_long %>% filter(PatientID %in% PRISM )
data_validate <- df_long %>% filter(!PatientID %in% PRISM) %>% replace_na(list(immunosuppressant="No", mesalamine="No", steroids="No"))
tidy_data <- function(untidy) {
untidy %>%
mutate(PatientID = as.factor(PatientID),
Age = as.numeric(Age),
Diagnosis = as.factor(Diagnosis),
antibiotic = as.factor(antibiotic),
mesalamine = as.factor(mesalamine),
steroids = as.factor(steroids),
immunosuppressant = as.factor(immunosuppressant)) -> formatted
w <- which(sapply(formatted, class ) == 'character' )
formatted[w] <- lapply(formatted[w], function(x) as.numeric(x))
return(formatted)
}
data_prism_formatted <- tidy_data(data_prism) %>% na.omit()
data_validate_formatted <- tidy_data(data_validate) %>% na.omit()
colnames(data_prism_formatted) <- make.names(colnames(data_prism_formatted))
colnames(data_validate_formatted) <- make.names(colnames(data_validate_formatted))
train_df <- data_prism_formatted %>% select(-PatientID) %>%
mutate(Diagnosis = factor(Diagnosis, levels =c("Control","CD", "UC"), labels =c(1,2,3)))
test_df <- data_validate_formatted %>% select(-PatientID,-Diagnosis) %>% na.omit()
test_df_truth <- data_validate_formatted %>% select(PatientID, Diagnosis) %>%
mutate(Diagnosis = factor(Diagnosis, levels =c("Control","CD", "UC"), labels =c(1,2,3))) %>%
mutate(Cohort = case_when(str_detect(PatientID,"LLDeep") == TRUE ~ "Deep",
TRUE ~ "UMCG"))
folds <- train_df %>% vfold_cv(5, strata = Diagnosis)
data_combined <- bind_rows(data_prism_formatted, data_validate_formatted) %>%
mutate(Cohort = case_when(
str_detect(PatientID, "LLDeep") == TRUE ~ "Deep",
str_detect(PatientID, "PRISM") == TRUE ~ "PRISM",
TRUE ~ "UMCG")) %>% na.omit
data_combined_train <- data_combined %>% select(-c(PatientID, Diagnosis, Cohort))
head(data_combined)
## # A tibble: 6 × 8,856
## PatientID Age Diagnosis antibiotic immunosuppressant mesalamine steroids
## <fct> <dbl> <fct> <fct> <fct> <fct> <fct>
## 1 PRISM|7122 38 CD No Yes No No
## 2 PRISM|7147 50 CD No No Yes No
## 3 PRISM|7150 41 CD No Yes No No
## 4 PRISM|7153 51 CD No No Yes No
## 5 PRISM|7184 68 CD No No No No
## 6 PRISM|7238 67 CD Yes Yes No Yes
## # … with 8,849 more variables: C18.neg_Cluster_0001 <dbl>,
## # C18.neg_Cluster_0002 <dbl>, C18.neg_Cluster_0003 <dbl>,
## # C18.neg_Cluster_0004 <dbl>, C18.neg_Cluster_0005 <dbl>,
## # C18.neg_Cluster_0006 <dbl>, C18.neg_Cluster_0007 <dbl>,
## # C18.neg_Cluster_0008 <dbl>, C18.neg_Cluster_0009 <dbl>,
## # C18.neg_Cluster_0010 <dbl>, C18.neg_Cluster_0011 <dbl>,
## # C18.neg_Cluster_0012 <dbl>, C18.neg_Cluster_0013 <dbl>, …
data_tsne <- tibble(perplexity = c(5, 10, 15, 30, 45))
data_tsne <- data_tsne %>% mutate(TSNE = map(perplexity, ~Rtsne(data_combined_train, dims=2, PCA=F, perplexity=.x, max_iter=2000)))
for (i in 1:nrow(data_tsne)){
x = data_tsne$perplexity[[i]]
assign(paste0("tsne_df_tune_", x),
tibble(
X = data_tsne$TSNE[[i]]$Y[,1],
Y = data_tsne$TSNE[[i]]$Y[,2],
#Z = data_tsne$TSNE[[i]]$Y[,3],
Perplexity = data_tsne$perplexity[[i]],
Diagnosis = data_combined$Diagnosis,
Cohort = data_combined$Cohort,
ID = data_combined$PatientID
)
)
}
list_tsne_tune <- ls(pattern = "tsne_df_tune_")
tsne_combined <- bind_rows(mget(ls(pattern = "tsne_df_tune_")))
tsne_combined %>%
plot_ly(x = ~X, y = ~Y, frame = ~Perplexity, color = ~Diagnosis, symbol = ~Cohort,
colors = "Set1", type="scatter", mode="markers", ids=~ID, text = ~paste0("Sample: ", ID, "<br>",
"Diagnosis: ", Diagnosis),
hoverinfo = "text",
marker = list(sizemode = "diameter")) %>%
layout(xaxis = list(showgrid = F,
zeroline = F
),
yaxis = list(showgrid = F,
zeroline = F
)
) %>%
animation_opts(
redraw=FALSE,
frame = 2000,
transition = 500) %>%
animation_slider(
currentvalue = list(
prefix = "Perplexity: ",
font = list(color = "red")))
set.seed(42)
recipe_umap <- recipe(Diagnosis~., data = train_df) %>%
step_normalize(all_numeric_predictors()) %>%
step_dummy(all_nominal_predictors()) %>%
step_umap(all_predictors(), outcome = vars(Diagnosis), num_comp = 6, min_dist = 0.94396076, learn_rate = 6.193164e-03)
set.seed(42)
recipe_kpca <- recipe(Diagnosis~., data = train_df) %>%
step_normalize(all_numeric_predictors()) %>%
step_dummy(all_nominal_predictors()) %>%
step_kpca_rbf(all_predictors(), num_comp = 61, sigma = 1.782724e-06)
set.seed(42)
recipe_pca <- recipe(Diagnosis~., data = train_df) %>%
step_normalize(all_numeric_predictors()) %>%
step_dummy(all_nominal_predictors()) %>%
step_pca(all_predictors(), num_comp = 60)
train_umap <- bake(recipe_umap %>% prep(), train_df)
train_kpca <- bake(recipe_kpca %>% prep(), train_df)
train_pca <- bake(recipe_pca %>% prep(), train_df)
test_umap <- bake(recipe_umap %>% prep(), test_df)
test_kpca <- bake(recipe_kpca %>% prep(), test_df)
test_pca <- bake(recipe_pca %>% prep(), test_df)
train_1 <- train_umap %>% select(UMAP = UMAP1, Diagnosis) %>%
add_column(PC = train_pca$PC01) %>%
add_column(kPC = train_kpca$kPC01)
train_2 <- train_umap %>% select(UMAP2) %>%
add_column(PatientID = data_prism_formatted$PatientID) %>%
add_column(PC = train_pca$PC02) %>%
add_column(kPC = train_kpca$kPC02)
test_1 <- test_umap %>% select(UMAP = UMAP1) %>%
add_column(Diagnosis = test_df_truth$Diagnosis) %>%
add_column(kPC = test_kpca$kPC01) %>%
add_column(PC = test_pca$PC01)
test_2 <- test_umap %>% select(UMAP = UMAP2) %>%
add_column(PatientID = data_validate_formatted$PatientID) %>%
add_column(kPC = test_kpca$kPC02) %>%
add_column(PC = test_pca$PC02)
train_combined <- train_1 %>% pivot_longer(!Diagnosis, names_to = "Preprocessing", values_to = "Dim1") %>%
add_column(train_2 %>% pivot_longer(!PatientID,names_to = "Preprocessing", values_to = "Dim2") %>%
select(Dim2, PatientID)) %>%
mutate(Diagnosis = factor(Diagnosis, levels = c(1,2,3), labels = c("Control","CD", "UC")))
test_combined <- test_1 %>% pivot_longer(!Diagnosis, names_to = "Preprocessing", values_to = "Dim1") %>%
add_column(test_2 %>% pivot_longer(!PatientID,names_to = "Preprocessing", values_to = "Dim2") %>%
select(Dim2, PatientID)) %>%
mutate(Diagnosis = factor(Diagnosis, levels = c(1,2,3), labels = c("Control","CD", "UC")))
combined <- train_combined %>% bind_rows(test_combined, .id = c("Group")) %>%
mutate(Group = case_when(Group==1 ~ "Discovery",
TRUE ~ "Validate"))
head(combined)
## # A tibble: 6 × 6
## Group Diagnosis Preprocessing Dim1 Dim2 PatientID
## <chr> <fct> <chr> <dbl> <dbl> <fct>
## 1 Discovery CD UMAP -1.40 -1.39 PRISM|7122
## 2 Discovery CD PC -18.2 -16.7 PRISM|7122
## 3 Discovery CD kPC 0.414 0.386 PRISM|7122
## 4 Discovery CD UMAP -0.528 -0.130 PRISM|7147
## 5 Discovery CD PC -15.0 26.0 PRISM|7147
## 6 Discovery CD kPC 0.344 -0.583 PRISM|7147
combined %>%
plot_ly(x = ~Dim1, y = ~Dim2, frame = ~Preprocessing, color = ~Diagnosis, symbol = ~Group,
colors = "Set1", type="scatter", mode="markers") %>%
layout(xaxis = list(showgrid = F,
zeroline = F,
autorange = T
),
yaxis = list(showgrid = F,
zeroline = F,
autorange = T
)
) %>%
animation_opts(
redraw=FALSE,
frame = 2000,
transition = 500) %>%
animation_slider(
currentvalue = list(
prefix = "Preprocessing: ",
font = list(color = "red")))
data_ml <- tibble(classifier = c("RF", "PLSDA", "KNN", "SVM", "XGB", "GLM"),
train = list(train_df,train_df,train_df,train_df,train_df,train_df),
test = list(test_df,test_df,test_df,test_df,test_df,test_df))
recipe_rf <- recipe(Diagnosis ~ ., data=train_df)
recipe_pls_svm <- recipe(Diagnosis ~ ., data=train_df) %>%
step_normalize(all_numeric(),-all_outcomes()) %>%
step_dummy(all_nominal(), -all_outcomes())
recipe_pca_knn <- recipe(Diagnosis ~ ., data=train_df) %>%
step_normalize(all_numeric(),-all_outcomes()) %>%
step_dummy(all_nominal(), -all_outcomes()) %>%
step_pca(all_predictors(), num_comp = 19)
recipe_xgb_glm <- recipe(Diagnosis ~ ., data=train_df) %>%
step_dummy(all_nominal(), -all_outcomes()) %>%
step_mutate_at(all_predictors(), fn = ~ as.numeric(.)) # required by XGBoost
model_PLSDA <- pls(num_comp=3) %>%
set_engine("mixOmics") %>%
set_mode("classification")
model_RF <- rand_forest(trees = 154,
min_n = 8) %>%
set_engine("ranger", importance = "impurity") %>%
set_mode("classification")
model_KNN <- nearest_neighbor(neighbors = 88) %>%
set_engine("kknn") %>%
set_mode("classification")
model_SVM <- svm_rbf(cost=27.695185,
rbf_sigma = 3.779591e-06,
margin = 0.01473722) %>%
set_engine("kernlab") %>%
set_mode("classification")
model_XGB <- boost_tree(trees = 426,
min_n = 5,
tree_depth = 12,
learn_rate = 0.090994590,
loss_reduction = 1.094324e-10) %>%
set_engine("xgboost") %>%
set_mode("classification")
model_GLM <- multinom_reg(penalty = 0.67959909,
mixture = 0.0592261) %>%
set_engine("glmnet") %>%
set_mode("classification")
data_ml$classifier
## [1] "RF" "PLSDA" "KNN" "SVM" "XGB" "GLM"
df_wflow <- tibble(classifier=data_ml$classifier, wflow=c(),
recipe=list(recipe_rf, recipe_pls_svm, recipe_pca_knn, recipe_pls_svm, recipe_xgb_glm, recipe_xgb_glm))
for (i in df_wflow$classifier) {
x <- paste("model_",i, sep="")
j <- which(df_wflow$classifier == i)
df_wflow$wflow[[j]] <- workflow() %>%
add_model(eval(parse(text = x))) %>%
add_recipe(df_wflow$recipe[[j]])
}
set.seed(42)
tic()
data_ml_fit <- data_ml %>% add_column(workflow = df_wflow$wflow) %>%
mutate(fitted = map2(workflow, train, ~fit(.x,.y))) %>%
mutate(pred_prob = map2(fitted, test, ~predict(.x,.y, type="prob"))) %>%
mutate(pred_class = map2(fitted, test, ~predict(.x,.y, type="class")))
toc()
## 231.92 sec elapsed
calc_metrics <- function(data_ml_fit) {
df <- tibble(Classifier=data_ml_fit$classifier)
for (i in 1:nrow(data_ml_fit)) {
df$AUC[[i]] <- data_ml_fit$pred_prob[[i]] %>% roc_auc(estimate=c(names(data_ml_fit$pred_prob[[i]])), truth= test_df_truth$Diagnosis,estimator="hand_till") %>% pull(.estimate) %>% round(2)
df$`Balanced Accuracy`[[i]] <- data_ml_fit$pred_class[[i]] %>% bal_accuracy(estimate=data_ml_fit$pred_class[[i]] %>% pull(),
truth=test_df_truth$Diagnosis,
estimator="macro_weighted") %>% pull(.estimate) %>% round(2) # average of sensitivity and specificity
df$Sensitivity[[i]] <- data_ml_fit$pred_class[[i]] %>% sens(estimate=data_ml_fit$pred_class[[i]] %>% pull(), truth=test_df_truth$Diagnosis) %>% pull(.estimate) %>% round(2)
df$Specificity[[i]] <- data_ml_fit$pred_class[[i]] %>% spec(estimate=data_ml_fit$pred_class[[i]] %>% pull(), truth=test_df_truth$Diagnosis) %>% pull(.estimate) %>% round(2)
}
return(df %>% unnest())
}
calc_metrics(data_ml_fit)
## # A tibble: 6 × 5
## Classifier AUC `Balanced Accuracy` Sensitivity Specificity
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 RF 0.9 0.83 0.77 0.89
## 2 PLSDA 0.89 0.79 0.72 0.86
## 3 KNN 0.87 0.61 0.48 0.74
## 4 SVM 0.84 0.82 0.75 0.88
## 5 XGB 0.9 0.82 0.76 0.88
## 6 GLM 0.85 0.8 0.72 0.86
Selecting the top three classifiers for model stacking (meta learning).
set.seed(42)
resamples <- vfold_cv(train_df, v=5)
ctrl_res <- control_stack_resamples()
tic()
data_ml_fit_stack <- data_ml_fit %>% filter(classifier %in% c("RF", "PLSDA", "XGB")) %>%
mutate(resamples = list(resamples, resamples, resamples)) %>%
mutate(stacks = map2(workflow, resamples, ~fit_resamples(.x,.y, control=ctrl_res)))
toc()
## 802.926 sec elapsed
mdl_stack_fit <- stacks() %>%
add_candidates(data_ml_fit_stack$stacks[[1]]) %>%
add_candidates(data_ml_fit_stack$stacks[[2]]) %>%
add_candidates(data_ml_fit_stack$stacks[[3]]) %>%
blend_predictions(non_negative = TRUE) %>%
fit_members()
data_stack <- tibble(classifier = "Stack")
data_stack$pred_class[[1]] <- predict(mdl_stack_fit, new_data=test_df, type="class")
data_stack_prob <- predict(mdl_stack_fit,test_df, type="prob")
data_stack$pred_prob[[1]] <- tibble(pred_prob=list(data_stack_prob)) %>% unnest(pred_prob)
data_ml_fit <- bind_rows(data_ml_fit, data_stack)
calc_metrics(data_ml_fit)
## Warning: Unknown or uninitialised column: `AUC`.
## Warning: Unknown or uninitialised column: `Balanced Accuracy`.
## Warning: Unknown or uninitialised column: `Sensitivity`.
## Warning: Unknown or uninitialised column: `Specificity`.
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(AUC, `Balanced Accuracy`, Sensitivity, Specificity)`
## # A tibble: 7 × 5
## Classifier AUC `Balanced Accuracy` Sensitivity Specificity
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 RF 0.9 0.83 0.77 0.89
## 2 PLSDA 0.89 0.79 0.72 0.86
## 3 KNN 0.87 0.61 0.48 0.74
## 4 SVM 0.84 0.82 0.75 0.88
## 5 XGB 0.9 0.82 0.76 0.88
## 6 GLM 0.85 0.8 0.72 0.86
## 7 Stack 0.88 0.85 0.8 0.9
df <- tibble(Classifier=data_ml_fit$classifier, AUC = c())
for (i in 1:nrow(data_ml_fit)) {
df$AUC[[i]] <- data_ml_fit$pred_prob[[i]] %>%
roc_curve(estimate=c(names(data_ml_fit$pred_prob[[i]])), truth=test_df_truth$Diagnosis)
}
bind_rows_func <- function(df){
df_auc <- bind_rows(RF = df$AUC[[1]],
PLSDA = df$AUC[[2]],
KNN = df$AUC[[3]],
SVM = df$AUC[[4]],
XGB = df$AUC[[5]],
GLM = df$AUC[[6]],
Stack = df$AUC[[7]],
.id= "Model")
return(df_auc)
}
bind_rows_func(df) %>% group_by(Model) %>% mutate(.level = case_when(.level == 1 ~ "Control",
.level == 2 ~ "CD",
TRUE ~ "UC")) %>%
mutate(.level = fct_relevel(as.factor(.level), c("Control", "CD", "UC")),
Model = as.factor(Model),
Model = fct_relevel(Model, data_ml_fit$classifier)) %>%
ggplot(aes(x = 1 - specificity, y = sensitivity, color=Model,group=Model)) +
geom_path() +
geom_abline(lty = 3) +
coord_equal() +
facet_wrap(~.level) +
theme_bw()
data_ml_fit_ibd <- data_ml_fit
for (i in 1:nrow(data_ml_fit_ibd)) {
data_ml_fit_ibd$pred_prob[[i]] <- data_ml_fit$pred_prob[[i]] %>% mutate(.pred_IBD = .pred_2 + .pred_3) %>%
select(.pred_Control = .pred_1, -c(.pred_2, .pred_3)) %>%
add_column(test_df_truth %>% mutate(Diagnosis = case_when(Diagnosis == "2" ~ "IBD",
Diagnosis == "3" ~ "IBD",
TRUE ~ "Control"))) %>%
mutate(Diagnosis = as.factor(Diagnosis)) %>%
select(truth = Diagnosis, everything()) %>% droplevels()
}
for (i in 1:nrow(data_ml_fit_ibd)) {
data_ml_fit_ibd$pred_class[[i]] <- data_ml_fit$pred_class[[i]] %>% mutate(.pred_class = case_when(
.pred_class == "1" ~ "Control",
.pred_class == "3" ~ "IBD",
TRUE ~ "IBD")) %>%
mutate(.pred_class = as.factor(.pred_class)) %>%
add_column(test_df_truth %>% mutate(Diagnosis = case_when(Diagnosis == "2" ~ "IBD",
Diagnosis == "3" ~ "IBD",
TRUE ~ "Control"))) %>%
mutate(Diagnosis = as.factor(Diagnosis)) %>%
select(truth = Diagnosis, everything())
}
calc_metrics_ibd <- function(data_ml_fit_ibd) {
df <- tibble(Classifier = data_ml_fit_ibd$classifier)
for (i in 1:nrow(data_ml_fit_ibd)) {
df$AUC[[i]] <- data_ml_fit_ibd$pred_prob[[i]] %>% roc_auc(estimate=.pred_Control, truth=truth) %>% pull(.estimate) %>% round(2)
df$`Balanced Accuracy`[[i]] <- data_ml_fit_ibd$pred_class[[i]] %>% bal_accuracy(estimate=.pred_class, truth=truth) %>% pull(.estimate) %>% round(2) # average of sensitivity and specificity
df$Sensitivity[[i]] <- data_ml_fit_ibd$pred_class[[i]] %>% sens(estimate = .pred_class, truth=truth, event_level = "second") %>% pull(.estimate) %>% round(2) # IBD is the event
df$Specificity[[i]] <- data_ml_fit_ibd$pred_class[[i]] %>% spec(estimate = .pred_class, truth=truth, event_level = "second") %>% pull(.estimate) %>% round(2)
}
return(df %>% unnest())
}
df_ibd_curve <- tibble(Classifier=data_ml_fit$classifier, AUC = c())
for (i in 1:nrow(data_ml_fit_ibd)) {
df_ibd_curve$AUC[[i]] <- data_ml_fit_ibd$pred_prob[[i]] %>% roc_curve(estimator=.pred_Control, truth=truth)
}
df_roc_curve_ibd <- bind_rows_func(df_ibd_curve) %>%
mutate(Model = as.factor(Model)) %>%
mutate(Model = fct_relevel(Model, data_ml_fit_ibd$classifier))
df_ibd_score <- tibble(Classifier=data_ml_fit_ibd$classifier, AUC = c())
for (i in 1:nrow(data_ml_fit_ibd)) {
df_ibd_score$AUC[[i]] <- data_ml_fit_ibd$pred_prob[[i]] %>% roc_auc(estimate=.pred_Control, truth=truth)
}
df_roc_score <- bind_rows_func(df_ibd_score) %>%
select(Model, AUC = .estimate) %>% mutate(AUC = round(AUC, 2))
df_geom <- data.frame(Model = data_ml_fit_ibd$classifier, Score = (paste0(" (AUC = ", df_roc_score$AUC, ")"))) %>%
mutate(Model = as.factor(Model),
Model = fct_relevel(Model, data_ml_fit_ibd$classifier))
df_roc_curve_ibd %>%
ggplot(aes(x = 1 - specificity, y = sensitivity, color=Model, group=Model)) +
geom_path() +
geom_abline(lty = 3) +
coord_equal() +
theme_bw() +
theme(legend.position="none")+
geom_text(data=df_geom, aes(x = .63, y = .075, label = Model), size= 3.5, position="stack",hjust=0, show.legend=FALSE)+
geom_text(data=df_geom, aes(x = .76, y = .075, label = Score, group=Model, color=NULL),size= 3.5,position="stack", hjust=0, show.legend=FALSE)
calc_metrics_ibd(data_ml_fit_ibd)
## Warning: Unknown or uninitialised column: `AUC`.
## Warning: Unknown or uninitialised column: `Balanced Accuracy`.
## Warning: Unknown or uninitialised column: `Sensitivity`.
## Warning: Unknown or uninitialised column: `Specificity`.
## Warning: `cols` is now required when using unnest().
## Please use `cols = c(AUC, `Balanced Accuracy`, Sensitivity, Specificity)`
## # A tibble: 7 × 5
## Classifier AUC `Balanced Accuracy` Sensitivity Specificity
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 RF 0.93 0.91 0.95 0.86
## 2 PLSDA 0.89 0.86 0.77 0.95
## 3 KNN 0.91 0.72 0.93 0.5
## 4 SVM 0.88 0.86 0.77 0.95
## 5 XGB 0.91 0.84 0.91 0.77
## 6 GLM 0.88 0.84 0.91 0.77
## 7 Stack 0.93 0.9 0.88 0.91
print(levels(data_ml_fit_ibd$pred_class[[1]]$.pred_class)) # Level order of predicted diagnosis
## [1] "Control" "IBD"
levels(data_ml_fit_ibd$pred_class[[1]]$truth) # Level order of the actual diagnosis
## [1] "Control" "IBD"