## Import Data
### The CSV is referenced by a relative path, so the working directory is
### switched to the project folder first.
### NOTE(review): the absolute, machine-specific path makes the script
### non-portable -- consider running it from the project root instead.
setwd("C:\\Users\\vinic\\OneDrive\\Área de Trabalho\\doutorado_peb\\peb_redes_neurais")
### Read into a plain data.frame; character columns become factors and
### column names are made syntactically valid.
import.data <- data.table::fread(
  "dataset_10in10_som.csv",
  check.names = TRUE,
  data.table = FALSE,
  stringsAsFactors = TRUE
)
### Keep only the rows that have no missing value in any column
import.data <- import.data[stats::complete.cases(import.data), ]
### Transform selected numeric to factors
### All listed columns are converted in a single vectorised assignment
### (each column is passed through as.factor and written back in place).
factor.cols <- c(
  "ethnicit", "racegp", "ragecat", "dagecat", "disease",
  "kps", "dissta", "hctci", "drcmvpr", "drsex",
  "graftype", "atgcampathgp", "condint", "gvhprhrx", "anc",
  "plt", "rel", "agvhd24", "agvhd34", "cgvhd"
)
import.data[factor.cols] <- lapply(import.data[factor.cols], as.factor)
## Build training data (categorical to dummies)
### Select the categorical variables used for training (kept as a data.frame)
cat.data <- import.data[, c(
  "ethnicit", "racegp", "ragecat", "dagecat", "disease",
  "kps", "dissta", "hctci", "drcmvpr", "drsex",
  "graftype", "atgcampathgp", "condint", "gvhprhrx", "anc",
  "plt", "rel", "agvhd24", "agvhd34", "cgvhd"
), drop = FALSE]
### Dummy-code the selected variables and remember the resulting column names
train.data <- cdt(cat.data)
catLevels <- colnames(train.data)
### One weight per dummy column, named "<variable>_<level>" and grouped by
### variable below
varWeights <- c(
  "ethnicit_1" = 0.5, "ethnicit_2" = 0.5, "ethnicit_3" = 0.5,
  "racegp_1" = 0.333, "racegp_2" = 0.333, "racegp_3" = 0.333,
  "racegp_4" = 0.333,
  "ragecat_0" = 0.167, "ragecat_1" = 0.167, "ragecat_2" = 0.167,
  "ragecat_3" = 0.167, "ragecat_4" = 0.167, "ragecat_5" = 0.167,
  "ragecat_6" = 0.167,
  "dagecat_1" = 0.25, "dagecat_2" = 0.25, "dagecat_3" = 0.25,
  "dagecat_4" = 0.25, "dagecat_5" = 0.25,
  "disease_10" = 0.5, "disease_20" = 0.5, "disease_50" = 0.5,
  "kps_0" = 1, "kps_1" = 1,
  "dissta_1" = 0.5, "dissta_2" = 0.5, "dissta_3" = 0.5,
  "hctci_1" = 1, "hctci_2" = 1,
  "drcmvpr_0" = 0.333, "drcmvpr_1" = 0.333, "drcmvpr_2" = 0.333,
  "drcmvpr_3" = 0.333,
  "drsex_1" = 0.333, "drsex_2" = 0.333, "drsex_3" = 0.333,
  "drsex_4" = 0.333,
  "graftype_1" = 1, "graftype_22" = 1,
  "atgcampathgp_2" = 1, "atgcampathgp_4" = 1,
  "condint_1" = 1, "condint_2" = 1,
  "gvhprhrx_1" = 1, "gvhprhrx_2" = 1,
  "anc_0" = 1, "anc_1" = 1,
  "plt_0" = 1, "plt_1" = 1,
  "rel_0" = 1, "rel_1" = 1,
  "agvhd24_0" = 1, "agvhd24_1" = 1,
  "agvhd34_0" = 1, "agvhd34_1" = 1,
  "cgvhd_0" = 1, "cgvhd_1" = 1
)
### Scale training variables (MCA-type scaling)
### Every column is divided by the square root of its own mean; the double
### transpose makes the per-column divisor line up with the columns.
train.data <- t(t(train.data) / sqrt(colMeans(train.data, na.rm = TRUE)))
### Apply variables weights
### varWeights is written by hand, whereas the columns of train.data are
### derived from the data. A plain positional multiply would silently recycle
### or misalign the weights whenever a level is missing or the column order
### differs, so the weights are matched to the columns by name when possible.
level.names <- colnames(train.data)
if (!is.null(level.names) && !is.null(names(varWeights)) &&
    all(level.names %in% names(varWeights))) {
  # Every column has a named weight: pick the weights in column order.
  level.weights <- varWeights[level.names]
} else if (length(varWeights) == ncol(train.data)) {
  # Names cannot be matched but the counts agree: fall back to the
  # positional pairing.
  level.weights <- varWeights
} else {
  stop(
    "varWeights (length ", length(varWeights), ") cannot be matched to the ",
    ncol(train.data), " columns of train.data by name or by position.",
    call. = FALSE
  )
}
### Each column is multiplied by the square root of its weight
train.data <- t(t(train.data) * sqrt(level.weights))
### Prepare plotting data
### The imported columns are bound side by side with a freshly computed
### cdt(cat.data), i.e. the dummy columns before any scaling or weighting
### is applied to train.data.
plot.data <- cbind(import.data, cdt(cat.data))