Se ha desarrollado un modelo de clasificación supervisada del DETERIORO utilizando principalmente variables clínicas, junto con algunas funciones de autocorrelación (ACF), que ocupan la posición 10 en importancia según el modelo optimizado (ver Tabla 5.23).
Adicionalmente, algunas funciones de correlación cruzada (CCF) y ACF aparecen en el modelo inicial (ver Tabla 5.20).
La contribución principal de este enfoque es la introducción de variables no estándar (ACF, CCF) en el modelo de clasificación.
El primer objetivo es repetir el análisis excluyendo las variables no estándar para evaluar la relevancia de estas nuevas variables. Sería alentador que la exactitud (accuracy) obtenida solo con variables clínicas fuera menor que la del modelo combinado, ya que esto sugeriría que las variables añadidas son importantes.
La segunda tarea consiste en realizar el análisis exclusivamente con las variables no estándar, es decir, variables puramente estadísticas derivadas de las series temporales de SatO2 y FC. Aquí podríamos incluir también variables de tipo media y varianza.
Sería especialmente positivo que estas variables estadísticas lograran por sí solas una exactitud (accuracy) razonable, lo que indicaría su relevancia para el modelo.
En resumen, estas tareas proporcionarán una comprensión más profunda sobre la contribución y relevancia de las variables clínicas y estadísticas en el modelo de clasificación del DETERIORO.
Libraries
library(readxl)
library(readr)
library(dplyr)
library(DT)
# Plotting purposes
library(ggplot2)
library(gridExtra)
library(knitr)
# RF
library(randomForest) # RandomForest Discrete Classification
library(imbalance) # To create a more balanced dataset
Functions
source("../../scripts/useful-functions/get_column_position.R")
# In a normal script it will be: source("./scripts/useful-functions/get_column_position.R")
Load Data
# Load the patient identifier vectors. Every file is read into a data frame
# and then reduced to its `x` column. `show_col_types = FALSE` is passed to
# every read_csv() call so none of them prints a column-specification message.
file_patient_name <- data.frame(read_csv("../../data/clean-data/file_patient_name.csv", show_col_types = FALSE))
file_patient_name <- file_patient_name$x
# First patients with OAF
name_patients_DETERIORO_OAF_0 <- data.frame(read_csv("../../data/clean-data/name_patients_DETERIORO_OAF_0.csv", show_col_types = FALSE))
name_patients_DETERIORO_OAF_0 <- name_patients_DETERIORO_OAF_0$x
name_patients_DETERIORO_OAF_0_8 <- data.frame(read_csv("../../data/clean-data/name_patients_DETERIORO_OAF_0_8.csv", show_col_types = FALSE))
name_patients_DETERIORO_OAF_0_8 <- name_patients_DETERIORO_OAF_0_8$x
valid_patients_P2 <- data.frame(read_xlsx("../../data/clean-data/valid_patients_P2.xlsx"))
valid_patients_P2 <- valid_patients_P2$x
# Drop from the valid set every patient listed in either of the two OAF vectors.
valid_patients_P2 <- valid_patients_P2[!valid_patients_P2 %in% union(name_patients_DETERIORO_OAF_0, name_patients_DETERIORO_OAF_0_8)]
# `file_patient_name` is already loaded at the top of this chunk and nothing
# in between modifies it, so it is not read from disk a second time.
## UCIP
file_patient_name_UCIP <- data.frame(read_csv("../../data/info-patients/file_patient_name_UCIP.csv", show_col_types = FALSE))
file_patient_name_UCIP <- file_patient_name_UCIP$x
## Deterioro and NOT deterioro
file_patient_name_NO_DETERIORO <- data.frame(read_csv("../../data/info-patients/file_patient_name_NO_DETERIORO.csv", show_col_types = FALSE))
file_patient_name_NO_DETERIORO <- file_patient_name_NO_DETERIORO$x
file_patient_name_DETERIORO <- data.frame(read_csv("../../data/info-patients/file_patient_name_DETERIORO.csv", show_col_types = FALSE))
file_patient_name_DETERIORO <- file_patient_name_DETERIORO$x
Descriptive Data
Descriptive Data after imputation
# Read the imputed descriptive table.
# NOTE(review): `row.names = TRUE` is an unusual value (the argument normally
# takes a column index/name or a vector of names). The row names are
# overwritten with `file_patient_name` below in any case -- confirm intent.
df_descriptive <- data.frame(read_xlsx("../../data/clean-data/descriptive-data/descriptive_data_imputed.xlsx"), row.names = TRUE)
# Turn every character column into a factor. `across(where(...))` is the
# current dplyr form of the superseded `mutate_if()`.
df_descriptive <- df_descriptive %>%
  mutate(across(where(is.character), as.factor))
# Companion `_mask` workbook, read the same way.
df_descriptive_mask <- data.frame(read_xlsx("../../data/clean-data/descriptive-data/descriptive_data_imputed_mask.xlsx"), row.names = TRUE)
# Index both tables by patient identifier.
rownames(df_descriptive) <- file_patient_name
rownames(df_descriptive_mask) <- file_patient_name
Descriptive Data after selection of valid patients
# Keep only the rows of the descriptive table that belong to valid patients.
df_descriptive_P2 <- df_descriptive[valid_patients_P2, ]
# Imputed time series: each workbook is read and immediately restricted to
# the valid-patient columns.
# NOTE(review): the "cuantiles" workbook is read from a sheet named
# "FC_valid_patients_input_P2" -- confirm this sheet name is the intended one.
cuantiles_TS_HR_P2 <- data.frame(read_xlsx(
  "../../data/clean-data/BoxBasedImputation/cuantiles_TS_HR_valid_patients_input_P2.xlsx",
  sheet = "FC_valid_patients_input_P2"
))[, valid_patients_P2]
SatO2_TS_HR_P2 <- data.frame(read_xlsx(
  "../../data/clean-data/BoxBasedImputation/SatO2_valid_patients_input_P2.xlsx",
  sheet = "SatO2_valid_patients_input_P2"
))[, valid_patients_P2]
FC_TS_HR_P2 <- data.frame(read_xlsx(
  "../../data/clean-data/BoxBasedImputation/FC_valid_patients_input_P2.xlsx",
  sheet = "FC_valid_patients_input_P2"
))[, valid_patients_P2]
# Column-wise scaled copies (base::scale with its defaults) of both series.
SatO2_TS_HR_P2_scaled <- data.frame(scale(SatO2_TS_HR_P2))
FC_TS_HR_P2_scaled <- data.frame(scale(FC_TS_HR_P2))
set.seed(12345)
df <- FC_TS_HR_P2[, valid_patients_P2]
# Block means of the FC series: label each consecutive run of 60 rows with a
# group id (8 groups in total) and average every patient column per group.
# The mean is wrapped in an explicit function because passing `na.rm = TRUE`
# through the `...` of across() is deprecated as of dplyr 1.1.0.
medias_FC_P2 <- df %>%
  mutate(group = rep(1:8, each = 60)) %>%
  group_by(group) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE))) %>%
  select(-group)
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(everything(), mean, na.rm = TRUE)`.
## ℹ In group 1: `group = 1`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
# Transpose so that each patient becomes a row, then label the eight
# block-mean columns.
medias_FC_P2 <- t(medias_FC_P2)
mean_vector <- sprintf("Mean_FC_P2_%d", 1:8)
colnames(medias_FC_P2) <- mean_vector
set.seed(12345)
df <- FC_TS_HR_P2[, valid_patients_P2]
# Block variances of the FC series: consecutive runs of 60 rows (1 h) form
# one group, and the variance of every patient column is taken per group.
var_FC_P2 <- df %>%
  group_by(group = rep(1:8, each = 60)) %>%
  summarise(
    across(everything(), function(x) var(x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  select(-group)
# Patients as rows, one labelled column per block.
var_FC_P2 <- t(var_FC_P2)
var_vector <- sprintf("Var_FC_P2_%d", 1:8)
colnames(var_FC_P2) <- var_vector
df <- SatO2_TS_HR_P2[, valid_patients_P2]
# Block means of the SatO2 series: group consecutive runs of 60 rows
# (8 groups) and average every patient column within each group.
# The mean is wrapped in an explicit function because passing `na.rm = TRUE`
# through the `...` of across() is deprecated as of dplyr 1.1.0.
medias_SatO2_P2 <- df %>%
  mutate(group = rep(1:8, each = 60)) %>%
  group_by(group) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE))) %>%
  select(-group)
# Transpose (patients as rows) and name the eight block columns.
mean_vector <- sprintf("Mean_SatO2_P2_%d", seq(1, 8))
medias_SatO2_P2 <- t(medias_SatO2_P2)
colnames(medias_SatO2_P2) <- mean_vector
df <- SatO2_TS_HR_P2[, valid_patients_P2]
# Block variances of the SatO2 series: consecutive runs of 60 rows (1 h) form
# one group, and the variance of every patient column is taken per group.
var_SatO2_P2 <- df %>%
  group_by(group = rep(1:8, each = 60)) %>%
  summarise(
    across(everything(), function(x) var(x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  select(-group)
# Patients as rows, one labelled column per block.
var_SatO2_P2 <- t(var_SatO2_P2)
var_vector <- sprintf("Var_SatO2_P2_%d", 1:8)
colnames(var_SatO2_P2) <- var_vector
df <- cuantiles_TS_HR_P2[, valid_patients_P2]
# Block means of the `cuantiles_TS_HR_P2` series: group consecutive runs of
# 60 rows (8 groups) and average every patient column within each group.
# The mean is wrapped in an explicit function because passing `na.rm = TRUE`
# through the `...` of across() is deprecated as of dplyr 1.1.0.
medias_Q_P2 <- df %>%
  mutate(group = rep(1:8, each = 60)) %>%
  group_by(group) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE))) %>%
  select(-group)
# Transpose (patients as rows) and name the eight block columns.
mean_vector <- sprintf("Mean_Q_P2_%d", seq(1, 8))
medias_Q_P2 <- t(medias_Q_P2)
colnames(medias_Q_P2) <- mean_vector
df <- cuantiles_TS_HR_P2[, valid_patients_P2]
# Block variances of the `cuantiles_TS_HR_P2` series: consecutive runs of
# 60 rows (1 h) form one group, and the variance of every patient column is
# taken per group.
var_Q_P2 <- df %>%
  group_by(group = rep(1:8, each = 60)) %>%
  summarise(
    across(everything(), function(x) var(x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  select(-group)
# Patients as rows, one labelled column per block.
var_Q_P2 <- t(var_Q_P2)
Q_vector <- sprintf("Var_Q_P2_%d", 1:8)
colnames(var_Q_P2) <- Q_vector
df <- data.frame(scale(FC_TS_HR_P2[, valid_patients_P2]))
# Block means of the column-wise scaled FC series: group consecutive runs of
# 60 rows (8 groups) and average every patient column within each group.
# The mean is wrapped in an explicit function because passing `na.rm = TRUE`
# through the `...` of across() is deprecated as of dplyr 1.1.0.
medias_SC_P2 <- df %>%
  mutate(group = rep(1:8, each = 60)) %>%
  group_by(group) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE))) %>%
  select(-group)
# Transpose (patients as rows) and name the eight block columns.
mean_vector <- sprintf("Mean_SC_FC_P2_%d", seq(1, 8))
medias_SC_P2 <- t(medias_SC_P2)
colnames(medias_SC_P2) <- mean_vector
df <- data.frame(scale(FC_TS_HR_P2[, valid_patients_P2]))
# Block variances of the column-wise scaled FC series: consecutive runs of
# 60 rows (1 h) form one group, and the variance of every patient column is
# taken per group.
var_SC_FC_P2 <- df %>%
  group_by(group = rep(1:8, each = 60)) %>%
  summarise(
    across(everything(), function(x) var(x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  select(-group)
# Patients as rows, one labelled column per block.
var_SC_FC_P2 <- t(var_SC_FC_P2)
FC_SC_vector <- sprintf("Var_SC_FC_P2_%d", 1:8)
colnames(var_SC_FC_P2) <- FC_SC_vector
df <- data.frame(scale(SatO2_TS_HR_P2[, valid_patients_P2]))
# Block means of the column-wise scaled SatO2 series: group consecutive runs
# of 60 rows (8 groups) and average every patient column within each group.
# The mean is wrapped in an explicit function because passing `na.rm = TRUE`
# through the `...` of across() is deprecated as of dplyr 1.1.0.
medias_SC_SO2_P2 <- df %>%
  mutate(group = rep(1:8, each = 60)) %>%
  group_by(group) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE))) %>%
  select(-group)
# Transpose (patients as rows) and name the eight block columns.
mean_vector <- sprintf("Mean_SC_SO2_P2_%d", seq(1, 8))
medias_SC_SO2_P2 <- t(medias_SC_SO2_P2)
colnames(medias_SC_SO2_P2) <- mean_vector
df <- data.frame(scale(SatO2_TS_HR_P2[, valid_patients_P2]))
# Block variances of the column-wise scaled SatO2 series: consecutive runs of
# 60 rows (1 h) form one group, and the variance of every patient column is
# taken per group.
var_SC_SatO2_P2 <- df %>%
  group_by(group = rep(1:8, each = 60)) %>%
  summarise(
    across(everything(), function(x) var(x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  select(-group)
# Patients as rows, one labelled column per block.
# NOTE(review): the label "Var_SatO2_FC_P2_%d" does not follow the pattern of
# the sibling labels ("Var_SC_FC_P2_%d", "Mean_SC_SO2_P2_%d"). It is kept
# as-is because later code may reference these column names -- confirm.
var_SC_SatO2_P2 <- t(var_SC_SatO2_P2)
SatO2_SC_vector <- sprintf("Var_SatO2_FC_P2_%d", 1:8)
colnames(var_SC_SatO2_P2) <- SatO2_SC_vector
# Autocorrelation table for the scaled FC series: one column per patient,
# one row per stored autocorrelation value.
dimension_col <- ncol(FC_TS_HR_P2_scaled)
dimension_row <- 480 # lag.max + 1 (Acf is called with lag.max = 479)
FC_TS_HR_P2_scaled_ACF <- data.frame(matrix(nrow = dimension_row, ncol = dimension_col))
colnames(FC_TS_HR_P2_scaled_ACF) <- names(FC_TS_HR_P2_scaled)
for (i in colnames(FC_TS_HR_P2_scaled_ACF)) {
  acf_result_FC_scaled <- forecast::Acf(
    FC_TS_HR_P2_scaled[[i]],
    lag.max = (dimension_row - 1),
    plot = FALSE
  )
  # Store the autocorrelation values as a plain numeric column.
  FC_TS_HR_P2_scaled_ACF[[i]] <- as.vector(acf_result_FC_scaled$acf)
}
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Autocorrelation table for the scaled SatO2 series: one column per patient,
# one row per stored autocorrelation value.
dimension_col <- ncol(SatO2_TS_HR_P2_scaled)
dimension_row <- 480 # lag.max + 1 (Acf is called with lag.max = 479)
SatO2_TS_HR_P2_scaled_ACF <- data.frame(matrix(nrow = dimension_row, ncol = dimension_col))
colnames(SatO2_TS_HR_P2_scaled_ACF) <- names(SatO2_TS_HR_P2_scaled)
for (i in colnames(SatO2_TS_HR_P2_scaled_ACF)) {
  acf_result_SatO2_scaled <- forecast::Acf(
    SatO2_TS_HR_P2_scaled[[i]],
    lag.max = (dimension_row - 1),
    plot = FALSE
  )
  # Store the autocorrelation values as a plain numeric column.
  SatO2_TS_HR_P2_scaled_ACF[[i]] <- as.vector(acf_result_SatO2_scaled$acf)
}
# Cross-correlation table between FC and SatO2: one column per patient and
# 2 * 480 - 1 = 959 rows (Ccf is called with lag.max = 479).
dimension_col <- ncol(FC_TS_HR_P2)
dimension_row <- 480 # lag.max + 1
SatO2_FC_CCF <- data.frame(matrix(nrow = dimension_row * 2 - 1, ncol = dimension_col))
colnames(SatO2_FC_CCF) <- names(FC_TS_HR_P2)
# NOTE(review): `m` repeats the first-patient computation done inside the loop
# and is not used in the visible code -- remove it if nothing later needs it.
m <- forecast::Ccf(FC_TS_HR_P2[[1]], SatO2_TS_HR_P2[[1]], lag.max = dimension_row - 1, plot = FALSE, drop.lag.0 = FALSE, type = "correlation", ylab = "CCF")
for (i in colnames(SatO2_FC_CCF)) {
  ccf_result <- forecast::Ccf(FC_TS_HR_P2[[i]], SatO2_TS_HR_P2[[i]], lag.max = dimension_row - 1, plot = FALSE, drop.lag.0 = FALSE, type = "correlation", ylab = "CCF")
  # Store the cross-correlation values as a plain numeric column.
  SatO2_FC_CCF[[i]] <- as.vector(ccf_result$acf)
}
Variables that need to be removed from df_descriptive_P2
# Columns removed from the clinical table before modelling.
no_class <- c("UCIP", "OAF", "OAF_AL_INGRESO", "OAF_TRAS_INGRESO")
more_than_8 <- c("DIAS_GN", "DIAS_O2_TOTAL", "DIAS_OAF")
df_descriptive_P2 <- df_descriptive_P2[, setdiff(names(df_descriptive_P2), c(no_class, more_than_8))]
# Move DETERIORO to the last column.
df_descriptive_P2 <- relocate(df_descriptive_P2, DETERIORO, .after = last_col())
head(df_descriptive_P2)
## EDAD PESO EG FR_0_8h FR_8_16h FR_16_24h FLUJO2_0_8H FLUJO2_8_16h
## ACR_11231843 10.0 8.20 41 48 54.0 42.0 2.00 2.0
## ADAO_11159808 13.0 7.78 40 56 52.0 42.0 2.00 2.0
## AGG_11236448 3.1 5.66 37 44 60.0 52.0 1.00 0.5
## AHL_11239959 5.3 8.44 38 65 64.0 50.0 0.40 0.4
## AJGD_11119689 15.0 7.00 34 37 38.8 36.0 2.00 2.0
## AMP_11228639 1.6 3.80 37 42 32.0 42.8 0.94 0.4
## FLUJO2_16_24h SAPI_0_8h SAPI_8_16h SAPI_16_24h
## ACR_11231843 2.0 2 2 2
## ADAO_11159808 2.0 3 3 3
## AGG_11236448 0.5 2 2 2
## AHL_11239959 0.4 3 2 1
## AJGD_11119689 2.0 0 1 2
## AMP_11228639 0.3 1 1 1
## SCORE_CRUCES_INGRESO SCORE_WOOD_DOWNES_INGRESO
## ACR_11231843 2 5
## ADAO_11159808 3 7
## AGG_11236448 2 6
## AHL_11239959 2 5
## AJGD_11119689 2 5
## AMP_11228639 3 6
## SCORE_WOOD_DOWNES_24H SEXO PALIVIZUMAB LM DERMATITIS ALERGIAS
## ACR_11231843 5 0 0 1 0 1
## ADAO_11159808 8 0 0 0 0 1
## AGG_11236448 5 0 0 1 0 0
## AHL_11239959 6 0 0 1 0 0
## AJGD_11119689 5 0 1 0 0 0
## AMP_11228639 4 0 0 1 0 0
## TABACO ENFERMEDAD_BASE RADIOGRAFIA ANALITICA SUERO ETIOLOGIA
## ACR_11231843 0 0 0 0 0 1
## ADAO_11159808 1 1 0 0 1 0
## AGG_11236448 0 0 0 0 0 1
## AHL_11239959 0 0 0 0 0 1
## AJGD_11119689 1 1 0 0 1 1
## AMP_11228639 1 1 0 0 0 0
## PREMATURIDAD ALIMENTACION SNG GN_INGRESO PAUSAS_APNEA DETERIORO
## ACR_11231843 0 1 0 1 0 0
## ADAO_11159808 0 0 0 1 0 0
## AGG_11236448 0 1 0 1 0 0
## AHL_11239959 0 1 0 0 0 0
## AJGD_11119689 1 1 0 1 0 0
## AMP_11228639 0 1 0 0 0 0
Deleting variables that occur after the 8 first hours
# Drop the clinical measurements taken after the first 8 hours.
df_descriptive_P2_8 <- select(
  df_descriptive_P2,
  -c(FR_8_16h, FR_16_24h, FLUJO2_8_16h, FLUJO2_16_24h,
     SCORE_WOOD_DOWNES_24H, SAPI_16_24h, SAPI_8_16h)
)
# The first model uses the clinical variables only.
first.model.df <- cbind(df_descriptive_P2_8)
# First 50 rows of each ACF table, transposed so that patients are rows.
ACF_HR_50 <- t(FC_TS_HR_P2_scaled_ACF[1:50, , drop = FALSE])
colnames(ACF_HR_50) <- paste0("ACF_HR", 1:50)
ACF_SatO2_50 <- t(SatO2_TS_HR_P2_scaled_ACF[1:50, , drop = FALSE])
colnames(ACF_SatO2_50) <- paste0("ACF_SatO2", 1:50)
# First 100 rows of the CCF table, transposed so that patients are rows.
# NOTE(review): SatO2_FC_CCF has 959 rows built with lag.max = 479. If the
# rows run from lag -479 upwards (as stats::ccf documents), rows 1:100 are the
# most negative lags, not the lags around 0 -- confirm this is intended.
CCF_100 <- t(SatO2_FC_CCF[1:100, , drop = FALSE])
colnames(CCF_100) <- paste0("CCF_", 1:100)
Clinical Variables
Mean SatO2 Scaled
Mean SC (Scaled FC data)
Var SatO2 Scaled
Var SC (Scaled FC data)
# Second model: the clinical variables followed by the ACF/CCF features and
# the block means/variances of the scaled and quantile series. The clinical
# table comes first, so DETERIORO is not the last column of the result.
second.model.df <- cbind(
  df_descriptive_P2_8,
  ACF_HR_50,
  ACF_SatO2_50,
  CCF_100,
  medias_SC_P2,
  var_SC_FC_P2,
  medias_SC_SO2_P2,
  var_SC_SatO2_P2,
  medias_Q_P2,
  var_Q_P2
)
head(first.model.df)
## EDAD PESO EG FR_0_8h FLUJO2_0_8H SAPI_0_8h SCORE_CRUCES_INGRESO
## ACR_11231843 10.0 8.20 41 48 2.00 2 2
## ADAO_11159808 13.0 7.78 40 56 2.00 3 3
## AGG_11236448 3.1 5.66 37 44 1.00 2 2
## AHL_11239959 5.3 8.44 38 65 0.40 3 2
## AJGD_11119689 15.0 7.00 34 37 2.00 0 2
## AMP_11228639 1.6 3.80 37 42 0.94 1 3
## SCORE_WOOD_DOWNES_INGRESO SEXO PALIVIZUMAB LM DERMATITIS ALERGIAS
## ACR_11231843 5 0 0 1 0 1
## ADAO_11159808 7 0 0 0 0 1
## AGG_11236448 6 0 0 1 0 0
## AHL_11239959 5 0 0 1 0 0
## AJGD_11119689 5 0 1 0 0 0
## AMP_11228639 6 0 0 1 0 0
## TABACO ENFERMEDAD_BASE RADIOGRAFIA ANALITICA SUERO ETIOLOGIA
## ACR_11231843 0 0 0 0 0 1
## ADAO_11159808 1 1 0 0 1 0
## AGG_11236448 0 0 0 0 0 1
## AHL_11239959 0 0 0 0 0 1
## AJGD_11119689 1 1 0 0 1 1
## AMP_11228639 1 1 0 0 0 0
## PREMATURIDAD ALIMENTACION SNG GN_INGRESO PAUSAS_APNEA DETERIORO
## ACR_11231843 0 1 0 1 0 0
## ADAO_11159808 0 0 0 1 0 0
## AGG_11236448 0 1 0 1 0 0
## AHL_11239959 0 1 0 0 0 0
## AJGD_11119689 1 1 0 1 0 0
## AMP_11228639 0 1 0 0 0 0
#table(df_descriptive_P2_8$DETERIORO)
set.seed(12345)
# Coerce every column to numeric. vapply() replaces sapply() so the result
# shape is declared up front (one numeric vector of nrow length per column)
# instead of depending on sapply()'s simplification.
# NOTE(review): as.numeric() on a factor column returns its level codes, and
# the matrix round-trip appears to drop the patient row names -- confirm both
# are acceptable here.
first.model.df <- as.data.frame(
  vapply(first.model.df, as.numeric, numeric(nrow(first.model.df)))
)
first.model.df$DETERIORO <- factor(first.model.df$DETERIORO)
# Oversample with SMOTE using DETERIORO as the class attribute.
# NOTE(review): the variable name mentions MWMOTE but the method is "SMOTE".
first.newMWMOTE_FIN <- imbalance::oversample(first.model.df, ratio = 0.80, method = "SMOTE", classAttr = "DETERIORO")
first.newMWMOTE_FIN <- data.frame(first.newMWMOTE_FIN)
# Treatment of categorical values
set.seed(12345)
# Column positions delimiting the score columns, plus the class column.
pos_1 <- get_column_position(first.newMWMOTE_FIN, "SAPI_0_8h")
pos_2 <- get_column_position(first.newMWMOTE_FIN, "SCORE_WOOD_DOWNES_INGRESO")
pos_3 <- get_column_position(first.newMWMOTE_FIN, "DETERIORO")
# Round every column in pos_1:pos_2 (except the class column) to one decimal.
# NOTE(review): these columns are turned into factors right below, so rounding
# to one decimal can create fractional levels -- confirm that 1 digit (rather
# than 0) is intended.
columns_to_round <- setdiff(pos_1:pos_2, pos_3)
first.newMWMOTE_FIN[columns_to_round] <- lapply(
  first.newMWMOTE_FIN[columns_to_round],
  round,
  digits = 1
)
# Convert the whole pos_1:pos_2 range to factors.
col_names_factor <- names(first.newMWMOTE_FIN)[pos_1:pos_2]
first.newMWMOTE_FIN[col_names_factor] <- lapply(first.newMWMOTE_FIN[col_names_factor], factor)
#table(first.newMWMOTE_FIN$DETERIORO)
head(second.model.df)
## EDAD PESO EG FR_0_8h FLUJO2_0_8H SAPI_0_8h SCORE_CRUCES_INGRESO
## ACR_11231843 10.0 8.20 41 48 2.00 2 2
## ADAO_11159808 13.0 7.78 40 56 2.00 3 3
## AGG_11236448 3.1 5.66 37 44 1.00 2 2
## AHL_11239959 5.3 8.44 38 65 0.40 3 2
## AJGD_11119689 15.0 7.00 34 37 2.00 0 2
## AMP_11228639 1.6 3.80 37 42 0.94 1 3
## SCORE_WOOD_DOWNES_INGRESO SEXO PALIVIZUMAB LM DERMATITIS ALERGIAS
## ACR_11231843 5 0 0 1 0 1
## ADAO_11159808 7 0 0 0 0 1
## AGG_11236448 6 0 0 1 0 0
## AHL_11239959 5 0 0 1 0 0
## AJGD_11119689 5 0 1 0 0 0
## AMP_11228639 6 0 0 1 0 0
## TABACO ENFERMEDAD_BASE RADIOGRAFIA ANALITICA SUERO ETIOLOGIA
## ACR_11231843 0 0 0 0 0 1
## ADAO_11159808 1 1 0 0 1 0
## AGG_11236448 0 0 0 0 0 1
## AHL_11239959 0 0 0 0 0 1
## AJGD_11119689 1 1 0 0 1 1
## AMP_11228639 1 1 0 0 0 0
## PREMATURIDAD ALIMENTACION SNG GN_INGRESO PAUSAS_APNEA DETERIORO
## ACR_11231843 0 1 0 1 0 0
## ADAO_11159808 0 0 0 1 0 0
## AGG_11236448 0 1 0 1 0 0
## AHL_11239959 0 1 0 0 0 0
## AJGD_11119689 1 1 0 1 0 0
## AMP_11228639 0 1 0 0 0 0
## ACF_HR1 ACF_HR2 ACF_HR3 ACF_HR4 ACF_HR5 ACF_HR6
## ACR_11231843 1 0.5747954 0.4244149 0.3898310 0.3054550 0.2987537
## ADAO_11159808 1 0.6805727 0.5935279 0.5085316 0.4365390 0.3660983
## AGG_11236448 1 0.7659893 0.6522822 0.5752187 0.5026580 0.4312281
## AHL_11239959 1 0.7330013 0.6576631 0.6158813 0.5836972 0.5097892
## AJGD_11119689 1 0.4856503 0.4165173 0.3766304 0.3176037 0.3071462
## AMP_11228639 1 0.6595950 0.6178051 0.6037129 0.5651124 0.5755787
## ACF_HR7 ACF_HR8 ACF_HR9 ACF_HR10 ACF_HR11 ACF_HR12
## ACR_11231843 0.2466085 0.1833401 0.1800060 0.1590625 0.1193108 0.1028016
## ADAO_11159808 0.3061974 0.2645815 0.2238202 0.1822452 0.1658125 0.1617351
## AGG_11236448 0.4003839 0.3616214 0.3484616 0.3680116 0.3937240 0.3530302
## AHL_11239959 0.4615090 0.4253346 0.3663603 0.3350366 0.3211704 0.3012808
## AJGD_11119689 0.2873193 0.2504552 0.2382239 0.2213956 0.1841389 0.1555994
## AMP_11228639 0.5525003 0.5484309 0.5151089 0.5260231 0.5356568 0.5412524
## ACF_HR13 ACF_HR14 ACF_HR15 ACF_HR16 ACF_HR17
## ACR_11231843 0.08907378 0.02692387 0.02098007 0.01292424 0.0006154294
## ADAO_11159808 0.14100383 0.13026706 0.13321015 0.13066704 0.1285902414
## AGG_11236448 0.34635666 0.35754880 0.32532530 0.27518679 0.2299503431
## AHL_11239959 0.29708129 0.26719489 0.25676612 0.24100162 0.2420480921
## AJGD_11119689 0.19398733 0.15437946 0.18278084 0.17649073 0.1750699756
## AMP_11228639 0.53812733 0.53846799 0.54239706 0.55188989 0.5360942030
## ACF_HR18 ACF_HR19 ACF_HR20 ACF_HR21 ACF_HR22
## ACR_11231843 0.004233393 -0.006757205 -0.007036055 -0.01496791 -0.02636549
## ADAO_11159808 0.110254753 0.127414548 0.087625607 0.07760646 0.09671878
## AGG_11236448 0.206868669 0.184265903 0.151694667 0.14302509 0.12456011
## AHL_11239959 0.198732377 0.197691766 0.174912058 0.19319050 0.21205775
## AJGD_11119689 0.190130736 0.148089255 0.169290584 0.18741791 0.23496989
## AMP_11228639 0.531989902 0.471925620 0.484153622 0.49282071 0.49739289
## ACF_HR23 ACF_HR24 ACF_HR25 ACF_HR26 ACF_HR27
## ACR_11231843 -0.001402886 0.01324269 0.02086305 0.02388871 -0.01035748
## ADAO_11159808 0.104156536 0.10125899 0.07996171 0.07672970 0.09223638
## AGG_11236448 0.131032068 0.15374218 0.13654930 0.11173266 0.10480140
## AHL_11239959 0.206312141 0.18457331 0.17656272 0.19223655 0.17530399
## AJGD_11119689 0.197737591 0.19868995 0.18905268 0.22099258 0.18826589
## AMP_11228639 0.498560780 0.47651016 0.47433498 0.49124269 0.46617148
## ACF_HR28 ACF_HR29 ACF_HR30 ACF_HR31 ACF_HR32
## ACR_11231843 -0.03510893 -0.04784269 -0.05506440 -0.008593307 0.04976843
## ADAO_11159808 0.06817663 0.09274641 0.06786673 0.071169346 0.07319753
## AGG_11236448 0.09948863 0.06725227 0.07242530 0.075139440 0.09715155
## AHL_11239959 0.13561442 0.14198217 0.15516364 0.104568650 0.08413169
## AJGD_11119689 0.20099552 0.18028969 0.14769550 0.165832019 0.11516172
## AMP_11228639 0.47121446 0.49358464 0.45935156 0.460468415 0.43672572
## ACF_HR33 ACF_HR34 ACF_HR35 ACF_HR36 ACF_HR37 ACF_HR38
## ACR_11231843 0.09740572 0.06713914 0.04049820 0.01295611 0.03353650 0.02670075
## ADAO_11159808 0.07839764 0.04368361 0.05899972 0.06957130 0.06502349 0.04428139
## AGG_11236448 0.11831388 0.15119247 0.16481099 0.17926504 0.17486282 0.17939105
## AHL_11239959 0.10251633 0.09851785 0.13082361 0.13043217 0.13487428 0.11851440
## AJGD_11119689 0.12166889 0.13947771 0.13992780 0.11603972 0.12556575 0.13822745
## AMP_11228639 0.43495816 0.42293683 0.41076414 0.39748507 0.38724889 0.38415061
## ACF_HR39 ACF_HR40 ACF_HR41 ACF_HR42 ACF_HR43
## ACR_11231843 0.05435805 0.05163509 0.04581221 0.03625811 0.006093607
## ADAO_11159808 0.08720689 0.08245975 0.07325789 0.02199331 0.010335726
## AGG_11236448 0.16038387 0.14700455 0.14244461 0.15426774 0.162140790
## AHL_11239959 0.13412057 0.14253952 0.14241159 0.13874827 0.152605118
## AJGD_11119689 0.08786577 0.11241510 0.12355936 0.14482627 0.151343806
## AMP_11228639 0.37327204 0.38356428 0.34057877 0.35432214 0.350061870
## ACF_HR44 ACF_HR45 ACF_HR46 ACF_HR47 ACF_HR48
## ACR_11231843 0.015911426 0.065718135 -0.044529286 0.02727482 0.03598471
## ADAO_11159808 0.005374176 0.008207619 0.005354707 0.04384913 0.07181361
## AGG_11236448 0.167102275 0.193588145 0.189538586 0.17553872 0.17449402
## AHL_11239959 0.122308403 0.116989766 0.097048192 0.09234721 0.08137636
## AJGD_11119689 0.125769167 0.114462985 0.147833838 0.11542335 0.14404906
## AMP_11228639 0.362196920 0.348115576 0.328500927 0.32673600 0.28805596
## ACF_HR49 ACF_HR50 ACF_SatO21 ACF_SatO22 ACF_SatO23 ACF_SatO24
## ACR_11231843 0.01538854 0.03556238 1 0.5082890 0.3997243 0.3055021
## ADAO_11159808 0.07248583 0.09748668 1 0.7960148 0.7358783 0.7023573
## AGG_11236448 0.19356802 0.19300176 1 0.4506480 0.4176547 0.3266226
## AHL_11239959 0.05817498 0.05753825 1 0.6522007 0.4130156 0.3200723
## AJGD_11119689 0.07111346 0.10372669 1 0.6469179 0.5880904 0.5481336
## AMP_11228639 0.28570880 0.27435691 1 0.3765707 0.3564648 0.2828661
## ACF_SatO25 ACF_SatO26 ACF_SatO27 ACF_SatO28 ACF_SatO29
## ACR_11231843 0.3009323 0.2725452 0.2828684 0.2414714 0.2329953
## ADAO_11159808 0.7034097 0.6571457 0.6266538 0.6305916 0.6168577
## AGG_11236448 0.3352883 0.2867152 0.3288047 0.2209685 0.2946372
## AHL_11239959 0.3440361 0.3568053 0.3408829 0.3217067 0.3072727
## AJGD_11119689 0.5084136 0.4652971 0.4792604 0.4275522 0.3935165
## AMP_11228639 0.2304710 0.2216612 0.1820556 0.1759353 0.1038674
## ACF_SatO210 ACF_SatO211 ACF_SatO212 ACF_SatO213 ACF_SatO214
## ACR_11231843 0.18395173 0.1467580 0.12033122 0.08302370 0.08933466
## ADAO_11159808 0.59417999 0.6000893 0.60607048 0.60015069 0.58930211
## AGG_11236448 0.24923513 0.2791107 0.25321237 0.23780416 0.18830545
## AHL_11239959 0.25540711 0.2257664 0.22431145 0.24252531 0.26020561
## AJGD_11119689 0.35060010 0.3330913 0.30196103 0.25973001 0.21434187
## AMP_11228639 0.07915669 0.1212448 0.08580592 0.08996543 0.07081004
## ACF_SatO215 ACF_SatO216 ACF_SatO217 ACF_SatO218 ACF_SatO219
## ACR_11231843 0.05687640 0.05274341 0.06126060 0.08978425 0.02651201
## ADAO_11159808 0.59429754 0.56563344 0.55562686 0.55569350 0.55287474
## AGG_11236448 0.29476738 0.24013660 0.29281568 0.24484356 0.27312417
## AHL_11239959 0.23401364 0.19351172 0.17366679 0.14454853 0.11669907
## AJGD_11119689 0.22249353 0.21308087 0.18224214 0.17989537 0.16992608
## AMP_11228639 0.05479702 0.00521051 0.02134299 0.04160247 0.07214742
## ACF_SatO220 ACF_SatO221 ACF_SatO222 ACF_SatO223 ACF_SatO224
## ACR_11231843 0.078210876 0.05633258 0.007953371 0.02307094 0.05578342
## ADAO_11159808 0.553927137 0.53920741 0.525401555 0.53638341 0.53743580
## AGG_11236448 0.178482125 0.22141972 0.170925445 0.15660776 0.11918869
## AHL_11239959 0.149636558 0.16472810 0.173167686 0.16825227 0.13524343
## AJGD_11119689 0.118883946 0.14000960 0.165611015 0.12406405 0.14798910
## AMP_11228639 -0.004522566 0.02031293 0.111043640 0.09620470 0.07466595
## ACF_SatO225 ACF_SatO226 ACF_SatO227 ACF_SatO228 ACF_SatO229
## ACR_11231843 0.05754983 0.01633989 0.03651988 0.03014404 0.03653216
## ADAO_11159808 0.53454517 0.54348362 0.52573474 0.51981495 0.50410946
## AGG_11236448 0.15172497 0.14043635 0.13107597 0.09380520 0.10983555
## AHL_11239959 0.08820634 0.09185565 0.09736712 0.07803585 0.06991932
## AJGD_11119689 0.20780663 0.16061276 0.21066161 0.18263507 0.18302649
## AMP_11228639 0.05033299 0.01874388 0.06265166 0.01221945 0.06245386
## ACF_SatO230 ACF_SatO231 ACF_SatO232 ACF_SatO233 ACF_SatO234
## ACR_11231843 0.026885553 0.018244632 0.03425803 0.03731653 0.08545823
## ADAO_11159808 0.503118465 0.488398734 0.47360713 0.48528722 0.49035453
## AGG_11236448 0.073366759 0.122373113 0.07317205 0.09085445 0.11009158
## AHL_11239959 0.089334524 0.053043299 0.04793588 0.03307846 0.04347559
## AJGD_11119689 0.167654826 0.196464213 0.20440416 0.24276020 0.21465966
## AMP_11228639 0.008097181 -0.006432008 -0.04581565 -0.01922558 0.02923064
## ACF_SatO235 ACF_SatO236 ACF_SatO237 ACF_SatO238 ACF_SatO239
## ACR_11231843 0.08212560 0.04122430 0.02554620 0.04391622 0.02081907
## ADAO_11159808 0.48337710 0.47154277 0.47153753 0.46864689 0.43414012
## AGG_11236448 0.11357759 0.14531284 0.11294811 0.06122085 0.12123779
## AHL_11239959 0.01617575 -0.02722358 -0.04350629 -0.03287029 0.02951063
## AJGD_11119689 0.17716046 0.15298218 0.13635243 0.12934334 0.10067326
## AMP_11228639 -0.03056929 0.01440266 0.01397162 -0.01940303 0.03909295
## ACF_SatO240 ACF_SatO241 ACF_SatO242 ACF_SatO243 ACF_SatO244
## ACR_11231843 0.058707749 0.03614286 0.01274100 0.04554895 0.078224804
## ADAO_11159808 0.423147789 0.40624091 0.40820718 0.40425891 0.383480876
## AGG_11236448 0.127562364 0.06081561 0.11997640 0.09993272 0.085353394
## AHL_11239959 0.069235684 0.06395972 0.07775235 0.05163507 0.029005208
## AJGD_11119689 0.079159171 0.05660577 0.03686780 0.00625108 0.004644364
## AMP_11228639 0.001048235 0.08743216 0.01282564 0.06121771 0.114447275
## ACF_SatO245 ACF_SatO246 ACF_SatO247 ACF_SatO248 ACF_SatO249
## ACR_11231843 0.03627727 0.03475069 -0.01733608 -0.007981913 -0.05040679
## ADAO_11159808 0.36664587 0.35974033 0.36459200 0.337971294 0.32803660
## AGG_11236448 0.10265570 0.03053047 0.04854476 0.065612107 0.04693021
## AHL_11239959 0.05880395 0.05077536 0.04689787 0.040638027 0.04475613
## AJGD_11119689 0.03616619 0.02081380 -0.01999312 -0.019823713 -0.02207396
## AMP_11228639 0.06079782 0.07559045 0.07100495 0.064710841 0.03629374
## ACF_SatO250 CCF_1 CCF_2 CCF_3
## ACR_11231843 -0.034433928 0.0004546919 3.058839e-03 0.0040423309
## ADAO_11159808 0.342673967 -0.0002186104 -5.096438e-05 -0.0006558311
## AGG_11236448 0.093592552 -0.0002833700 -2.303755e-03 -0.0039102611
## AHL_11239959 0.039269127 0.0020470889 3.135213e-03 0.0011678422
## AJGD_11119689 -0.063125407 0.0002870475 1.449830e-04 0.0004194096
## AMP_11228639 -0.005938136 -0.0011565578 -1.132326e-04 -0.0008998812
## CCF_4 CCF_5 CCF_6 CCF_7
## ACR_11231843 0.0042604543 2.982095e-03 0.0008334624 -0.0003565855
## ADAO_11159808 -0.0004881851 -1.093052e-03 -0.0023416790 -0.0030969976
## AGG_11236448 -0.0030435705 -6.105371e-03 -0.0102664720 -0.0104252073
## AHL_11239959 0.0043663205 4.746182e-03 0.0050084112 0.0081714646
## AJGD_11119689 0.0013627458 7.789485e-04 0.0004349491 0.0013404225
## AMP_11228639 -0.0020682374 4.315214e-05 -0.0003246939 0.0014815937
## CCF_8 CCF_9 CCF_10 CCF_11
## ACR_11231843 -0.0039294457 -0.005828664 -0.0057106273 -0.004011007
## ADAO_11159808 -0.0045047567 -0.004920112 -0.0071307634 -0.007653171
## AGG_11236448 -0.0093722480 -0.013352683 -0.0142401529 -0.011610016
## AHL_11239959 0.0040492172 0.005643296 0.0049587033 0.005221112
## AJGD_11119689 0.0019934772 0.002192178 0.0026054349 0.002589580
## AMP_11228639 0.0008153953 0.001464314 0.0005398061 -0.002547857
## CCF_12 CCF_13 CCF_14 CCF_15
## ACR_11231843 -0.0016728658 0.001311289 0.002263739 0.0012834979
## ADAO_11159808 -0.0081770250 -0.011180442 -0.011595797 -0.0134216392
## AGG_11236448 -0.0161311349 -0.019307868 -0.016119532 -0.0186554317
## AHL_11239959 0.0117062279 0.007839770 0.008463277 0.0135671149
## AJGD_11119689 0.0021572339 0.001989928 0.002819675 0.0027028528
## AMP_11228639 0.0002269186 -0.003891850 -0.003790363 -0.0005099842
## CCF_16 CCF_17 CCF_18 CCF_19
## ACR_11231843 -0.001549684 0.0120848708 1.402588e-02 -0.0005688282
## ADAO_11159808 -0.015277861 -0.0135348097 -1.306915e-02 -0.0135452677
## AGG_11236448 -0.029165734 -0.0308020851 -3.339237e-02 -0.0385093928
## AHL_11239959 0.013135266 0.0164668146 9.564670e-03 0.0036165832
## AJGD_11119689 0.001260832 0.0009546951 1.751155e-05 0.0001531078
## AMP_11228639 0.003437316 0.0058448319 3.717335e-03 0.0067373010
## CCF_20 CCF_21 CCF_22 CCF_23 CCF_24
## ACR_11231843 -0.023470192 -0.027826618 -0.003238727 0.015534503 0.008140686
## ADAO_11159808 -0.013970749 -0.013732218 -0.016981566 -0.019808491 -0.019144643
## AGG_11236448 -0.043459668 -0.040046886 -0.038553263 -0.044806016 -0.047894190
## AHL_11239959 0.003357061 0.003704613 0.003197076 0.004258693 0.007991851
## AJGD_11119689 -0.001036495 -0.002918496 -0.001426226 -0.001443727 -0.003804005
## AMP_11228639 0.005783935 0.006381636 0.003569838 0.002588991 -0.001658051
## CCF_25 CCF_26 CCF_27 CCF_28 CCF_29
## ACR_11231843 -0.002989521 -0.003111799 0.002550076 0.001198391 -0.009986409
## ADAO_11159808 -0.019461626 -0.019192715 -0.019299933 -0.020785811 -0.023809481
## AGG_11236448 -0.046050599 -0.051025772 -0.061949405 -0.069061214 -0.072158563
## AHL_11239959 0.010228070 0.014847014 0.016164241 0.014334421 0.004460782
## AJGD_11119689 -0.003986441 -0.002642870 -0.004457437 -0.007619795 -0.005406039
## AMP_11228639 -0.002638823 -0.007562818 -0.012663710 -0.006551292 -0.003371610
## CCF_30 CCF_31 CCF_32 CCF_33 CCF_34
## ACR_11231843 0.0051735312 0.004855361 0.011590358 0.025815799 0.039122798
## ADAO_11159808 -0.0235752894 -0.027446698 -0.028005272 -0.025929491 -0.024415011
## AGG_11236448 -0.0926706241 -0.093279037 -0.081288185 -0.083957724 -0.094464480
## AHL_11239959 -0.0004750371 -0.004759260 -0.007601417 -0.018404827 -0.028743262
## AJGD_11119689 -0.0060789668 -0.005123010 -0.006110739 -0.005856458 -0.004762471
## AMP_11228639 -0.0057502575 -0.007770127 -0.010160671 -0.013466310 -0.019729555
## CCF_35 CCF_36 CCF_37 CCF_38 CCF_39
## ACR_11231843 0.02979790 0.022310953 0.019525405 0.0267977661 0.019980920
## ADAO_11159808 -0.02248390 -0.021127101 -0.020999961 -0.0184135111 -0.015064675
## AGG_11236448 -0.07319993 -0.076255893 -0.077424521 -0.0637237815 -0.048401469
## AHL_11239959 -0.03336860 -0.031705709 -0.038499498 -0.0273063815 -0.022250113
## AJGD_11119689 -0.00726760 -0.004965982 -0.001253485 0.0003377283 0.003303363
## AMP_11228639 -0.02726893 -0.025642729 -0.035918737 -0.0291796161 -0.033971332
## CCF_40 CCF_41 CCF_42 CCF_43 CCF_44
## ACR_11231843 0.021127655 0.023193368 0.034513084 0.030099924 0.028115605
## ADAO_11159808 -0.015116921 -0.013214738 -0.009578018 -0.008788311 -0.010873104
## AGG_11236448 -0.050523824 -0.051225010 -0.043811558 -0.049631757 -0.047960897
## AHL_11239959 -0.019776768 -0.008137230 -0.006686933 -0.001441107 0.001068917
## AJGD_11119689 0.004570806 0.006076082 0.005484539 0.001759943 0.003981980
## AMP_11228639 -0.035383382 -0.017982636 -0.019930411 -0.015593445 -0.016972870
## CCF_45 CCF_46 CCF_47 CCF_48
## ACR_11231843 0.017691357 0.004728547 -0.0023334312 0.0008733767
## ADAO_11159808 -0.011768748 -0.011878859 -0.0101097684 -0.0120354292
## AGG_11236448 -0.036007611 -0.032215460 -0.0263738728 -0.0163090961
## AHL_11239959 -0.005689447 -0.005087718 -0.0118034211 -0.0153702843
## AJGD_11119689 0.003448491 0.001105411 0.0006321922 0.0030234418
## AMP_11228639 -0.014132569 -0.007595835 -0.0183720294 -0.0009194270
## CCF_49 CCF_50 CCF_51 CCF_52 CCF_53
## ACR_11231843 0.015095606 0.043698740 0.034964725 0.012489399 -0.032259227
## ADAO_11159808 -0.003864899 -0.010719307 -0.008267396 -0.005222357 -0.011855426
## AGG_11236448 -0.003414610 0.005852453 -0.001868456 -0.001059504 0.007899973
## AHL_11239959 -0.014620406 -0.015515727 -0.012886031 -0.020222454 -0.023264809
## AJGD_11119689 0.001261241 -0.002953830 -0.004101116 -0.005079172 -0.001891924
## AMP_11228639 -0.010292795 -0.013804737 -0.006739147 -0.007855929 -0.005287801
## CCF_54 CCF_55 CCF_56 CCF_57 CCF_58
## ACR_11231843 -0.063290091 -0.053885402 -0.044900864 -0.027834995 -0.04257100
## ADAO_11159808 -0.009850531 -0.003641662 -0.006122838 -0.003960258 -0.00639080
## AGG_11236448 0.001726306 0.013550687 0.034409008 0.032746626 0.03876397
## AHL_11239959 -0.018102872 -0.019863634 -0.009675012 -0.013720427 -0.01115499
## AJGD_11119689 -0.006949605 -0.006641876 -0.006902861 -0.009590861 -0.01588806
## AMP_11228639 -0.014707599 -0.007742001 -0.019229791 -0.016685292 -0.01522464
## CCF_59 CCF_60 CCF_61 CCF_62
## ACR_11231843 -0.029498962 -0.007092104 -0.004249248 -0.0237963701
## ADAO_11159808 -0.012194938 -0.012880817 -0.014362355 -0.0078496896
## AGG_11236448 0.059812204 0.064905549 0.049552850 0.0766688491
## AHL_11239959 -0.002991042 -0.007571676 -0.001045690 -0.0025250528
## AJGD_11119689 -0.016025706 -0.007478115 -0.009071154 -0.0006867116
## AMP_11228639 -0.012008412 -0.016714435 -0.017694422 -0.0231633647
## CCF_63 CCF_64 CCF_65 CCF_66 CCF_67
## ACR_11231843 -0.0231960976 -0.050146958 -0.052115220 -0.048797085 -0.026390227
## ADAO_11159808 -0.0117905380 -0.009688717 -0.012342044 -0.009288326 -0.009201692
## AGG_11236448 0.0850389974 0.077232596 0.077041452 0.082122001 0.077159311
## AHL_11239959 -0.0027261169 0.008025126 0.014233033 0.011265648 0.016722811
## AJGD_11119689 0.0005062532 0.003351400 0.008152499 -0.001115105 0.003591707
## AMP_11228639 -0.0376834237 -0.031009823 -0.029203701 -0.022526338 -0.020968759
## CCF_68 CCF_69 CCF_70 CCF_71
## ACR_11231843 -0.015458548 -0.023928162 -0.0067536429 -0.0107471883
## ADAO_11159808 -0.003936041 -0.006874359 -0.0022076225 0.0005394056
## AGG_11236448 0.087121007 0.071732905 0.0892125073 0.0691595862
## AHL_11239959 0.015076745 0.019796809 0.0255843728 0.0268484078
## AJGD_11119689 -0.005624475 -0.002319608 -0.0004026998 -0.0011609980
## AMP_11228639 -0.007972429 -0.014272650 -0.0184297325 -0.0135004319
## CCF_72 CCF_73 CCF_74 CCF_75
## ACR_11231843 -0.0050098466 -0.0084895781 -0.0044494281 0.0038419970
## ADAO_11159808 -0.0008090407 0.0007763252 0.0004260694 0.0001886526
## AGG_11236448 0.0650604697 0.0674499168 0.0772306014 0.0575508828
## AHL_11239959 0.0260525238 0.0173458911 0.0156477079 0.0194211297
## AJGD_11119689 -0.0028597845 -0.0120831827 -0.0113133268 -0.0137173465
## AMP_11228639 -0.0126873388 -0.0100442896 -0.0170506385 -0.0147850900
## CCF_76 CCF_77 CCF_78 CCF_79 CCF_80
## ACR_11231843 0.012179987 0.021109933 0.019716500 0.010810144 -0.007344863
## ADAO_11159808 -0.006884199 -0.003154894 -0.008710207 -0.003748353 -0.004351773
## AGG_11236448 0.061011000 0.056680470 0.048902849 0.037629264 0.036355374
## AHL_11239959 0.015635446 0.022641596 0.023206646 0.030377997 0.033418869
## AJGD_11119689 -0.009793682 -0.016502857 -0.011393416 -0.013597074 -0.011733272
## AMP_11228639 -0.022828706 -0.027329152 -0.028342411 -0.029764400 -0.034194206
## CCF_81 CCF_82 CCF_83 CCF_84
## ACR_11231843 -0.025795314 -0.016740661 -0.0074376644 -0.0202051183
## ADAO_11159808 -0.004202934 -0.002922812 0.0012433831 -0.0041846245
## AGG_11236448 0.026828953 0.024915464 0.0400122043 0.0477331734
## AHL_11239959 0.036958460 0.048105978 0.0571783884 0.0553257144
## AJGD_11119689 -0.003035092 -0.007107151 0.0003619212 -0.0008142114
## AMP_11228639 -0.036292493 -0.027521342 -0.0382333992 -0.0318572180
## CCF_85 CCF_86 CCF_87 CCF_88
## ACR_11231843 -0.040368282 -0.0421427932 -0.037992387 -0.0402962341
## ADAO_11159808 -0.012154401 -0.0001256478 0.002372556 0.0009430972
## AGG_11236448 0.027460311 0.0227268736 -0.010222478 -0.0196085035
## AHL_11239959 0.055676740 0.0603232663 0.057482791 0.0535364550
## AJGD_11119689 0.004651249 0.0185391318 0.011099193 0.0262233816
## AMP_11228639 -0.016810513 -0.0156801982 -0.015800794 -0.0144179732
## CCF_89 CCF_90 CCF_91 CCF_92 CCF_93
## ACR_11231843 -0.036706207 -0.023937107 -0.04455039 -0.056521970 -0.074324801
## ADAO_11159808 0.003312549 0.004184715 0.01057152 0.010937359 0.008935026
## AGG_11236448 -0.009713435 -0.027188223 -0.02529531 -0.021398218 -0.021074770
## AHL_11239959 0.053775830 0.054227796 0.05554975 0.052201281 0.062686742
## AJGD_11119689 0.025924736 0.024009404 0.02563622 0.023432609 0.027377835
## AMP_11228639 0.008899564 -0.007586326 -0.01486855 -0.004057226 -0.025856927
## CCF_94 CCF_95 CCF_96 CCF_97 CCF_98
## ACR_11231843 -0.083478430 -0.10681707 -0.125702657 -0.12003382 -0.11782146
## ADAO_11159808 0.011980065 0.01243704 0.008419520 0.01427251 0.01084089
## AGG_11236448 -0.020875257 -0.01682383 -0.045337979 -0.02380581 -0.01616292
## AHL_11239959 0.062028477 0.06160279 0.052983340 0.05330678 0.04825501
## AJGD_11119689 0.021037581 0.02992081 0.023016775 0.02479881 0.02398495
## AMP_11228639 -0.004451642 -0.02393063 -0.003333842 -0.01668824 -0.01192308
## CCF_99 CCF_100 Mean_SC_FC_P2_1 Mean_SC_FC_P2_2
## ACR_11231843 -0.09759510 -0.076168771 0.13118665 0.59114972
## ADAO_11159808 0.01162336 0.001268052 -0.14017217 0.57299155
## AGG_11236448 -0.01008677 -0.005673891 1.03293097 -0.06342918
## AHL_11239959 0.03610457 0.022415838 0.93119030 0.75409673
## AJGD_11119689 0.01281251 0.024077587 -0.08247117 0.59184154
## AMP_11228639 -0.01210488 -0.033906625 0.48068506 1.21058864
## Mean_SC_FC_P2_3 Mean_SC_FC_P2_4 Mean_SC_FC_P2_5 Mean_SC_FC_P2_6
## ACR_11231843 0.19165128 0.1074327 -0.1365853 -0.49505415
## ADAO_11159808 0.70368445 -0.5483858 -0.3450857 -0.27893253
## AGG_11236448 -0.55589376 -0.6340789 -0.2959537 -0.05429066
## AHL_11239959 -0.01756826 -0.4525594 0.2261159 -0.45365939
## AJGD_11119689 0.85456078 -0.1009241 -0.4785830 -0.58804932
## AMP_11228639 0.27439637 -1.0158997 -0.6601790 -0.53567673
## Mean_SC_FC_P2_7 Mean_SC_FC_P2_8 Var_SC_FC_P2_1 Var_SC_FC_P2_2
## ACR_11231843 -0.03077218 -0.3590087 1.3007990 0.2610902
## ADAO_11159808 -0.15308011 0.1889803 0.6324175 0.3342474
## AGG_11236448 0.05537156 0.5153436 1.2891377 1.5759236
## AHL_11239959 -0.13687090 -0.8507450 0.6535629 0.8455883
## AJGD_11119689 -0.07746699 -0.1189078 0.9663248 1.4439244
## AMP_11228639 -0.12746166 0.3735470 0.5654726 0.2876600
## Var_SC_FC_P2_3 Var_SC_FC_P2_4 Var_SC_FC_P2_5 Var_SC_FC_P2_6
## ACR_11231843 0.6146499 0.8959849 0.4520091 0.3348941
## ADAO_11159808 2.1218941 0.8584093 1.0584002 0.4747970
## AGG_11236448 0.2145333 0.2616872 1.0427258 0.2637706
## AHL_11239959 0.7845836 0.1416397 1.0759227 0.3100459
## AJGD_11119689 0.6566901 0.7912407 0.7406210 0.7552193
## AMP_11228639 0.9642212 0.3800796 0.8936375 0.2382078
## Var_SC_FC_P2_7 Var_SC_FC_P2_8 Mean_SC_SO2_P2_1 Mean_SC_SO2_P2_2
## ACR_11231843 0.9436352 2.4933699 0.9319583 -0.3238440
## ADAO_11159808 0.5518705 0.6629801 -0.6227316 -1.3327888
## AGG_11236448 0.5170698 0.7762795 0.5183974 0.7083813
## AHL_11239959 1.4559008 0.1663170 0.2797322 -0.2493144
## AJGD_11119689 0.4741938 0.5692215 0.4436303 0.1040224
## AMP_11228639 0.6482557 0.3961962 0.2617306 0.6274932
## Mean_SC_SO2_P2_3 Mean_SC_SO2_P2_4 Mean_SC_SO2_P2_5
## ACR_11231843 -0.02308460 0.1879746 -0.2710792
## ADAO_11159808 -0.73725694 0.9348132 0.5110694
## AGG_11236448 0.16067464 -0.3612576 -0.9862875
## AHL_11239959 -0.75574665 -0.4416949 0.3085892
## AJGD_11119689 -0.54151316 0.1250725 -0.4011793
## AMP_11228639 -0.09106785 -0.4948308 -0.1492371
## Mean_SC_SO2_P2_6 Mean_SC_SO2_P2_7 Mean_SC_SO2_P2_8
## ACR_11231843 0.06661556 -0.04419052 -0.52435020
## ADAO_11159808 -0.06155738 0.71721501 0.59123712
## AGG_11236448 0.11556939 -0.25171623 0.09623857
## AHL_11239959 0.34706536 0.47211273 0.03925646
## AJGD_11119689 -0.68184699 0.37065672 0.58115746
## AMP_11228639 0.03895749 -0.05364113 -0.13940455
## Var_SatO2_FC_P2_1 Var_SatO2_FC_P2_2 Var_SatO2_FC_P2_3
## ACR_11231843 0.8364830 0.5040869 0.4385987
## ADAO_11159808 0.3868125 0.2955781 0.7522836
## AGG_11236448 1.0605221 0.4007461 0.1898382
## AHL_11239959 0.4609662 0.9526070 3.5319776
## AJGD_11119689 0.4299697 0.6975033 1.4749659
## AMP_11228639 1.9656012 0.5586104 0.9365262
## Var_SatO2_FC_P2_4 Var_SatO2_FC_P2_5 Var_SatO2_FC_P2_6
## ACR_11231843 0.7591032 2.1513446 0.1449632
## ADAO_11159808 0.1339172 0.5360688 0.3279903
## AGG_11236448 0.9753277 1.2499935 0.4642507
## AHL_11239959 0.4482635 0.2167924 0.2988423
## AJGD_11119689 1.0476319 0.7900298 1.8134285
## AMP_11228639 0.9368595 0.9677096 0.7508307
## Var_SatO2_FC_P2_7 Var_SatO2_FC_P2_8 Mean_Q_P2_1 Mean_Q_P2_2
## ACR_11231843 0.5181019 1.37873603 0.9542725 0.9819488
## ADAO_11159808 0.7377448 0.15792620 0.5570747 0.7249887
## AGG_11236448 0.5512740 1.20743193 0.4867412 0.2524360
## AHL_11239959 0.1862119 0.65338827 0.7932506 0.7302792
## AJGD_11119689 0.1791949 0.04100605 0.2655324 0.5257116
## AMP_11228639 0.5436414 0.68445639 0.7867395 0.8954698
## Mean_Q_P2_3 Mean_Q_P2_4 Mean_Q_P2_5 Mean_Q_P2_6 Mean_Q_P2_7
## ACR_11231843 0.96740144 0.95919540 0.9542325 0.9390559 0.9541947
## ADAO_11159808 0.69016171 0.45262153 0.5011029 0.5244741 0.5503328
## AGG_11236448 0.06112992 0.05768611 0.1434693 0.1469481 0.1751905
## AHL_11239959 0.50100816 0.36737763 0.5616739 0.3659843 0.4562215
## AJGD_11119689 0.62585176 0.29639805 0.1762554 0.1896599 0.2939501
## AMP_11228639 0.72774806 0.45976473 0.5411093 0.5783251 0.6558104
## Mean_Q_P2_8 Var_Q_P2_1 Var_Q_P2_2 Var_Q_P2_3 Var_Q_P2_4
## ACR_11231843 0.9264839 0.001873925 0.0001232204 0.0005450126 0.001252521
## ADAO_11159808 0.6297041 0.034781094 0.0128804856 0.0385215303 0.042209627
## AGG_11236448 0.3250607 0.096529734 0.0904903362 0.0056246817 0.006709223
## AHL_11239959 0.2390536 0.038693719 0.0490955771 0.0764341797 0.016172245
## AJGD_11119689 0.2733735 0.100046480 0.1498814949 0.0848383357 0.106828330
## AMP_11228639 0.7613643 0.018101568 0.0032347736 0.0310418934 0.021193749
## Var_Q_P2_5 Var_Q_P2_6 Var_Q_P2_7 Var_Q_P2_8
## ACR_11231843 0.002107901 0.0009429271 0.0009875804 0.01639766
## ADAO_11159808 0.051555431 0.0259336937 0.0253229265 0.02954122
## AGG_11236448 0.072906892 0.0198022558 0.0440500091 0.07919954
## AHL_11239959 0.082331397 0.0329750894 0.1114636743 0.01685491
## AJGD_11119689 0.079328855 0.0674732621 0.0708786050 0.08321515
## AMP_11228639 0.044387179 0.0132279654 0.0252169177 0.01186504
# Class balance before oversampling can be inspected with:
# table(second.model.df$DETERIORO)
set.seed(12345)
# Coerce every column to numeric, one column at a time. lapply() always yields
# one element per column, whereas sapply() changes its return shape with the
# input (a single-row data frame would collapse into a single column).
# Row names are reset to 1..n, exactly as the previous matrix round-trip did.
second.model.df <- data.frame(
  lapply(second.model.df, as.numeric),
  check.names = FALSE
)
second.model.df$DETERIORO <- factor(second.model.df$DETERIORO)
# Oversample with SMOTE using DETERIORO as the class attribute (ratio = 0.80).
second.newMWMOTE_FIN <- imbalance::oversample(
  second.model.df,
  ratio = 0.80,
  method = "SMOTE",
  classAttr = "DETERIORO"
)
second.newMWMOTE_FIN <- data.frame(second.newMWMOTE_FIN)
set.seed(12345)
# Treatment of categorical values: the columns between SAPI_0_8h and
# SCORE_WOOD_DOWNES_INGRESO are rounded to one decimal and turned into factors.
# NOTE(review): get_column_position() is assumed to return the integer index of
# the named column (it is used as the endpoint of `pos_1:pos_2`) - confirm.
pos_1 <- get_column_position(second.newMWMOTE_FIN, "SAPI_0_8h")
pos_2 <- get_column_position(second.newMWMOTE_FIN, "SCORE_WOOD_DOWNES_INGRESO")
pos_3 <- get_column_position(second.newMWMOTE_FIN, "DETERIORO")
# DETERIORO is already a factor, so it is excluded from the rounding step.
columns_to_round <- setdiff(pos_1:pos_2, pos_3)
# List-style indexing (`df[cols]`) keeps a data frame even when only one column
# is selected; `df[, cols]` would drop to a bare vector in that case.
second.newMWMOTE_FIN[columns_to_round] <- lapply(
  second.newMWMOTE_FIN[columns_to_round],
  function(x) round(x, 1)
)
col_names_factor <- names(second.newMWMOTE_FIN[pos_1:pos_2])
second.newMWMOTE_FIN[col_names_factor] <- lapply(
  second.newMWMOTE_FIN[col_names_factor],
  factor
)
# Class balance after oversampling can be inspected with:
# table(second.newMWMOTE_FIN$DETERIORO)
# First model: 80/20 train/test split on DETERIORO, then a baseline random
# forest with default hyperparameters and variable importance enabled.
set.seed(12345)
first.data_partition_FIN_P2 <- caret::createDataPartition(
  first.newMWMOTE_FIN$DETERIORO,
  times = 1,
  p = 0.8,
  list = FALSE
)
first.train_data_FIN_P2_SM <- first.newMWMOTE_FIN[first.data_partition_FIN_P2, ]
first.test_data_FIN_P2_SM <- first.newMWMOTE_FIN[-first.data_partition_FIN_P2, ]
# Re-seed so the forest itself is reproducible independently of the split.
set.seed(12345)
first.RF_FIN_SM <- randomForest::randomForest(
  DETERIORO ~ .,
  data = first.train_data_FIN_P2_SM,
  importance = TRUE
)
print(first.RF_FIN_SM)
##
## Call:
## randomForest(formula = DETERIORO ~ ., data = first.train_data_FIN_P2_SM, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 4
##
## OOB estimate of error rate: 3.95%
## Confusion matrix:
## 1 2 class.error
## 1 41 1 0.02380952
## 2 2 32 0.05882353
# ----- Prediction errors on the test set ----- #
# Rows of the table are the observed classes, columns the predicted classes.
first.DETERIORO2p <- predict(first.RF_FIN_SM, newdata = first.test_data_FIN_P2_SM)
first.tabla01 <- table(first.test_data_FIN_P2_SM$DETERIORO, first.DETERIORO2p)
print(first.tabla01)
## first.DETERIORO2p
## 1 2
## 1 10 0
## 2 1 7
# Test misclassification rate: the two off-diagonal cells ([1, 2] and [2, 1])
# of the confusion table divided by the total number of test cases.
first.err <- sum(first.tabla01[cbind(1:2, 2:1)]) / sum(first.tabla01)
print(first.err)
## [1] 0.05555556
# Variable importance of the baseline first model (type = 1), sorted from most
# to least important and shown as an interactive table.
your_data <- randomForest::importance(first.RF_FIN_SM, type = 1)
your_data <- your_data[order(your_data, decreasing = TRUE), ]
your_data <- as.data.frame(your_data)
datatable(
  your_data,
  options = list(pageLength = 10),
  colnames = 'MeanDecreaseAccuracy'
)
# Second model: 80/20 train/test split on DETERIORO, then a baseline random
# forest with default hyperparameters and variable importance enabled.
set.seed(12345)
second.data_partition_FIN_P2 <- caret::createDataPartition(
  second.newMWMOTE_FIN$DETERIORO,
  times = 1,
  p = 0.8,
  list = FALSE
)
second.train_data_FIN_P2_SM <- second.newMWMOTE_FIN[second.data_partition_FIN_P2, ]
second.test_data_FIN_P2_SM <- second.newMWMOTE_FIN[-second.data_partition_FIN_P2, ]
# Re-seed so the forest itself is reproducible independently of the split.
set.seed(12345)
second.RF_FIN_SM <- randomForest::randomForest(
  DETERIORO ~ .,
  data = second.train_data_FIN_P2_SM,
  importance = TRUE
)
print(second.RF_FIN_SM)
##
## Call:
## randomForest(formula = DETERIORO ~ ., data = second.train_data_FIN_P2_SM, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 16
##
## OOB estimate of error rate: 3.95%
## Confusion matrix:
## 1 2 class.error
## 1 41 1 0.02380952
## 2 2 32 0.05882353
# ----- Prediction errors on the test set ----- #
# Rows of the table are the observed classes, columns the predicted classes.
second.DETERIORO2p <- predict(second.RF_FIN_SM, newdata = second.test_data_FIN_P2_SM)
second.tabla01 <- table(second.test_data_FIN_P2_SM$DETERIORO, second.DETERIORO2p)
print(second.tabla01)
## second.DETERIORO2p
## 1 2
## 1 10 0
## 2 0 8
# Test misclassification rate: the two off-diagonal cells ([1, 2] and [2, 1])
# of the confusion table divided by the total number of test cases.
second.err <- sum(second.tabla01[cbind(1:2, 2:1)]) / sum(second.tabla01)
print(second.err)
## [1] 0
# Variable importance of the baseline second model (type = 1), sorted from most
# to least important and shown as an interactive table.
your_data <- randomForest::importance(second.RF_FIN_SM, type = 1)
your_data <- your_data[order(your_data, decreasing = TRUE), ]
your_data <- as.data.frame(your_data)
datatable(
  your_data,
  options = list(pageLength = 10),
  colnames = 'MeanDecreaseAccuracy'
)
# Summary of the baseline first model, printed again for comparison.
print(first.RF_FIN_SM)
##
## Call:
## randomForest(formula = DETERIORO ~ ., data = first.train_data_FIN_P2_SM, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 4
##
## OOB estimate of error rate: 3.95%
## Confusion matrix:
## 1 2 class.error
## 1 41 1 0.02380952
## 2 2 32 0.05882353
# Summary of the baseline second model, printed again for comparison.
print(second.RF_FIN_SM)
##
## Call:
## randomForest(formula = DETERIORO ~ ., data = second.train_data_FIN_P2_SM, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 16
##
## OOB estimate of error rate: 3.95%
## Confusion matrix:
## 1 2 class.error
## 1 41 1 0.02380952
## 2 2 32 0.05882353
# Tratamiento de datos
# ==============================================================================
library(ISLR)
library(dplyr)
library(tidyr)
library(skimr)
# Gráficos
# ==============================================================================
library(ggplot2)
library(ggpubr)
# Preprocesado y modelado
# ==============================================================================
library(tidymodels)
library(ranger)
library(doParallel)
# Train/test split (80/20 on DETERIORO) used for hyperparameter tuning of the
# first model.
set.seed(12345)
first.trainIndex <- caret::createDataPartition(
  first.newMWMOTE_FIN$DETERIORO,
  times = 1,
  p = 0.8,
  list = FALSE
)
first.datos_train <- first.newMWMOTE_FIN[first.trainIndex, ]
first.datos_test <- first.newMWMOTE_FIN[-first.trainIndex, ]
Esta sección crea un grid de hiperparámetros con diferentes combinaciones de num_trees (número de árboles), mtry (variables a considerar en cada división del árbol) y max_depth (profundidad máxima del árbol) para un modelo de bosque aleatorio
# Hyperparameter grid to evaluate
# ==============================================================================
# Every combination of number of trees, mtry and maximum tree depth; the last
# mtry value uses all predictors (all columns except the response).
first.param_grid <- expand_grid(
  num_trees = c(50, 100, 500, 1000, 5000),
  mtry = c(3, 5, 7, ncol(first.datos_train) - 1),
  max_depth = c(1, 3, 10, 20)
)
En esta sección, se ajusta un modelo de bosque aleatorio para cada combinación de hiperparámetros en el grid. Luego, se calcula el error out-of-bag (OOB) para cada modelo y se almacena en un vector oob_error
# Fit one ranger model per hyperparameter combination
# ==============================================================================
# Preallocate a numeric vector with one slot per grid row; each slot receives
# the `prediction.error` reported by the fitted ranger model.
first.oob_error <- rep(NA_real_, nrow(first.param_grid))
# seq_len() yields an empty sequence for an empty grid, whereas 1:nrow() would
# iterate over c(1, 0).
for (i in seq_len(nrow(first.param_grid))) {
  first.modelo <- ranger(
    formula   = DETERIORO ~ .,
    data      = first.datos_train,
    num.trees = first.param_grid$num_trees[i],
    mtry      = first.param_grid$mtry[i],
    max.depth = first.param_grid$max_depth[i],
    seed      = 123
  )
  first.oob_error[i] <- first.modelo$prediction.error
}
Aquí se construye una tabla de resultados que contiene cada combinación de hiperparámetros junto con su error OOB correspondiente. A continuación, los resultados se ordenan por error OOB de menor a mayor.
# Results
# ==============================================================================
# Attach the OOB error to its hyperparameter combination and sort ascending,
# so the best combination comes first.
first.resultados <- first.param_grid
first.resultados[["first.oob_error"]] <- first.oob_error
first.resultados <- arrange(first.resultados, first.oob_error)
head(first.resultados)
## # A tibble: 6 × 4
## num_trees mtry max_depth first.oob_error
## <dbl> <dbl> <dbl> <dbl>
## 1 50 3 10 0.0132
## 2 50 3 20 0.0132
## 3 50 7 3 0.0263
## 4 100 3 10 0.0263
## 5 100 3 20 0.0263
## 6 100 5 3 0.0263
Finalmente, esta sección muestra los mejores hiperparámetros encontrados según el error OOB más bajo.
# Best hyperparameters according to the out-of-bag error
# ==============================================================================
# The table is already sorted by ascending OOB error, so the first row is the best.
head(first.resultados, n = 1)
## # A tibble: 1 × 4
## num_trees mtry max_depth first.oob_error
## <dbl> <dbl> <dbl> <dbl>
## 1 50 3 10 0.0132
En esta sección, se define el modelo que se utilizará, que es un modelo de bosque aleatorio (rand_forest) para clasificación. Los hiperparámetros mtry y trees se establecen como parámetros a sintonizar utilizando la función tune(). También se configuran otras opciones del modelo, como el motor (engine) que se utilizará (en este caso, “ranger”), la profundidad máxima del árbol (max.depth), la importancia de las variables (importance), y una semilla aleatoria para la reproducibilidad
# MODEL DEFINITION AND HYPERPARAMETERS TO TUNE
# ==============================================================================
# Random-forest classification spec on the ranger engine. mtry, trees and the
# engine argument max.depth are left as tune() placeholders; importance is
# disabled and the engine seed is fixed.
first.modelo <- set_engine(
  rand_forest(mode = "classification", mtry = tune(), trees = tune()),
  engine = "ranger",
  max.depth = tune(),
  importance = "none",
  seed = 123
)
En esta parte, se define el preprocesamiento de los datos utilizando la función recipe. En este caso, no se realiza ningún preprocesamiento, por lo que el transformer solo contiene la definición de la fórmula (DETERIORO ~ .) y los datos de entrenamiento.
# PREPROCESSING DEFINITION
# ==============================================================================
# No preprocessing steps are added: the recipe only carries the model formula
# and the training data.
first.transformer <- recipe(DETERIORO ~ ., data = first.datos_train)
Aquí se define la estrategia de validación cruzada para evaluar el modelo. Se utiliza una validación cruzada estratificada de 5 pliegues (vfold_cv) para dividir los datos de entrenamiento en conjuntos de entrenamiento y validación. La estratificación se realiza en función de la variable objetivo DETERIORO
# VALIDATION STRATEGY AND CREATION OF THE PARTITIONS
# ==============================================================================
# 5-fold cross-validation on the training data, stratified by DETERIORO.
set.seed(12345)
first.cv_folds <- vfold_cv(first.datos_train, v = 5, strata = DETERIORO)
Se crea un flujo de trabajo (workflow) que combina el preprocesamiento (transformer) y el modelo (modelo) definidos anteriormente. Esto establece el flujo de trabajo completo para entrenar y evaluar el modelo
# WORKFLOW
# ==============================================================================
# Bundle the recipe and the model spec into a single workflow object.
first.workflow_modelado <- add_model(
  add_recipe(workflow(), first.transformer),
  first.modelo
)
Se crea un grid de hiperparámetros que especifica las diferentes combinaciones de trees, mtry, y max.depth que se probarán durante la optimización de hiperparámetros
# HYPERPARAMETER GRID
# ==============================================================================
# Column names match the tune() placeholders of the model spec
# (trees, mtry, max.depth).
first.hiperpar_grid <- expand_grid(
  trees = c(50, 100, 500, 1000, 5000),
  mtry = c(3, 5, 7, ncol(first.datos_train) - 1),
  max.depth = c(1, 3, 10, 20)
)
En esta parte se ejecuta la optimización de hiperparámetros utilizando tune_grid. Se ajusta el flujo de trabajo (workflow_modelado) en múltiples combinaciones de hiperparámetros utilizando la estrategia de validación cruzada definida anteriormente. La métrica de evaluación utilizada es la exactitud (accuracy).
# RUN THE HYPERPARAMETER OPTIMISATION
# ==============================================================================
set.seed(12345)
# Use all cores but one, and never fewer than one worker: detectCores() can
# return 1 (which would request 0 workers) or NA.
first.cl <- makePSOCKcluster(
  max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
)
registerDoParallel(first.cl)
# Evaluate every grid row on the CV folds, scoring by accuracy. The `finally`
# clause shuts the cluster down even if tune_grid() fails, so no worker
# processes are left running.
first.grid_fit <- tryCatch(
  tune_grid(
    object    = first.workflow_modelado,
    resamples = first.cv_folds,
    metrics   = metric_set(accuracy),
    grid      = first.hiperpar_grid
  ),
  finally = stopCluster(first.cl)
)
En esta parte se muestran los mejores hiperparámetros encontrados tras la validación cruzada: show_best devuelve la combinación de mtry, trees y max.depth con mayor exactitud (accuracy) media en los 5 pliegues.
# Best hyperparameters after cross-validation
# ==============================================================================
# Single best combination ranked by mean accuracy across the folds.
first.grid_fit %>%
  show_best(metric = "accuracy", n = 1)
## # A tibble: 1 × 9
## mtry trees max.depth .metric .estimator mean n std_err .config
## <dbl> <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 3 50 3 accuracy binary 0.988 5 0.0125 Preprocessor1_M…
# Refit the first model with the hyperparameters selected by cross-validation
# (mtry = 3, trees = 50, max.depth = 3).
# randomForest() has no `trees` or `max.depth` arguments; they were silently
# ignored and the forest was grown with the default 500 trees. The number of
# trees must be passed as `ntree`.
# NOTE(review): randomForest() offers no maximum-depth option (only `maxnodes`
# and `nodesize`), so the tuned depth of 3 cannot be reproduced exactly with
# this package; fit with ranger if the depth limit is required.
set.seed(12345)
first.FIN_SM <- randomForest::randomForest(
  DETERIORO ~ .,
  data = first.datos_train,
  mtry = 3,
  ntree = 50,
  importance = TRUE
)
print(first.FIN_SM)
##
## Call:
## randomForest(formula = DETERIORO ~ ., data = first.datos_train, mtry = 3, trees = 50, max.depth = 3, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 3.95%
## Confusion matrix:
## 1 2 class.error
## 1 41 1 0.02380952
## 2 2 32 0.05882353
# ----- Prediction errors on the test set ----- #
# Rows of the confusion matrix are the observed classes, columns the predictions.
first.DETERIORO2p <- predict(first.FIN_SM, newdata = first.datos_test)
first.mat_confusion <- table(first.datos_test$DETERIORO, first.DETERIORO2p)
# Misclassification rate: off-diagonal cells over the total number of cases.
first.err <- sum(first.mat_confusion[cbind(1:2, 2:1)]) / sum(first.mat_confusion)
print(first.err)
## [1] 0.05555556
# Variable importance of the tuned first model, sorted from most to least
# important. type = 1 selects the mean decrease in accuracy, so the table
# header must say MeanDecreaseAccuracy (not MeanDecreaseGini, which is type = 2).
your_data <- randomForest::importance(first.FIN_SM, type = 1)
your_data <- your_data[order(your_data, decreasing = TRUE), ]
your_data <- as.data.frame(your_data)
datatable(
  your_data,
  options = list(pageLength = 10),
  colnames = 'MeanDecreaseAccuracy'
)
# Train/test split (80/20 on DETERIORO) used for hyperparameter tuning of the
# second model.
set.seed(12345)
second.trainIndex <- caret::createDataPartition(
  second.newMWMOTE_FIN$DETERIORO,
  p = 0.8,
  list = FALSE,
  times = 1
)
second.datos_train <- second.newMWMOTE_FIN[second.trainIndex, ]
second.datos_test <- second.newMWMOTE_FIN[-second.trainIndex, ]

# Hyperparameter grid to evaluate
# ==============================================================================
# Every combination of number of trees, mtry and maximum tree depth; the last
# mtry value uses all predictors (all columns except the response).
second.param_grid <- expand_grid(
  num_trees = c(50, 100, 500, 1000, 5000),
  mtry = c(3, 5, 7, ncol(second.datos_train) - 1),
  max_depth = c(1, 3, 10, 20)
)

# Fit one ranger model per hyperparameter combination
# ==============================================================================
# Preallocate a numeric vector with one slot per grid row; each slot receives
# the `prediction.error` reported by the fitted ranger model.
second.oob_error <- rep(NA_real_, nrow(second.param_grid))
# seq_len() yields an empty sequence for an empty grid, whereas 1:nrow() would
# iterate over c(1, 0).
for (i in seq_len(nrow(second.param_grid))) {
  second.modelo <- ranger(
    formula   = DETERIORO ~ .,
    data      = second.datos_train,
    num.trees = second.param_grid$num_trees[i],
    mtry      = second.param_grid$mtry[i],
    max.depth = second.param_grid$max_depth[i],
    seed      = 123
  )
  second.oob_error[i] <- second.modelo$prediction.error
}

# Results
# ==============================================================================
# Attach the OOB error to its hyperparameter combination and sort ascending,
# so the best combination comes first.
second.resultados <- second.param_grid
second.resultados$second.oob_error <- second.oob_error
second.resultados <- second.resultados %>% arrange(second.oob_error)
head(second.resultados)
## # A tibble: 6 × 4
## num_trees mtry max_depth second.oob_error
## <dbl> <dbl> <dbl> <dbl>
## 1 100 3 10 0.0263
## 2 100 3 20 0.0263
## 3 500 3 10 0.0263
## 4 500 3 20 0.0263
## 5 5000 3 10 0.0263
## 6 5000 3 20 0.0263
# Best hyperparameters according to the out-of-bag error
# ==============================================================================
# The table is already sorted by ascending OOB error, so the first row is the best.
head(second.resultados, n = 1)
## # A tibble: 1 × 4
## num_trees mtry max_depth second.oob_error
## <dbl> <dbl> <dbl> <dbl>
## 1 100 3 10 0.0263
# MODEL DEFINITION AND HYPERPARAMETERS TO TUNE
# ==============================================================================
# Random-forest classification spec on the ranger engine. mtry, trees and the
# engine argument max.depth are left as tune() placeholders.
# (The set.seed() calls that preceded the model spec and the workflow were
# dropped: the seed is re-set immediately before vfold_cv() and before the
# tuning run, which are the steps seeded below.)
second.modelo <- rand_forest(
  mode  = "classification",
  mtry  = tune(),
  trees = tune()
) %>%
  set_engine(
    engine     = "ranger",
    max.depth  = tune(),
    importance = "none",
    seed       = 123
  )

# PREPROCESSING DEFINITION
# ==============================================================================
# No preprocessing steps are added: the recipe only carries the model formula
# and the training data.
second.transformer <- recipe(
  formula = DETERIORO ~ .,
  data    = second.datos_train
)

# VALIDATION STRATEGY AND CREATION OF THE PARTITIONS
# ==============================================================================
# 5-fold cross-validation on the training data, stratified by DETERIORO.
set.seed(12345)
second.cv_folds <- vfold_cv(
  data   = second.datos_train,
  v      = 5,
  strata = DETERIORO
)

# WORKFLOW
# ==============================================================================
# Bundle the recipe and the model spec into a single workflow object.
second.workflow_modelado <- workflow() %>%
  add_recipe(second.transformer) %>%
  add_model(second.modelo)

# HYPERPARAMETER GRID
# ==============================================================================
# Column names match the tune() placeholders of the model spec.
second.hiperpar_grid <- expand_grid(
  trees = c(50, 100, 500, 1000, 5000),
  mtry = c(3, 5, 7, ncol(second.datos_train) - 1),
  max.depth = c(1, 3, 10, 20)
)

# RUN THE HYPERPARAMETER OPTIMISATION
# ==============================================================================
set.seed(12345)
# Use all cores but one, and never fewer than one worker: detectCores() can
# return 1 (which would request 0 workers) or NA.
second.cl <- makePSOCKcluster(
  max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
)
registerDoParallel(second.cl)
# Evaluate every grid row on the CV folds, scoring by accuracy. The `finally`
# clause shuts the cluster down even if tune_grid() fails, so no worker
# processes are left running.
second.grid_fit <- tryCatch(
  tune_grid(
    object    = second.workflow_modelado,
    resamples = second.cv_folds,
    metrics   = metric_set(accuracy),
    grid      = second.hiperpar_grid
  ),
  finally = stopCluster(second.cl)
)

# Best hyperparameters after cross-validation
# ==============================================================================
show_best(second.grid_fit, metric = "accuracy", n = 1)
## # A tibble: 1 × 9
## mtry trees max.depth .metric .estimator mean n std_err .config
## <dbl> <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 5 50 10 accuracy binary 0.960 5 0.0164 Preprocessor1_M…
# Refit the second model with the hyperparameters selected by cross-validation
# (mtry = 5, trees = 50, max.depth = 10).
# Fixes relative to the previous call:
#   * mtry was hard-coded to 3 although cross-validation selected 5;
#   * `trees` and `max.depth` are not randomForest() arguments and were
#     silently ignored (the forest was grown with the default 500 trees); the
#     number of trees must be passed as `ntree`;
#   * the seed is now set before fitting, as is done for the first model.
# NOTE(review): randomForest() offers no maximum-depth option (only `maxnodes`
# and `nodesize`), so the tuned depth of 10 cannot be reproduced exactly with
# this package; fit with ranger if the depth limit is required.
set.seed(12345)
second.FIN_SM <- randomForest::randomForest(
  DETERIORO ~ .,
  data = second.datos_train,
  mtry = 5,
  ntree = 50,
  importance = TRUE
)
print(second.FIN_SM)
##
## Call:
## randomForest(formula = DETERIORO ~ ., data = second.datos_train, mtry = 3, trees = 50, max.depth = 10, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 3.95%
## Confusion matrix:
## 1 2 class.error
## 1 41 1 0.02380952
## 2 2 32 0.05882353
# ----- Prediction errors on the test set ----- #
# Rows of the confusion matrix are the observed classes, columns the predictions.
second.DETERIORO2p <- predict(second.FIN_SM, newdata = second.datos_test)
second.mat_confusion <- table(second.datos_test$DETERIORO, second.DETERIORO2p)
# Misclassification rate: off-diagonal cells over the total number of cases.
second.err <- sum(second.mat_confusion[cbind(1:2, 2:1)]) / sum(second.mat_confusion)
print(second.err)
## [1] 0
# Variable importance of the tuned second model, sorted from most to least
# important. type = 1 selects the mean decrease in accuracy, so the table
# header must say MeanDecreaseAccuracy (not MeanDecreaseGini, which is type = 2).
your_data <- randomForest::importance(second.FIN_SM, type = 1)
your_data <- your_data[order(your_data, decreasing = TRUE), ]
your_data <- as.data.frame(your_data)
datatable(
  your_data,
  options = list(pageLength = 10),
  colnames = 'MeanDecreaseAccuracy'
)
Acierto FIRST RF:
## Acierto RF: 94%
Acierto SECOND RF:
## Acierto RF: 100%