Cargando Datos Simulados
# Download the simulated "blobs" dataset (columns y, x_1, x_2).
datos <- read_csv(
  file = "https://raw.githubusercontent.com/JoaquinAmatRodrigo/Estadistica-machine-learning-python/master/data/blobs.csv"
)
Rows: 1500 Columns: 3
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
dbl (3): y, x_1, x_2
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Treat the response as a categorical class label instead of a number.
datos <- mutate(datos, y = as.factor(y))

# Scatter plot of both predictors, filled by class. The legend, every text
# element and the axis ticks are suppressed.
ggplot(datos, aes(x_1, x_2, fill = y)) +
  geom_point(size = 2, shape = 21) +
  theme_fivethirtyeight() +
  theme(
    axis.ticks = element_blank(),
    text = element_blank(),
    legend.position = "none"
  )

Creando particiones train, validation y test:
- Asignando un ratio de 60/20/20 (train/validation/test) con
h2o.splitFrame:
- Random Seed de 123
# Upload the data to the H2O cluster and split it with ratios 0.6 / 0.2;
# the third partition holds the remaining rows. The seed is fixed at 123.
df_h2o <- as.h2o(datos)
particiones <- h2o.splitFrame(df_h2o, ratios = c(0.6, 0.2), seed = 123)

# Register each partition in the cluster under an explicit key.
# NOTE(review): the validation key is "datos_validacion", unlike the "df_*"
# keys used for the other two frames -- confirm whether that is intended.
df_train <- h2o.assign(particiones[[1]], key = "df_train")
df_val   <- h2o.assign(particiones[[2]], key = "datos_validacion")
df_test  <- h2o.assign(particiones[[3]], key = "df_test")
Observamos el shape o dimension de los datos particionados
library(glue)
# Report the size of the training split. dim() yields two values (rows,
# columns), so glue() emits one output line per value.
glue("Dimension de datos de training {dim(df_train)}")
Dimension de datos de training 918
Dimension de datos de training 3
# Report the size of the validation split (one output line per dim() value).
glue("Dimension de datos de validation {dim(df_val)}")
Dimension de datos de validation 281
Dimension de datos de validation 3
# Report the size of the test split (one output line per dim() value).
glue("Dimension de datos de testing {dim(df_test)}")
Dimension de datos de testing 301
Dimension de datos de testing 3
Existen 918, 281 y 301 datos para training, validación y testing,
respectivamente.
Ahora se procederá a crear 4 modelos mediante
h2o.deeplearning
Prediccion Modelos Originales
# Baseline models
# ==============================================================================
# The four networks share predictors, response, multinomial distribution,
# standardization, Rectifier activation, adaptive_rate = FALSE,
# stopping_rounds = 0, 1000 epochs and the seed. Only the hidden-layer layout
# and the model id change between them.
ajustar_red_base <- function(hidden, model_id) {
  h2o.deeplearning(
    x = c("x_1", "x_2"),
    y = "y",
    training_frame = df_train,
    distribution = "multinomial",
    activation = "Rectifier",
    standardize = TRUE,
    adaptive_rate = FALSE,
    hidden = hidden,
    epochs = 1000,
    stopping_rounds = 0,
    seed = 123,
    model_id = model_id
  )
}

# One hidden layer with a single unit.
modelo_1 <- ajustar_red_base(hidden = 1, model_id = "modelo_1")

# One hidden layer with 10 units.
modelo_2 <- ajustar_red_base(hidden = 10, model_id = "modelo_2")

# Two hidden layers with 10 units each.
modelo_3 <- ajustar_red_base(hidden = c(10, 10), model_id = "modelo_3")

# Three hidden layers with 50 units each.
modelo_4 <- ajustar_red_base(hidden = c(50, 50, 50), model_id = "modelo_4")
Definicion de Arquitectura Distinta - Mayor Cantidad de Neurons
Definicion de predicciones para los 4 modelos originales:
# Predictions of each model
# ==============================================================================
# Regular 75 x 75 grid spanning the observed range of both predictors.
grid_predicciones <- expand.grid(
  x_1 = seq(min(datos$x_1), max(datos$x_1), length.out = 75),
  x_2 = seq(min(datos$x_2), max(datos$x_2), length.out = 75)
)
grid_predicciones_h2o <- as.h2o(grid_predicciones)

# Score the grid with every baseline model.
predicciones_1 <- h2o.predict(modelo_1, newdata = grid_predicciones_h2o)
predicciones_2 <- h2o.predict(modelo_2, newdata = grid_predicciones_h2o)
predicciones_3 <- h2o.predict(modelo_3, newdata = grid_predicciones_h2o)
predicciones_4 <- h2o.predict(modelo_4, newdata = grid_predicciones_h2o)

# Copy the predicted class back into the local grid, one column per model.
grid_predicciones$modelo_1 <- as.vector(predicciones_1$predict)
grid_predicciones$modelo_2 <- as.vector(predicciones_2$predict)
grid_predicciones$modelo_3 <- as.vector(predicciones_3$predict)
grid_predicciones$modelo_4 <- as.vector(predicciones_4$predict)
# Prediction plots
# ==============================================================================
# Layers shared by the four panels: small points, FiveThirtyEight theme, no
# legend and no axis text, titles or ticks.
capas_base <- list(
  geom_point(size = 0.5),
  theme_fivethirtyeight(),
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )
)

# Each title states the `hidden` layout its model was trained with:
# modelo_1 = 1, modelo_2 = 10, modelo_3 = c(10, 10), modelo_4 = c(50, 50, 50).
# The previous titles "(5)" and "(20, 20)" did not match modelo_1 / modelo_3.
p1 <- ggplot(grid_predicciones, aes(x = x_1, y = x_2, color = modelo_1)) +
  capas_base +
  labs(title = "Arquitectura: (1)")

p2 <- ggplot(grid_predicciones, aes(x = x_1, y = x_2, color = modelo_2)) +
  capas_base +
  labs(title = "Arquitectura: (10)")

p3 <- ggplot(grid_predicciones, aes(x = x_1, y = x_2, color = modelo_3)) +
  capas_base +
  labs(title = "Arquitectura: (10, 10)")

p4 <- ggplot(grid_predicciones, aes(x = x_1, y = x_2, color = modelo_4)) +
  capas_base +
  labs(title = "Arquitectura: (50, 50, 50)")

# 2 x 2 panel with the decision regions of the four models.
ggarrange(p1, p2, p3, p4, nrow = 2, ncol = 2)

Creacion de Nuevos Modelos con mayor cantidad de hidden Layers
(Capas Ocultas)
A continuación, se crean diferentes modelos aplicando una
arquitectura con diferentes parámetros; en este caso:
- Modelo 1 : Modelo Con 6 capas ocultas de 30
neuronas cada una, activacion ReLU sin dropout
- Modelo 2 : Modelo con 1 sola capa oculta de 500
neuronas, activacion ReLU con dropout
- Modelo 3 : Modelo con 10 capas ocultas de 10
neuronas cada una , activacion tanh sin dropout
- Modelo 4 : Modelo con 3 capas ocultas de 10
neuronas cada una , activacion tanh sin dropout
Tanh:
\[ \tanh(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}} \]
Se consideran valores negativos: mientras que el valor mínimo de la
función sigmoide es 0, en Tanh el valor mínimo es -1. Esta es la
razón por la que la función de activación de Tanh también se conoce como
función de activación centrada en cero.
Desventaja:
También sufre el problema del desvanecimiento del gradiente (vanishing
gradient), al igual que la función sigmoide.
# Custom model definitions
# ==============================================================================
# Every custom network uses the same predictors, response, multinomial
# distribution, standardization, adaptive_rate = FALSE, stopping_rounds = 0,
# 1000 epochs and seed. Activation, hidden layout and model id vary.
ajustar_red_custom <- function(activation, hidden, model_id) {
  h2o.deeplearning(
    x = c("x_1", "x_2"),
    y = "y",
    training_frame = df_train,
    distribution = "multinomial",
    activation = activation,
    standardize = TRUE,
    adaptive_rate = FALSE,
    hidden = hidden,
    epochs = 1000,
    stopping_rounds = 0,
    seed = 123,
    model_id = model_id
  )
}

# Model 1: six hidden layers of 30 units, Rectifier activation.
model_1 <- ajustar_red_custom(
  activation = "Rectifier",
  hidden = rep(30, 6),
  model_id = "model_1"
)

# Model 2: a single hidden layer of 500 units, Rectifier with dropout.
model_2 <- ajustar_red_custom(
  activation = "RectifierWithDropout",
  hidden = 500,
  model_id = "model_2"
)

# Model 3: ten hidden layers of 10 units, Tanh activation.
model_3 <- ajustar_red_custom(
  activation = "Tanh",
  hidden = rep(10, 10),
  model_id = "model_3"
)

# Model 4: three hidden layers of 10 units, Tanh activation.
model_4 <- ajustar_red_custom(
  activation = "Tanh",
  hidden = rep(10, 3),
  model_id = "model_4"
)
Generando predicciones con los nuevos modelos:
# Regular 75 x 75 grid spanning the observed range of both predictors.
grid_new <- expand.grid(
  x_1 = seq(min(datos$x_1), max(datos$x_1), length.out = 75),
  x_2 = seq(min(datos$x_2), max(datos$x_2), length.out = 75)
)
grid_new_predict <- as.h2o(grid_new)

# Score the grid with every custom model.
predict_1 <- h2o.predict(model_1, newdata = grid_new_predict)
predict_2 <- h2o.predict(model_2, newdata = grid_new_predict)
predict_3 <- h2o.predict(model_3, newdata = grid_new_predict)
predict_4 <- h2o.predict(model_4, newdata = grid_new_predict)

# Copy the predicted class back into the local grid, one column per model.
grid_new$model_1 <- as.vector(predict_1$predict)
grid_new$model_2 <- as.vector(predict_2$predict)
grid_new$model_3 <- as.vector(predict_3$predict)
grid_new$model_4 <- as.vector(predict_4$predict)
Predicciones con nuevos Modelos:
# Prediction plots
# ==============================================================================
# Layers shared by the four panels: small points, Economist theme, no legend
# and no axis text, titles or ticks.
capas_custom <- list(
  geom_point(size = 0.5),
  theme_economist(),
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )
)

p1 <- ggplot(grid_new, aes(x = x_1, y = x_2, color = model_1)) +
  capas_custom +
  labs(title = "Arquitectura: (30,30,30,30,30,30) ReLU")

# model_2 is one hidden layer of 500 units (hidden = 500), not a single
# neuron, so the title states the layer size.
p2 <- ggplot(grid_new, aes(x = x_1, y = x_2, color = model_2)) +
  capas_custom +
  labs(title = "Arquitectura: (500) ReLU Dropout")

p3 <- ggplot(grid_new, aes(x = x_1, y = x_2, color = model_3)) +
  capas_custom +
  labs(title = "Arquitectura c(10,10,10,10,10,10,10,10,10,10) TanH ")

p4 <- ggplot(grid_new, aes(x = x_1, y = x_2, color = model_4)) +
  capas_custom +
  labs(title = "Arquitectura: c(10, 10, 10) tanh")

# 2 x 2 panel with the decision regions of the four custom models.
ggarrange(p1, p2, p3, p4, nrow = 2, ncol = 2)

Comparacion de Accuracy entre modelos en el dataset de test:
# Test-set accuracy of the four baseline models: the mean of the element-wise
# comparison between the predicted class and the true label in df_test.
# predict_test is a scratch frame that is overwritten for every model.
predict_test <- h2o.predict(object = modelo_1,newdata = df_test)
accuracy_1 <- mean(predict_test["predict"] == df_test["y"])
predict_test <- h2o.predict(object = modelo_2,newdata = df_test)
accuracy_2 <- mean(predict_test["predict"] == df_test["y"])
predict_test <- h2o.predict(object = modelo_3,newdata = df_test)
accuracy_3 <- mean(predict_test["predict"] == df_test["y"])
predict_test <- h2o.predict(object = modelo_4,newdata = df_test)
accuracy_4 <- mean(predict_test["predict"] == df_test["y"])
# Print the four accuracies, one per line.
glue("Accuracy del Modelo 1 : {accuracy_1} \n
Accuracy del Modelo 2 : {accuracy_2} \n
Accuracy del Modelo 3 : {accuracy_3} \n
Accuracy del Modelo 4 : {accuracy_4} \n
" )
Accuracy del Modelo 1 : 0.740863787375415
Accuracy del Modelo 2 : 0.857142857142857
Accuracy del Modelo 3 : 0.86046511627907
Accuracy del Modelo 4 : 0.857142857142857
# Test-set accuracy of the four custom models: the mean of the element-wise
# comparison between the predicted class and the true label in df_test.
# predict_test is a scratch frame that is overwritten for every model.
predict_test <- h2o.predict(object = model_1,newdata = df_test)
accuracy_1_new <- mean(predict_test["predict"] == df_test["y"])
predict_test <- h2o.predict(object = model_2,newdata = df_test)
accuracy_2_new <- mean(predict_test["predict"] == df_test["y"])
predict_test <- h2o.predict(object = model_3,newdata = df_test)
accuracy_3_new <- mean(predict_test["predict"] == df_test["y"])
predict_test <- h2o.predict(object = model_4,newdata = df_test)
accuracy_4_new <- mean(predict_test["predict"] == df_test["y"])
Imprimiendo el accuracy de cada modelo:
# Print the test accuracy of the four custom models, one per line.
glue("Accuracy del Modelo 1 Nuevo: {accuracy_1_new} \n
Accuracy del Modelo 2 Nuevo: {accuracy_2_new} \n
Accuracy del Modelo 3 Nuevo: {accuracy_3_new} \n
Accuracy del Modelo 4 Nuevo : {accuracy_4_new} \n
" )
Accuracy del Modelo 1 Nuevo: 0.837209302325581
Accuracy del Modelo 2 Nuevo: 0.850498338870432
Accuracy del Modelo 3 Nuevo: 0.830564784053156
Accuracy del Modelo 4 Nuevo : 0.837209302325581
Se grafican las accuracies de todos los modelos planteados:
# Collect the test accuracy of the eight models next to their display labels.
accuracy <- c(
  accuracy_1, accuracy_2, accuracy_3, accuracy_4,
  accuracy_1_new, accuracy_2_new, accuracy_3_new, accuracy_4_new
)
model <- c(
  "Modelo 1", "Modelo 2", "Modelo 3", "Modelo 4",
  "Modelo 1 nuevo", "Modelo 2 Nuevo", "Modelo 3 Nuevo", "Modelo 4 Nuevo"
)
data <- data.frame(accuracy, model)

# Bar chart ordered from highest to lowest accuracy, with vertical x labels.
# The title is set through labs(): the earlier `title(...)` call was the
# base-graphics function passed as an extra ggplot() argument, so it never
# became the title of the ggplot.
ggplot(data, aes(x = reorder(model, -accuracy), y = accuracy, fill = model)) +
  geom_bar(stat = "identity") +
  labs(title = "Accuracy de Modelos Planteados ") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Se observa que el mejor modelo creado corresponde al Modelo 3, con un 86% de
accuracy en los datos de testing, recordando que el modelo 3 posee una
arquitectura c(10, 10) de 2 hidden layers con 10 neuronas
cada una. Entre los modelos nuevos creados, el modelo 2 nuevo obtiene
el mejor accuracy, con el 85%.
