Librerías

library(stats)
library(psych)
library(MASS)
library(ISLR)
library(fRegression)
library(vcd)
## Loading required package: grid
## 
## Attaching package: 'vcd'
## The following object is masked from 'package:ISLR':
## 
##     Hitters
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(openxlsx)
library(mlbench)
library(magrittr)
library(neuralnet)
## 
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
## 
##     compute
library(keras)
## The keras package is deprecated. Please use the keras3 package instead.
## Alternatively, to continue using legacy keras, call `py_require_legacy_keras()`.
library(keras3)
## Registered S3 methods overwritten by 'keras3':
##   method                               from 
##   as.data.frame.keras_training_history keras
##   plot.keras_training_history          keras
##   print.keras_training_history         keras
##   r_to_py.R6ClassGenerator             keras
## 
## Attaching package: 'keras3'
## The following objects are masked from 'package:keras':
## 
##     %<-active%, %py_class%, activation_elu, activation_exponential,
##     activation_gelu, activation_hard_sigmoid, activation_linear,
##     activation_relu, activation_selu, activation_sigmoid,
##     activation_softmax, activation_softplus, activation_softsign,
##     activation_tanh, adapt, application_densenet121,
##     application_densenet169, application_densenet201,
##     application_efficientnet_b0, application_efficientnet_b1,
##     application_efficientnet_b2, application_efficientnet_b3,
##     application_efficientnet_b4, application_efficientnet_b5,
##     application_efficientnet_b6, application_efficientnet_b7,
##     application_inception_resnet_v2, application_inception_v3,
##     application_mobilenet, application_mobilenet_v2,
##     application_mobilenet_v3_large, application_mobilenet_v3_small,
##     application_nasnetlarge, application_nasnetmobile,
##     application_resnet101, application_resnet101_v2,
##     application_resnet152, application_resnet152_v2,
##     application_resnet50, application_resnet50_v2, application_vgg16,
##     application_vgg19, application_xception, bidirectional,
##     callback_backup_and_restore, callback_csv_logger,
##     callback_early_stopping, callback_lambda,
##     callback_learning_rate_scheduler, callback_model_checkpoint,
##     callback_reduce_lr_on_plateau, callback_remote_monitor,
##     callback_tensorboard, clone_model, constraint_maxnorm,
##     constraint_minmaxnorm, constraint_nonneg, constraint_unitnorm,
##     count_params, custom_metric, dataset_boston_housing,
##     dataset_cifar10, dataset_cifar100, dataset_fashion_mnist,
##     dataset_imdb, dataset_imdb_word_index, dataset_mnist,
##     dataset_reuters, dataset_reuters_word_index, freeze_weights,
##     from_config, get_config, get_file, get_layer, get_vocabulary,
##     get_weights, image_array_save, image_dataset_from_directory,
##     image_load, image_to_array, imagenet_decode_predictions,
##     imagenet_preprocess_input, initializer_constant,
##     initializer_glorot_normal, initializer_glorot_uniform,
##     initializer_he_normal, initializer_he_uniform,
##     initializer_identity, initializer_lecun_normal,
##     initializer_lecun_uniform, initializer_ones,
##     initializer_orthogonal, initializer_random_normal,
##     initializer_random_uniform, initializer_truncated_normal,
##     initializer_variance_scaling, initializer_zeros, install_keras,
##     keras, keras_model, keras_model_sequential, Layer,
##     layer_activation, layer_activation_elu,
##     layer_activation_leaky_relu, layer_activation_parametric_relu,
##     layer_activation_relu, layer_activation_softmax,
##     layer_activity_regularization, layer_add, layer_additive_attention,
##     layer_alpha_dropout, layer_attention, layer_average,
##     layer_average_pooling_1d, layer_average_pooling_2d,
##     layer_average_pooling_3d, layer_batch_normalization,
##     layer_category_encoding, layer_center_crop, layer_concatenate,
##     layer_conv_1d, layer_conv_1d_transpose, layer_conv_2d,
##     layer_conv_2d_transpose, layer_conv_3d, layer_conv_3d_transpose,
##     layer_conv_lstm_1d, layer_conv_lstm_2d, layer_conv_lstm_3d,
##     layer_cropping_1d, layer_cropping_2d, layer_cropping_3d,
##     layer_dense, layer_depthwise_conv_1d, layer_depthwise_conv_2d,
##     layer_discretization, layer_dot, layer_dropout, layer_embedding,
##     layer_flatten, layer_gaussian_dropout, layer_gaussian_noise,
##     layer_global_average_pooling_1d, layer_global_average_pooling_2d,
##     layer_global_average_pooling_3d, layer_global_max_pooling_1d,
##     layer_global_max_pooling_2d, layer_global_max_pooling_3d,
##     layer_gru, layer_hashing, layer_input, layer_integer_lookup,
##     layer_lambda, layer_layer_normalization, layer_lstm, layer_masking,
##     layer_max_pooling_1d, layer_max_pooling_2d, layer_max_pooling_3d,
##     layer_maximum, layer_minimum, layer_multi_head_attention,
##     layer_multiply, layer_normalization, layer_permute,
##     layer_random_brightness, layer_random_contrast, layer_random_crop,
##     layer_random_flip, layer_random_rotation, layer_random_translation,
##     layer_random_zoom, layer_repeat_vector, layer_rescaling,
##     layer_reshape, layer_resizing, layer_rnn, layer_separable_conv_1d,
##     layer_separable_conv_2d, layer_simple_rnn,
##     layer_spatial_dropout_1d, layer_spatial_dropout_2d,
##     layer_spatial_dropout_3d, layer_string_lookup, layer_subtract,
##     layer_text_vectorization, layer_unit_normalization,
##     layer_upsampling_1d, layer_upsampling_2d, layer_upsampling_3d,
##     layer_zero_padding_1d, layer_zero_padding_2d,
##     layer_zero_padding_3d, learning_rate_schedule_cosine_decay,
##     learning_rate_schedule_cosine_decay_restarts,
##     learning_rate_schedule_exponential_decay,
##     learning_rate_schedule_inverse_time_decay,
##     learning_rate_schedule_piecewise_constant_decay,
##     learning_rate_schedule_polynomial_decay, loss_binary_crossentropy,
##     loss_categorical_crossentropy, loss_categorical_hinge,
##     loss_cosine_similarity, loss_hinge, loss_huber, loss_kl_divergence,
##     loss_mean_absolute_error, loss_mean_absolute_percentage_error,
##     loss_mean_squared_error, loss_mean_squared_logarithmic_error,
##     loss_poisson, loss_sparse_categorical_crossentropy,
##     loss_squared_hinge, mark_active, metric_auc,
##     metric_binary_accuracy, metric_binary_crossentropy,
##     metric_categorical_accuracy, metric_categorical_crossentropy,
##     metric_categorical_hinge, metric_cosine_similarity,
##     metric_false_negatives, metric_false_positives, metric_hinge,
##     metric_mean, metric_mean_absolute_error,
##     metric_mean_absolute_percentage_error, metric_mean_iou,
##     metric_mean_squared_error, metric_mean_squared_logarithmic_error,
##     metric_mean_wrapper, metric_poisson, metric_precision,
##     metric_precision_at_recall, metric_recall,
##     metric_recall_at_precision, metric_root_mean_squared_error,
##     metric_sensitivity_at_specificity,
##     metric_sparse_categorical_accuracy,
##     metric_sparse_categorical_crossentropy,
##     metric_sparse_top_k_categorical_accuracy,
##     metric_specificity_at_sensitivity, metric_squared_hinge,
##     metric_sum, metric_top_k_categorical_accuracy,
##     metric_true_negatives, metric_true_positives, new_callback_class,
##     new_layer_class, new_learning_rate_schedule_class, new_loss_class,
##     new_metric_class, new_model_class, normalize, optimizer_adadelta,
##     optimizer_adagrad, optimizer_adam, optimizer_adamax,
##     optimizer_ftrl, optimizer_nadam, optimizer_rmsprop, optimizer_sgd,
##     pad_sequences, pop_layer, predict_on_batch, regularizer_l1,
##     regularizer_l1_l2, regularizer_l2, regularizer_orthogonal,
##     set_vocabulary, set_weights, shape, test_on_batch,
##     text_dataset_from_directory, time_distributed,
##     timeseries_dataset_from_array, to_categorical, train_on_batch,
##     unfreeze_weights, use_backend, with_custom_object_scope, zip_lists
library(caret)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Loading required package: lattice

Leer base de datos

En este análisis se utilizó el dataset Seatbelts, el cual contiene información sobre muertes en carreteras en Gran Bretaña.

Se verificó la estructura de los datos mediante las funciones str() y head(), con el objetivo de entender las variables disponibles y su tipo de dato.

data("Seatbelts")
data <- as.data.frame(Seatbelts)
str(data)
## 'data.frame':    192 obs. of  8 variables:
##  $ DriversKilled: num  107 97 102 87 119 106 110 106 107 134 ...
##  $ drivers      : num  1687 1508 1507 1385 1632 ...
##  $ front        : num  867 825 806 814 991 ...
##  $ rear         : num  269 265 319 407 454 427 522 536 405 437 ...
##  $ kms          : num  9059 7685 9963 10955 11823 ...
##  $ PetrolPrice  : num  0.103 0.102 0.102 0.101 0.101 ...
##  $ VanKilled    : num  12 6 12 8 10 13 11 6 10 16 ...
##  $ law          : num  0 0 0 0 0 0 0 0 0 0 ...
head(data)
##   DriversKilled drivers front rear   kms PetrolPrice VanKilled law
## 1           107    1687   867  269  9059   0.1029718        12   0
## 2            97    1508   825  265  7685   0.1023630         6   0
## 3           102    1507   806  319  9963   0.1020625        12   0
## 4            87    1385   814  407 10955   0.1008733         8   0
## 5           119    1632   991  454 11823   0.1010197        10   0
## 6           106    1511   945  427 12391   0.1005812        13   0
?Seatbelts

Como parte del preprocesamiento, se revisó que todas las variables fueran numéricas, ya que las redes neuronales requieren este tipo de entradas para poder operar correctamente.

En este caso, todas las variables del dataset ya se encontraban en formato numérico, por lo que no fue necesario realizar transformaciones adicionales.

# data %<>% mutate_if(is.factor, as.numeric) #Transforma variables 'factor' en variables numéricas
# data %<>% mutate_if(is.character, as.numeric) #Transforma variables 'character' en variables numéricas

# # Solo utilizar en caso de que los caracteres correspondan a números. Si los caracteres corresponden a categorías, entonces es necesario crear variables dummies.
colSums(is.na(data))
## DriversKilled       drivers         front          rear           kms 
##             0             0             0             0             0 
##   PetrolPrice     VanKilled           law 
##             0             0             0

También se verificó la existencia de valores faltantes (NA), ya que estos pueden afectar el entrenamiento del modelo al impedir el cálculo correcto de errores.

No se detectaron valores faltantes en el dataset, por lo que no fue necesario realizar imputaciones.

Ajustar y Visualizar Redes Neuronales

Se realizó una partición de los datos en conjuntos de entrenamiento (75%) y prueba (25%), con el objetivo de evaluar el desempeño de los modelos sobre datos no vistos previamente.

set.seed(13)
train = createDataPartition(y = data$DriversKilled, p=0.75, list=FALSE, times = 1)
data_train = data[train,]
data_test = data[-train,]
RN <- neuralnet(DriversKilled ~ .,
               data = data_train,
               hidden = c(12,7),
               linear.output = T,
               lifesign = 'full',
               threshold = 0.05,
               rep=1)
?neuralnet

Se construyó una primera red neuronal con dos capas ocultas, con 12 y 7 neuronas respectivamente.

El objetivo del modelo es predecir la variable DriversKilled a partir del resto de las variables del dataset.

Para mejorar el desempeño del modelo, se realizó una estandarización de las variables, transformándolas a una escala con media cero y desviación estándar uno.

Este proceso es importante ya que las redes neuronales son sensibles a la escala de los datos.

#train/test split en matrices y separando variable a predecir
training <- as.matrix(data_train[,-1])
trainingtarget <- as.matrix(data_train[,1])
test <- as.matrix(data_test[,-1])
testtarget <- as.matrix(data_test[,1])

# Estandarización de variables
m <- colMeans(training) #Obtener medias por columna
s <- apply(training, 2, sd) #Calcular StandDev por columna (por ello el apply lleva el 2, si pusieran 1 sería por renglón)
training <- scale(training, center = m, scale = s)
test <- scale(test, center = m, scale = s)

La estandarización se realizó utilizando únicamente la media y desviación estándar del conjunto de entrenamiento, con el fin de evitar fuga de información hacia el conjunto de prueba.

Predicciones

Una vez estandarizados los datos, se volvió a ajustar la red neuronal con el objetivo de mejorar su desempeño.

data_train_S <- as.data.frame(cbind(training,(trainingtarget - mean(trainingtarget))/sd(trainingtarget)))
colnames(data_train_S) <- colnames(data_train)

RNS <- neuralnet(DriversKilled ~ .,
               data = data_train_S,
               hidden = c(12,7),
               linear.output = T, 
               #linear.output se debe poner como T en modelos de regresion y como F en modelos de clasificación
               lifesign = 'full',
               rep=1,
               # threshold=0.02,
               stepmax=200000)

Visualizar la Red Neuronal ajustada

plot(RNS,col.hidden = 'darkgreen',     
col.hidden.synapse = 'darkgreen',
     show.weights = T,
     information = F,
     fill = 'lightblue')

Se realizaron predicciones sobre el conjunto de prueba utilizando el modelo entrenado.

data_test_S <- as.data.frame(test)
colnames(data_test_S) <- colnames(data_test)[-1]

RNSPredictions <- predict(RNS,data_test_S)
cor(RNSPredictions,(testtarget-mean(trainingtarget))/sd(trainingtarget))
##             [,1]
## [1,] -0.05188054

Las predicciones fueron desestandarizadas para poder interpretarlas en la escala original de la variable objetivo y compararlas correctamente con los valores reales.

RNSPred <- RNSPredictions * sd(trainingtarget) + mean(trainingtarget)
plot(RNSPred,testtarget)
abline(a=0, b=1)

RSSnn <- (RNSPred - testtarget)^2
sum(RSSnn)/nrow(testtarget)
## [1] 1083.153
R2_NN1 <- 1 - sum(RSSnn)/sum((testtarget - mean(trainingtarget))^2)
R2_NN1
## [1] -0.8433498
LRM <- lm(DriversKilled ~ ., data = data_train)
LRMPred <- predict(LRM, data_test)
cor(LRMPred, data_test$DriversKilled)
## [1] 0.848806
plot(LRMPred, data_test$DriversKilled)
abline(a = 0, b = 1)

LRMRSS <- (LRMPred - data_test$DriversKilled)^2
sum(LRMRSS)/nrow(data_test)
## [1] 165.3711
1 - sum(LRMRSS)/sum((data_test$DriversKilled - mean(data_train$DriversKilled))^2)
## [1] 0.7185655

Se realizó un segundo intento utilizando una arquitectura más simple, con 5 y 3 neuronas, con el fin de evaluar si una menor complejidad mejora el desempeño del modelo.

data_train_S <- as.data.frame(cbind(training,(trainingtarget - mean(trainingtarget))/sd(trainingtarget)))
colnames(data_train_S) <- colnames(data_train)

RNS2 <- neuralnet(DriversKilled ~ .,
               data = data_train_S,
               hidden = c(5,3),
               linear.output = T, 
               lifesign = 'full',
               rep=1,
               # threshold=0.02,
               stepmax=200000)

Visualizar la Red Neuronal ajustada

print(plot(RNS2,col.hidden = 'darkgreen',     
col.hidden.synapse = 'darkgreen',
     show.weights = T,
     information = F,
     fill = 'lightblue'))
## NULL

Realizar predicciones

data_test_S <- as.data.frame(test)
colnames(data_test_S) <- colnames(data_test)[-1]

RNS2Predictions <- predict(RNS2,data_test_S)
cor(RNS2Predictions,(testtarget-mean(trainingtarget))/sd(trainingtarget))
##           [,1]
## [1,] 0.2029685

Para calcular métricas de regresión es importante ‘desestandarizar’ las predicciones y eso es lo que se comparará contra los valores de testtarget

RNS2Pred <- RNS2Predictions*sd(trainingtarget) + mean(trainingtarget)
plot(RNS2Pred,testtarget)
abline(a=0, b=1)

RSS2nn <- (RNS2Pred - testtarget)^2
sum(RSS2nn)/nrow(testtarget)
## [1] 696.5602
R2_NN2 <- 1 - sum(RSS2nn)/sum((testtarget - mean(trainingtarget))^2)
R2_NN2
## [1] -0.1854314

Comparar modelos gráficamente

Opción A) Dos gráficas diferentes

# dev.off()
par(mfrow=c(1,2))
plot(data_test$DriversKilled,RNS2Pred,col='red',main='Real vs predicted NN',pch=19,cex=1)
abline(0,1,lwd=2)
legend('bottomright',legend='NN',pch=18,col='red', bty='n')
plot(data_test$DriversKilled,LRMPred,col='blue',main='Real vs predicted LM',pch=15, cex=1)
abline(0,1,lwd=2)
legend('bottomright',legend='LM',pch=18,col='blue', bty='n', cex=.95)

Opción B) En una misma gráfica

dev.off()
## pdf 
##   3
plot(data_test$DriversKilled,RNS2Pred,col='red',main='Real vs predicted NN',pch=19,cex=1)
points(data_test$DriversKilled,LRMPred,col='blue',pch=15,cex=1)
abline(0,1,lwd=2)
legend('bottomright',legend=c('NN','LM'),pch=c(19,15),col=c('red','blue'))

Modelo de Regresión Lineal Múltiple

Como punto de comparación, se ajustó un modelo de regresión lineal múltiple utilizando las mismas variables explicativas.

LRM <- lm(DriversKilled ~ ., data = data_train)
summary(LRM)
## 
## Call:
## lm(formula = DriversKilled ~ ., data = data_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.578  -7.211  -0.655   6.310  35.889 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.622e+01  1.672e+01  -1.569    0.119    
## drivers      8.760e-02  5.850e-03  14.974   <2e-16 ***
## front       -7.972e-03  1.895e-02  -0.421    0.675    
## rear         5.546e-03  2.663e-02   0.208    0.835    
## kms          6.525e-04  5.511e-04   1.184    0.238    
## PetrolPrice -1.404e+01  9.296e+01  -0.151    0.880    
## VanKilled   -1.829e-01  3.197e-01  -0.572    0.568    
## law          4.072e+00  4.594e+00   0.886    0.377    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.18 on 137 degrees of freedom
## Multiple R-squared:  0.8206, Adjusted R-squared:  0.8115 
## F-statistic: 89.55 on 7 and 137 DF,  p-value: < 2.2e-16
LRMPred <- predict(LRM, data_test)
cor(LRMPred, data_test$DriversKilled)
## [1] 0.848806
plot(LRMPred, data_test$DriversKilled)
abline(a = 0, b = 1)

LRMRSS <- (LRMPred - data_test$DriversKilled)^2
sum(LRMRSS)/nrow(data_test)
## [1] 165.3711
R2_LM <- 1 - sum(LRMRSS)/sum((data_test$DriversKilled - mean(data_train$DriversKilled))^2)
R2_LM
## [1] 0.7185655

Comparación de Modelos

Model_Comparison <- data.frame(
  Modelo = c("Red Neuronal (12,7)", "Red Neuronal (5,3)", "Regresión Lineal"),
  R2 = c(R2_NN1, R2_NN2, R2_LM)
)

Model_Comparison
##                Modelo         R2
## 1 Red Neuronal (12,7) -0.8433498
## 2  Red Neuronal (5,3) -0.1854314
## 3    Regresión Lineal  0.7185655

Al comparar los modelos, se observa que las redes neuronales presentan bajo desempeño, mientras que la regresión lineal resulta ser el modelo más adecuado para este dataset.

Esto sugiere que las relaciones entre variables son principalmente lineales y que el tamaño del dataset limita el uso de modelos más complejos.

En conclusión, el desempeño de los modelos depende tanto del tamaño del dataset como de la complejidad del modelo.

Las redes neuronales requieren grandes volúmenes de datos para ser efectivas, mientras que modelos más simples pueden ofrecer mejores resultados en contextos con datos limitados.

Este análisis demuestra que una mayor complejidad no garantiza un mejor desempeño.