library(stats)
library(psych)
library(MASS)
library(ISLR)
library(fRegression)
library(vcd)
## Loading required package: grid
##
## Attaching package: 'vcd'
## The following object is masked from 'package:ISLR':
##
## Hitters
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(openxlsx)
library(mlbench)
library(magrittr)
library(neuralnet)
##
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
##
## compute
library(keras)
## The keras package is deprecated. Please use the keras3 package instead.
## Alternatively, to continue using legacy keras, call `py_require_legacy_keras()`.
library(keras3)
## Registered S3 methods overwritten by 'keras3':
## method from
## as.data.frame.keras_training_history keras
## plot.keras_training_history keras
## print.keras_training_history keras
## r_to_py.R6ClassGenerator keras
##
## Attaching package: 'keras3'
## The following objects are masked from 'package:keras':
##
## %<-active%, %py_class%, activation_elu, activation_exponential,
## activation_gelu, activation_hard_sigmoid, activation_linear,
## activation_relu, activation_selu, activation_sigmoid,
## activation_softmax, activation_softplus, activation_softsign,
## activation_tanh, adapt, application_densenet121,
## application_densenet169, application_densenet201,
## application_efficientnet_b0, application_efficientnet_b1,
## application_efficientnet_b2, application_efficientnet_b3,
## application_efficientnet_b4, application_efficientnet_b5,
## application_efficientnet_b6, application_efficientnet_b7,
## application_inception_resnet_v2, application_inception_v3,
## application_mobilenet, application_mobilenet_v2,
## application_mobilenet_v3_large, application_mobilenet_v3_small,
## application_nasnetlarge, application_nasnetmobile,
## application_resnet101, application_resnet101_v2,
## application_resnet152, application_resnet152_v2,
## application_resnet50, application_resnet50_v2, application_vgg16,
## application_vgg19, application_xception, bidirectional,
## callback_backup_and_restore, callback_csv_logger,
## callback_early_stopping, callback_lambda,
## callback_learning_rate_scheduler, callback_model_checkpoint,
## callback_reduce_lr_on_plateau, callback_remote_monitor,
## callback_tensorboard, clone_model, constraint_maxnorm,
## constraint_minmaxnorm, constraint_nonneg, constraint_unitnorm,
## count_params, custom_metric, dataset_boston_housing,
## dataset_cifar10, dataset_cifar100, dataset_fashion_mnist,
## dataset_imdb, dataset_imdb_word_index, dataset_mnist,
## dataset_reuters, dataset_reuters_word_index, freeze_weights,
## from_config, get_config, get_file, get_layer, get_vocabulary,
## get_weights, image_array_save, image_dataset_from_directory,
## image_load, image_to_array, imagenet_decode_predictions,
## imagenet_preprocess_input, initializer_constant,
## initializer_glorot_normal, initializer_glorot_uniform,
## initializer_he_normal, initializer_he_uniform,
## initializer_identity, initializer_lecun_normal,
## initializer_lecun_uniform, initializer_ones,
## initializer_orthogonal, initializer_random_normal,
## initializer_random_uniform, initializer_truncated_normal,
## initializer_variance_scaling, initializer_zeros, install_keras,
## keras, keras_model, keras_model_sequential, Layer,
## layer_activation, layer_activation_elu,
## layer_activation_leaky_relu, layer_activation_parametric_relu,
## layer_activation_relu, layer_activation_softmax,
## layer_activity_regularization, layer_add, layer_additive_attention,
## layer_alpha_dropout, layer_attention, layer_average,
## layer_average_pooling_1d, layer_average_pooling_2d,
## layer_average_pooling_3d, layer_batch_normalization,
## layer_category_encoding, layer_center_crop, layer_concatenate,
## layer_conv_1d, layer_conv_1d_transpose, layer_conv_2d,
## layer_conv_2d_transpose, layer_conv_3d, layer_conv_3d_transpose,
## layer_conv_lstm_1d, layer_conv_lstm_2d, layer_conv_lstm_3d,
## layer_cropping_1d, layer_cropping_2d, layer_cropping_3d,
## layer_dense, layer_depthwise_conv_1d, layer_depthwise_conv_2d,
## layer_discretization, layer_dot, layer_dropout, layer_embedding,
## layer_flatten, layer_gaussian_dropout, layer_gaussian_noise,
## layer_global_average_pooling_1d, layer_global_average_pooling_2d,
## layer_global_average_pooling_3d, layer_global_max_pooling_1d,
## layer_global_max_pooling_2d, layer_global_max_pooling_3d,
## layer_gru, layer_hashing, layer_input, layer_integer_lookup,
## layer_lambda, layer_layer_normalization, layer_lstm, layer_masking,
## layer_max_pooling_1d, layer_max_pooling_2d, layer_max_pooling_3d,
## layer_maximum, layer_minimum, layer_multi_head_attention,
## layer_multiply, layer_normalization, layer_permute,
## layer_random_brightness, layer_random_contrast, layer_random_crop,
## layer_random_flip, layer_random_rotation, layer_random_translation,
## layer_random_zoom, layer_repeat_vector, layer_rescaling,
## layer_reshape, layer_resizing, layer_rnn, layer_separable_conv_1d,
## layer_separable_conv_2d, layer_simple_rnn,
## layer_spatial_dropout_1d, layer_spatial_dropout_2d,
## layer_spatial_dropout_3d, layer_string_lookup, layer_subtract,
## layer_text_vectorization, layer_unit_normalization,
## layer_upsampling_1d, layer_upsampling_2d, layer_upsampling_3d,
## layer_zero_padding_1d, layer_zero_padding_2d,
## layer_zero_padding_3d, learning_rate_schedule_cosine_decay,
## learning_rate_schedule_cosine_decay_restarts,
## learning_rate_schedule_exponential_decay,
## learning_rate_schedule_inverse_time_decay,
## learning_rate_schedule_piecewise_constant_decay,
## learning_rate_schedule_polynomial_decay, loss_binary_crossentropy,
## loss_categorical_crossentropy, loss_categorical_hinge,
## loss_cosine_similarity, loss_hinge, loss_huber, loss_kl_divergence,
## loss_mean_absolute_error, loss_mean_absolute_percentage_error,
## loss_mean_squared_error, loss_mean_squared_logarithmic_error,
## loss_poisson, loss_sparse_categorical_crossentropy,
## loss_squared_hinge, mark_active, metric_auc,
## metric_binary_accuracy, metric_binary_crossentropy,
## metric_categorical_accuracy, metric_categorical_crossentropy,
## metric_categorical_hinge, metric_cosine_similarity,
## metric_false_negatives, metric_false_positives, metric_hinge,
## metric_mean, metric_mean_absolute_error,
## metric_mean_absolute_percentage_error, metric_mean_iou,
## metric_mean_squared_error, metric_mean_squared_logarithmic_error,
## metric_mean_wrapper, metric_poisson, metric_precision,
## metric_precision_at_recall, metric_recall,
## metric_recall_at_precision, metric_root_mean_squared_error,
## metric_sensitivity_at_specificity,
## metric_sparse_categorical_accuracy,
## metric_sparse_categorical_crossentropy,
## metric_sparse_top_k_categorical_accuracy,
## metric_specificity_at_sensitivity, metric_squared_hinge,
## metric_sum, metric_top_k_categorical_accuracy,
## metric_true_negatives, metric_true_positives, new_callback_class,
## new_layer_class, new_learning_rate_schedule_class, new_loss_class,
## new_metric_class, new_model_class, normalize, optimizer_adadelta,
## optimizer_adagrad, optimizer_adam, optimizer_adamax,
## optimizer_ftrl, optimizer_nadam, optimizer_rmsprop, optimizer_sgd,
## pad_sequences, pop_layer, predict_on_batch, regularizer_l1,
## regularizer_l1_l2, regularizer_l2, regularizer_orthogonal,
## set_vocabulary, set_weights, shape, test_on_batch,
## text_dataset_from_directory, time_distributed,
## timeseries_dataset_from_array, to_categorical, train_on_batch,
## unfreeze_weights, use_backend, with_custom_object_scope, zip_lists
library(caret)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Loading required package: lattice
En este análisis se utilizó el dataset Seatbelts, el cual contiene información sobre muertes en carreteras en Gran Bretaña.
Se verificó la estructura de los datos mediante las funciones
str() y head(), con el objetivo de entender
las variables disponibles y su tipo de dato.
data("Seatbelts")
data <- as.data.frame(Seatbelts)
str(data)
## 'data.frame': 192 obs. of 8 variables:
## $ DriversKilled: num 107 97 102 87 119 106 110 106 107 134 ...
## $ drivers : num 1687 1508 1507 1385 1632 ...
## $ front : num 867 825 806 814 991 ...
## $ rear : num 269 265 319 407 454 427 522 536 405 437 ...
## $ kms : num 9059 7685 9963 10955 11823 ...
## $ PetrolPrice : num 0.103 0.102 0.102 0.101 0.101 ...
## $ VanKilled : num 12 6 12 8 10 13 11 6 10 16 ...
## $ law : num 0 0 0 0 0 0 0 0 0 0 ...
head(data)
## DriversKilled drivers front rear kms PetrolPrice VanKilled law
## 1 107 1687 867 269 9059 0.1029718 12 0
## 2 97 1508 825 265 7685 0.1023630 6 0
## 3 102 1507 806 319 9963 0.1020625 12 0
## 4 87 1385 814 407 10955 0.1008733 8 0
## 5 119 1632 991 454 11823 0.1010197 10 0
## 6 106 1511 945 427 12391 0.1005812 13 0
?Seatbelts
Como parte del preprocesamiento, se revisó que todas las variables fueran numéricas, ya que las redes neuronales requieren este tipo de entradas para poder operar correctamente.
En este caso, todas las variables del dataset ya se encontraban en formato numérico, por lo que no fue necesario realizar transformaciones adicionales.
# data %<>% mutate_if(is.factor, as.numeric) #Transforma variables 'factor' en variables numéricas
# data %<>% mutate_if(is.character, as.numeric) #Transforma variables 'character' en variables numéricas
# # Solo utilizar en caso de que los caracteres correspondan a números. Si los caracteres corresponden a categorías, entonces es necesario crear variables dummies.
colSums(is.na(data))
## DriversKilled drivers front rear kms
## 0 0 0 0 0
## PetrolPrice VanKilled law
## 0 0 0
También se verificó la existencia de valores faltantes (NA), ya que estos pueden afectar el entrenamiento del modelo al impedir el cálculo correcto de errores.
No se detectaron valores faltantes en el dataset, por lo que no fue necesario realizar imputaciones.
Se realizó una partición de los datos en conjuntos de entrenamiento (75%) y prueba (25%), con el objetivo de evaluar el desempeño de los modelos sobre datos no vistos previamente.
set.seed(13)
train = createDataPartition(y = data$DriversKilled, p=0.75, list=FALSE, times = 1)
data_train = data[train,]
data_test = data[-train,]
RN <- neuralnet(DriversKilled ~ .,
data = data_train,
hidden = c(12,7),
linear.output = T,
lifesign = 'full',
threshold = 0.05,
rep=1)
?neuralnet
Se construyó una primera red neuronal con dos capas ocultas, con 12 y 7 neuronas respectivamente.
El objetivo del modelo es predecir la variable DriversKilled a partir del resto de las variables del dataset.
Para mejorar el desempeño del modelo, se realizó una estandarización de las variables, transformándolas a una escala con media cero y desviación estándar uno.
Este proceso es importante ya que las redes neuronales son sensibles a la escala de los datos.
#train/test split en matrices y separando variable a predecir
training <- as.matrix(data_train[,-1])
trainingtarget <- as.matrix(data_train[,1])
test <- as.matrix(data_test[,-1])
testtarget <- as.matrix(data_test[,1])
# Estandarización de variables
m <- colMeans(training) #Obtener medias por columna
s <- apply(training, 2, sd) #Calcular StandDev por columna (por ello el apply lleva el 2, si pusieran 1 sería por renglón)
training <- scale(training, center = m, scale = s)
test <- scale(test, center = m, scale = s)
La estandarización se realizó utilizando únicamente la media y desviación estándar del conjunto de entrenamiento, con el fin de evitar fuga de información hacia el conjunto de prueba.
Una vez estandarizados los datos, se volvió a ajustar la red neuronal con el objetivo de mejorar su desempeño.
data_train_S <- as.data.frame(cbind(training,(trainingtarget - mean(trainingtarget))/sd(trainingtarget)))
colnames(data_train_S) <- colnames(data_train)
RNS <- neuralnet(DriversKilled ~ .,
data = data_train_S,
hidden = c(12,7),
linear.output = T,
#linear.output se debe poner como T en modelos de regresion y como F en modelos de clasificación
lifesign = 'full',
rep=1,
# threshold=0.02,
stepmax=200000)
plot(RNS,col.hidden = 'darkgreen',
col.hidden.synapse = 'darkgreen',
show.weights = T,
information = F,
fill = 'lightblue')
Se realizaron predicciones sobre el conjunto de prueba utilizando el modelo entrenado.
data_test_S <- as.data.frame(test)
colnames(data_test_S) <- colnames(data_test)[-1]
RNSPredictions <- predict(RNS,data_test_S)
cor(RNSPredictions,(testtarget-mean(trainingtarget))/sd(trainingtarget))
## [,1]
## [1,] -0.05188054
Las predicciones fueron desestandarizadas para poder interpretarlas en la escala original de la variable objetivo y compararlas correctamente con los valores reales.
RNSPred <- RNSPredictions * sd(trainingtarget) + mean(trainingtarget)
plot(RNSPred,testtarget)
abline(a=0, b=1)
RSSnn <- (RNSPred - testtarget)^2
sum(RSSnn)/nrow(testtarget)
## [1] 1083.153
R2_NN1 <- 1 - sum(RSSnn)/sum((testtarget - mean(trainingtarget))^2)
R2_NN1
## [1] -0.8433498
LRM <- lm(DriversKilled ~ ., data = data_train)
LRMPred <- predict(LRM, data_test)
cor(LRMPred, data_test$DriversKilled)
## [1] 0.848806
plot(LRMPred, data_test$DriversKilled)
abline(a = 0, b = 1)
LRMRSS <- (LRMPred - data_test$DriversKilled)^2
sum(LRMRSS)/nrow(data_test)
## [1] 165.3711
1 - sum(LRMRSS)/sum((data_test$DriversKilled - mean(data_train$DriversKilled))^2)
## [1] 0.7185655
Se realizó un segundo intento utilizando una arquitectura más simple, con 5 y 3 neuronas, con el fin de evaluar si una menor complejidad mejora el desempeño del modelo.
data_train_S <- as.data.frame(cbind(training,(trainingtarget - mean(trainingtarget))/sd(trainingtarget)))
colnames(data_train_S) <- colnames(data_train)
RNS2 <- neuralnet(DriversKilled ~ .,
data = data_train_S,
hidden = c(5,3),
linear.output = T,
lifesign = 'full',
rep=1,
# threshold=0.02,
stepmax=200000)
print(plot(RNS2,col.hidden = 'darkgreen',
col.hidden.synapse = 'darkgreen',
show.weights = T,
information = F,
fill = 'lightblue'))
## NULL
Realizar predicciones
data_test_S <- as.data.frame(test)
colnames(data_test_S) <- colnames(data_test)[-1]
RNS2Predictions <- predict(RNS2,data_test_S)
cor(RNS2Predictions,(testtarget-mean(trainingtarget))/sd(trainingtarget))
## [,1]
## [1,] 0.2029685
Para calcular métricas de regresión es importante ‘desestandarizar’ las predicciones y eso es lo que se comparará contra los valores de testtarget
RNS2Pred <- RNS2Predictions*sd(trainingtarget) + mean(trainingtarget)
plot(RNS2Pred,testtarget)
abline(a=0, b=1)
RSS2nn <- (RNS2Pred - testtarget)^2
sum(RSS2nn)/nrow(testtarget)
## [1] 696.5602
R2_NN2 <- 1 - sum(RSS2nn)/sum((testtarget - mean(trainingtarget))^2)
R2_NN2
## [1] -0.1854314
Comparar modelos gráficamente
Opción A) Dos gráficas diferentes
# dev.off()
par(mfrow=c(1,2))
plot(data_test$DriversKilled,RNS2Pred,col='red',main='Real vs predicted NN',pch=19,cex=1)
abline(0,1,lwd=2)
legend('bottomright',legend='NN',pch=18,col='red', bty='n')
plot(data_test$DriversKilled,LRMPred,col='blue',main='Real vs predicted LM',pch=15, cex=1)
abline(0,1,lwd=2)
legend('bottomright',legend='LM',pch=18,col='blue', bty='n', cex=.95)
Opción B) En una misma gráfica
dev.off()
## pdf
## 3
plot(data_test$DriversKilled,RNS2Pred,col='red',main='Real vs predicted NN',pch=19,cex=1)
points(data_test$DriversKilled,LRMPred,col='blue',pch=15,cex=1)
abline(0,1,lwd=2)
legend('bottomright',legend=c('NN','LM'),pch=c(19,15),col=c('red','blue'))
Como punto de comparación, se ajustó un modelo de regresión lineal múltiple utilizando las mismas variables explicativas.
LRM <- lm(DriversKilled ~ ., data = data_train)
summary(LRM)
##
## Call:
## lm(formula = DriversKilled ~ ., data = data_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.578 -7.211 -0.655 6.310 35.889
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.622e+01 1.672e+01 -1.569 0.119
## drivers 8.760e-02 5.850e-03 14.974 <2e-16 ***
## front -7.972e-03 1.895e-02 -0.421 0.675
## rear 5.546e-03 2.663e-02 0.208 0.835
## kms 6.525e-04 5.511e-04 1.184 0.238
## PetrolPrice -1.404e+01 9.296e+01 -0.151 0.880
## VanKilled -1.829e-01 3.197e-01 -0.572 0.568
## law 4.072e+00 4.594e+00 0.886 0.377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.18 on 137 degrees of freedom
## Multiple R-squared: 0.8206, Adjusted R-squared: 0.8115
## F-statistic: 89.55 on 7 and 137 DF, p-value: < 2.2e-16
LRMPred <- predict(LRM, data_test)
cor(LRMPred, data_test$DriversKilled)
## [1] 0.848806
plot(LRMPred, data_test$DriversKilled)
abline(a = 0, b = 1)
LRMRSS <- (LRMPred - data_test$DriversKilled)^2
sum(LRMRSS)/nrow(data_test)
## [1] 165.3711
R2_LM <- 1 - sum(LRMRSS)/sum((data_test$DriversKilled - mean(data_train$DriversKilled))^2)
R2_LM
## [1] 0.7185655
Model_Comparison <- data.frame(
Modelo = c("Red Neuronal (12,7)", "Red Neuronal (5,3)", "Regresión Lineal"),
R2 = c(R2_NN1, R2_NN2, R2_LM)
)
Model_Comparison
## Modelo R2
## 1 Red Neuronal (12,7) -0.8433498
## 2 Red Neuronal (5,3) -0.1854314
## 3 Regresión Lineal 0.7185655
Al comparar los modelos, se observa que las redes neuronales presentan bajo desempeño, mientras que la regresión lineal resulta ser el modelo más adecuado para este dataset.
Esto sugiere que las relaciones entre variables son principalmente lineales y que el tamaño del dataset limita el uso de modelos más complejos.
En conclusión, el desempeño de los modelos depende tanto del tamaño del dataset como de la complejidad del modelo.
Las redes neuronales requieren grandes volúmenes de datos para ser efectivas, mientras que modelos más simples pueden ofrecer mejores resultados en contextos con datos limitados.
Este análisis demuestra que una mayor complejidad no garantiza un mejor desempeño.