library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(keras)
library(reticulate)
library(dplyr)
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(neuralnet)
## 
## Attaching package: 'neuralnet'
## 
## The following object is masked from 'package:dplyr':
## 
##     compute

Cargamos base de datos

# Cargamos la base de datos
datos_inegi <- read.csv("/Users/jenaromtzg/Desktop/trafico_choques.csv")
# La visualizamos para tener mayor entendimiento. 
str(datos_inegi)
## 'data.frame':    1352 obs. of  11 variables:
##  $ cve_entidad   : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ desc_entidad  : chr  "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
##  $ cve_municipio : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ desc_municipio: chr  "Abasolo" "Abasolo" "Abasolo" "Abasolo" ...
##  $ id_indicador  : num  6.21e+09 6.21e+09 6.21e+09 6.21e+09 6.21e+09 ...
##  $ indicador     : chr  "Vehículos de motor registrados en circulación" "Vehículos de motor registrados en circulación" "Vehículos de motor registrados en circulación" "Vehículos de motor registrados en circulación" ...
##  $ año           : int  1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 ...
##  $ valor         : int  216 228 251 300 397 447 471 503 542 600 ...
##  $ choque        : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ valor_c       : int  13 11 11 13 7 7 4 1 0 0 ...
##  $ unidad_medida : chr  "No especificada" "No especificada" "No especificada" "No especificada" ...

Se seleccionan columnas específicas (valor_c, año, valor, choque) del dataframe datos_inegi y se almacenan en datos_procesados.

# Seleccionamos solamente las columnas que nos sirven y son de interer, solo las nuemricas relevantes
datos_procesados <- datos_inegi[, c("valor_c", "año", "valor", "choque")]
# checamos por si existen datos nulos o faltantes
colSums(is.na(datos_procesados))
## valor_c     año   valor  choque 
##       0       0       0       0

Se divide el dataset en conjuntos de entrenamiento (data_train) y prueba (data_test) utilizando createDataPartition() de la librería caret.

set.seed(123)
train <- createDataPartition(y = datos_procesados$valor_c, p = 0.8, list = FALSE, times = 1)
data_train <- datos_procesados[train, ]
data_test <- datos_procesados[-train, ]

Se entrena una red neuronal utilizando la función neuralnet() con la fórmula choque ~ año + valor + valor_c utilizando los datos de entrenamiento (data_train).

# Se entrena una red neuronal utilizando la función neuralnet() con la fórmula choque ~ año + valor + valor_c utilizando los datos de entrenamiento (data_train).
RNS <- neuralnet(choque ~ año + valor + valor_c,
                 data = data_train,
                 hidden = c(12, 7),
                 linear.output = FALSE,
                 lifesign = 'full',
                 threshold = 0.05,
                 rep = 4)
## hidden: 12, 7    thresh: 0.05    rep: 1/4    steps:     674  error: 34.95522 time: 0.51 secs
## hidden: 12, 7    thresh: 0.05    rep: 2/4    steps:      21  error: 46.40278 time: 0.01 secs
## hidden: 12, 7    thresh: 0.05    rep: 3/4    steps:    1000  min thresh: 0.0965643962894402
##                                                        1251  error: 27.32689 time: 0.84 secs
## hidden: 12, 7    thresh: 0.05    rep: 4/4    steps:    1000  min thresh: 0.0508428234425437
##                                                        2000  min thresh: 0.0508428234425437
##                                                        2274  error: 31.16468 time: 1.34 secs
# Train/test split en matrices y separando variable a predecir
training <- as.matrix(data_train[,1:3])
trainingtarget <- as.matrix(data_train[,4])
test <- as.matrix(data_test[,1:3])
testtarget <- as.matrix(data_test[,4])

Se entrena una segunda red neuronal (RNSVALOR) utilizando los datos estandarizados (data_train_S) y los mismos parámetros anteriores.

# Estandarización de variables
m <- colMeans(training)
s <- apply(training, 2, sd)
training <- scale(training, center = m, scale = s)
test <- scale(test, center = m, scale = s)
data_train_S <- as.data.frame(cbind(training, (trainingtarget - mean(trainingtarget))/sd(trainingtarget)))
colnames(data_train_S) <- colnames(data_train)

RNSVALOR <- neuralnet(choque ~ año + valor + valor_c,
                 data = data_train_S,
                 hidden = c(12, 7),
                 linear.output = FALSE, 
                 lifesign = 'full',
                 rep = 4,
                 stepmax = 2000)
## hidden: 12, 7    thresh: 0.01    rep: 1/4    steps:    1000  min thresh: 0.191906513687783
##                                                     stepmax  min thresh: 0.191906513687783
## hidden: 12, 7    thresh: 0.01    rep: 2/4    steps:    1000  min thresh: 0.145631969901857
##                                                     stepmax  min thresh: 0.145631969901857
## hidden: 12, 7    thresh: 0.01    rep: 3/4    steps:    1000  min thresh: 0.128885661086698
##                                                     stepmax  min thresh: 0.128885661086698
## hidden: 12, 7    thresh: 0.01    rep: 4/4    steps:    1000  min thresh: 0.129581389172468
##                                                     stepmax  min thresh: 0.129581389172468
## Warning: Algorithm did not converge in 4 of 4 repetition(s) within the stepmax.

Se grafica el modelo de red neuronal entrenado con la función plot().

plot(RNS)
data_test_S <- as.data.frame(test)
colnames(data_test) <- colnames(data_test_S)

Se realizan predicciones utilizando la red neuronal (RNS) sobre los datos de prueba (data_test_S).

RNSPredictions <- predict(RNS, newdata = data_test_S)
RNSPredictions
##             [,1]
## 3    0.998326119
## 10   0.998321755
## 13   0.998318386
## 15   0.998315270
## 28   0.998326582
## 30   0.998325904
## 31   0.998325498
## 32   0.998324986
## 43   0.998312419
## 51   0.998288594
## 52   0.998282256
## 54   0.998326855
## 59   0.998325581
## 71   0.998316036
## 72   0.998314887
## 75   0.998312104
## 90   0.998321394
## 98   0.998306595
## 100  0.998301798
## 104  0.998289445
## 105  0.998327791
## 107  0.998327773
## 108  0.998327863
## 114  0.998328560
## 119  0.998328560
## 121  0.998328560
## 142  0.998313565
## 149  0.998307761
## 153  0.998297556
## 157  0.998326808
## 160  0.998325787
## 163  0.998324195
## 165  0.998322679
## 179  0.998295432
## 201  0.998321046
## 208  0.998326875
## 210  0.998326572
## 223  0.998317171
## 225  0.998313926
## 233  0.998292051
## 234  0.998288544
## 237  0.998326330
## 243  0.998323486
## 246  0.998320530
## 248  0.998318350
## 257  0.998297829
## 264  0.998325947
## 266  0.998325121
## 269  0.998305330
## 273  0.998201968
## 281  0.992696679
## 284  0.812453909
## 288  0.997745528
## 289  0.997433741
## 290  0.997272932
## 292  0.997208909
## 293  0.998039483
## 310  0.998303213
## 312  0.998298923
## 313  0.998326761
## 319  0.998324116
## 320  0.998316917
## 322  0.998301804
## 331  0.998272027
## 335  0.998209202
## 338  0.998191714
## 340  0.998323147
## 341  0.998323108
## 342  0.998322717
## 343  0.998323163
## 344  0.998321769
## 349  0.998320466
## 357  0.998305605
## 360  0.998298139
## 373  0.998322873
## 378  0.998317885
## 380  0.998314716
## 386  0.998303574
## 387  0.998301936
## 392  0.998328560
## 397  0.998328560
## 401  0.998328560
## 402  0.998328560
## 408  0.998328560
## 410  0.998328560
## 414  0.998328560
## 420  0.998326129
## 425  0.998323417
## 430  0.998319631
## 447  0.998325674
## 450  0.998324212
## 451  0.998323607
## 457  0.998319568
## 458  0.998318735
## 459  0.998318049
## 464  0.998323484
## 468  0.998328557
## 471  0.998326247
## 475  0.998324442
## 483  0.998316293
## 487  0.998307061
## 496  0.998321533
## 498  0.998320289
## 503  0.998327886
## 504  0.998328502
## 506  0.998328559
## 510  0.998328559
## 517  0.998328560
## 519  0.998328560
## 531  0.998322138
## 536  0.998316434
## 538  0.998311554
## 541  0.998296021
## 545  0.998259409
## 555  0.998318611
## 568  0.998294200
## 570  0.998289435
## 571  0.998283600
## 574  0.998325982
## 576  0.998325511
## 578  0.998324490
## 582  0.998321352
## 585  0.998317816
## 593  0.998301214
## 600  0.998326583
## 603  0.998325469
## 608  0.998322161
## 618  0.998309194
## 624  0.998301203
## 626  0.998328560
## 629  0.998328560
## 631  0.998328560
## 634  0.998328560
## 639  0.998328560
## 640  0.998328560
## 647  0.998328560
## 648  0.998328560
## 653  0.998326073
## 654  0.998325783
## 655  0.998325428
## 667  0.998313994
## 668  0.998311553
## 673  0.998301061
## 675  0.998295247
## 693  0.998309020
## 696  0.998303219
## 697  0.998300749
## 709  0.998323462
## 711  0.998321924
## 715  0.998317901
## 725  0.998266950
## 730  0.998323285
## 742  0.998316852
## 743  0.998315071
## 754  0.998283369
## 756  0.998326760
## 764  0.998324645
## 765  0.998324549
## 766  0.998324485
## 771  0.998323167
## 777  0.998328559
## 778  0.998328559
## 788  0.998322623
## 789  0.998321932
## 794  0.998314294
## 796  0.998309528
## 800  0.998300704
## 805  0.998281568
## 808  0.998327015
## 811  0.998326634
## 812  0.998326378
## 814  0.998325825
## 821  0.998322970
## 826  0.998316447
## 830  0.998312726
## 833  0.998326792
## 836  0.998325654
## 839  0.998323936
## 842  0.998321400
## 855  0.998291429
## 858  0.998282382
## 859  0.998326815
## 866  0.998323513
## 867  0.998322640
## 872  0.998317083
## 878  0.998304069
## 882  0.998291810
## 888  0.998325905
## 890  0.998325003
## 892  0.998323787
## 898  0.998317910
## 900  0.998314589
## 906  0.998299802
## 909  0.998289274
## 924  0.998317356
## 926  0.998313837
## 931  0.998302556
## 964  0.998326501
## 969  0.998324208
## 977  0.998315298
## 985  0.998295331
## 986  0.998290637
## 990  0.998326299
## 1006 0.998304436
## 1014 0.998285598
## 1016 0.998327073
## 1019 0.998326677
## 1023 0.998325629
## 1024 0.998325419
## 1037 0.998315803
## 1042 0.998328560
## 1052 0.998328560
## 1062 0.998132354
## 1063 0.998130458
## 1064 0.998130522
## 1082 0.001235698
## 1083 0.001234928
## 1084 0.001241379
## 1089 0.998283268
## 1092 0.998273650
## 1096 0.998324658
## 1097 0.998324291
## 1109 0.998306517
## 1112 0.001759032
## 1113 0.003493086
## 1115 0.001671428
## 1117 0.001693356
## 1122 0.002622221
## 1123 0.002709232
## 1143 0.998280801
## 1145 0.998326627
## 1153 0.998324435
## 1164 0.998312626
## 1169 0.998303846
## 1175 0.998325654
## 1180 0.998322790
## 1195 0.998302615
## 1199 0.998328560
## 1223 0.998328439
## 1230 0.998328559
## 1244 0.998328559
## 1248 0.998328559
## 1249 0.998327875
## 1251 0.998327840
## 1252 0.998327856
## 1256 0.998328469
## 1257 0.998328543
## 1259 0.998328559
## 1260 0.998328560
## 1261 0.998328560
## 1262 0.998328560
## 1263 0.998328560
## 1268 0.998328559
## 1273 0.998328559
## 1285 0.998319891
## 1286 0.998323363
## 1287 0.998321916
## 1296 0.998316185
## 1301 0.998326672
## 1303 0.998326069
## 1305 0.998325357
## 1310 0.998321694
## 1314 0.998316926
## 1315 0.998315298
## 1323 0.998295019
## 1334 0.998323574
## 1339 0.998318847
## 1343 0.998312263
## 1348 0.998299357

Se grafican las predicciones vs. los valores reales y se calculan algunas métricas de evaluación del modelo, como el error cuadrático medio (RSSnn) y el coeficiente de determinación ajustado

# Calculate the limits for x and y axes based on the range of predicted and actual values
x_limits <- range(RNSPredictions, testtarget)
y_limits <- range(RNSPredictions, testtarget)

# Add a margin to the limits for better visualization
x_margin <- diff(x_limits) * 0.1  # Adjust the margin as needed (10% in this case)
y_margin <- diff(y_limits) * 0.1

x_limits <- c(x_limits[1] - x_margin, x_limits[2] + x_margin)
y_limits <- c(y_limits[1] - y_margin, y_limits[2] + y_margin)

# Plotting Predictions vs. Actual Values with adjusted limits
plot(RNSPredictions, testtarget, 
     xlab = "Predicted Values", ylab = "Actual Values", 
     main = "Predicted vs Actual", xlim = x_limits, ylim = y_limits)
abline(a = 0, b = 1, col = "red")  # Line representing perfect predictions

RSSnn <- (RNSPredictions - testtarget)^2
sum(RSSnn)/nrow(testtarget)
## [1] 0.1297891
1 - sum(RSSnn)/sum((testtarget - mean(trainingtarget))^2)
## [1] -0.4864533
plot(RSSnn)