library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(keras)
library(reticulate)
library(dplyr)
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(neuralnet)
## 
## Attaching package: 'neuralnet'
## 
## The following object is masked from 'package:dplyr':
## 
##     compute
datos_inegi <- read.csv("trafico_choques.csv")
str(datos_inegi)
## 'data.frame':    1352 obs. of  11 variables:
##  $ cve_entidad   : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ desc_entidad  : chr  "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
##  $ cve_municipio : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ desc_municipio: chr  "Abasolo" "Abasolo" "Abasolo" "Abasolo" ...
##  $ id_indicador  : num  6.21e+09 6.21e+09 6.21e+09 6.21e+09 6.21e+09 ...
##  $ indicador     : chr  "Vehículos de motor registrados en circulación" "Vehículos de motor registrados en circulación" "Vehículos de motor registrados en circulación" "Vehículos de motor registrados en circulación" ...
##  $ año           : int  1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 ...
##  $ valor         : int  216 228 251 300 397 447 471 503 542 600 ...
##  $ choque        : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ valor_c       : int  13 11 11 13 7 7 4 1 0 0 ...
##  $ unidad_medida : chr  "No especificada" "No especificada" "No especificada" "No especificada" ...
datos_procesados <- datos_inegi[, c("valor_c", "año", "valor", "choque")]
colSums(is.na(datos_procesados))
## valor_c     año   valor  choque 
##       0       0       0       0
set.seed(123)
train <- createDataPartition(y = datos_procesados$valor_c, p = 0.8, list = FALSE, times = 1)
data_train <- datos_procesados[train, ]
data_test <- datos_procesados[-train, ]
# Ajustar la red neuronal
RNS <- neuralnet(choque ~ año + valor + valor_c,
                 data = data_train,
                 hidden = c(12, 7),
                 linear.output = TRUE,
                 lifesign = 'full',
                 threshold = 0.05,
                 rep = 4)
## hidden: 12, 7    thresh: 0.05    rep: 1/4    steps:    1000  min thresh: 0.147450204391795
##                                                        2000  min thresh: 0.147450204391795
##                                                        3000  min thresh: 0.147450204391795
##                                                        4000  min thresh: 0.147450204391795
##                                                        5000  min thresh: 0.10620165345522
##                                                        6000  min thresh: 0.10620165345522
##                                                        7000  min thresh: 0.0878303558160084
##                                                        8000  min thresh: 0.0830219385703452
##                                                        9000  min thresh: 0.0830219385703452
##                                                       10000  min thresh: 0.0830219385703452
##                                                       11000  min thresh: 0.0830219385703452
##                                                       12000  min thresh: 0.0830219385703452
##                                                       13000  min thresh: 0.0830219385703452
##                                                       14000  min thresh: 0.0830219385703452
##                                                       15000  min thresh: 0.0830219385703452
##                                                       16000  min thresh: 0.0830219385703452
##                                                       17000  min thresh: 0.0830219385703452
##                                                       18000  min thresh: 0.0830219385703452
##                                                       19000  min thresh: 0.0830219385703452
##                                                       20000  min thresh: 0.0830219385703452
##                                                       21000  min thresh: 0.0830219385703452
##                                                       22000  min thresh: 0.0830219385703452
##                                                       23000  min thresh: 0.0830219385703452
##                                                       24000  min thresh: 0.0830219385703452
##                                                       25000  min thresh: 0.0830219385703452
##                                                       26000  min thresh: 0.0830219385703452
##                                                       27000  min thresh: 0.0830219385703452
##                                                       28000  min thresh: 0.0830219385703452
##                                                       29000  min thresh: 0.0830219385703452
##                                                       30000  min thresh: 0.0830219385703452
##                                                       31000  min thresh: 0.0830219385703452
##                                                       32000  min thresh: 0.0830219385703452
##                                                       33000  min thresh: 0.0830219385703452
##                                                       34000  min thresh: 0.0830219385703452
##                                                       35000  min thresh: 0.0830219385703452
##                                                       36000  min thresh: 0.0830219385703452
##                                                       37000  min thresh: 0.0830219385703452
##                                                       38000  min thresh: 0.0782836322906955
##                                                       39000  min thresh: 0.0741519613087895
##                                                       40000  min thresh: 0.0657552135007406
##                                                       40252  error: 42.32756 time: 30.83 secs
## hidden: 12, 7    thresh: 0.05    rep: 2/4    steps:    1000  min thresh: 0.163351991385583
##                                                        2000  min thresh: 0.108612025324469
##                                                        3000  min thresh: 0.0688313559404173
##                                                        4000  min thresh: 0.0688313559404173
##                                                        5000  min thresh: 0.0670005937956245
##                                                        6000  min thresh: 0.0536729546790522
##                                                        7000  min thresh: 0.0536729546790522
##                                                        8000  min thresh: 0.0536729546790522
##                                                        9000  min thresh: 0.0536729546790522
##                                                       10000  min thresh: 0.0536729546790522
##                                                       11000  min thresh: 0.0536729546790522
##                                                       12000  min thresh: 0.0536729546790522
##                                                       13000  min thresh: 0.0536729546790522
##                                                       13871  error: 35.00507 time: 10.64 secs
## hidden: 12, 7    thresh: 0.05    rep: 3/4    steps:     434  error: 42.80205 time: 0.31 secs
## hidden: 12, 7    thresh: 0.05    rep: 4/4    steps:     300  error: 43.82404 time: 0.22 secs
# Train/test split en matrices y separando variable a predecir
training <- as.matrix(data_train[,1:3])
trainingtarget <- as.matrix(data_train[,4])
test <- as.matrix(data_test[,1:3])
testtarget <- as.matrix(data_test[,4])

# Estandarización de variables
m <- colMeans(training)
s <- apply(training, 2, sd)
training <- scale(training, center = m, scale = s)
test <- scale(test, center = m, scale = s)
data_train_S <- as.data.frame(cbind(training, (trainingtarget - mean(trainingtarget))/sd(trainingtarget)))
colnames(data_train_S) <- colnames(data_train)

RNSVALOR <- neuralnet(choque ~ año + valor + valor_c,
                 data = data_train_S,
                 hidden = c(12, 7),
                 linear.output = TRUE, 
                 lifesign = 'full',
                 rep = 4,
                 stepmax = 2000)
## hidden: 12, 7    thresh: 0.01    rep: 1/4    steps:    1000  min thresh: 1.29167941533132
##                                                     stepmax  min thresh: 1.29167941533132
## hidden: 12, 7    thresh: 0.01    rep: 2/4    steps:    1000  min thresh: 1.04390121809272
##                                                     stepmax  min thresh: 1.04390121809272
## hidden: 12, 7    thresh: 0.01    rep: 3/4    steps:    1000  min thresh: 0.874235861610313
##                                                     stepmax  min thresh: 0.874235861610313
## hidden: 12, 7    thresh: 0.01    rep: 4/4    steps:    1000  min thresh: 1.45263390011277
##                                                     stepmax  min thresh: 1.45263390011277
## Warning: Algorithm did not converge in 4 of 4 repetition(s) within the stepmax.
plot(RNS)
data_test_S <- as.data.frame(test)
colnames(data_test) <- colnames(data_test_S)
RNSPredictions <- predict(RNS, newdata = data_test_S)
RNSPredictions
##              [,1]
## 3    -0.963565478
## 10   -1.354864578
## 13   -1.152696147
## 15   -0.922730345
## 28   -0.748334389
## 30   -1.241281319
## 31   -1.370358628
## 32   -1.388513929
## 43   -0.851861035
## 51   -0.045255805
## 52   -0.008768220
## 54   -0.707400607
## 59   -1.378305388
## 71   -0.752799827
## 72   -0.627286380
## 75   -0.132033361
## 90   -1.217922132
## 98   -0.697309454
## 100  -0.366896999
## 104  -0.002482500
## 105  -0.641790634
## 107  -0.662642828
## 108  -0.674370093
## 114  -0.876764630
## 119  -0.777436715
## 121  -0.512266664
## 142  -1.272933167
## 149  -0.792020918
## 153  -0.219467728
## 157  -0.681625903
## 160  -1.257249487
## 163  -1.389536309
## 165  -1.374367724
## 179  -0.229896009
## 201  -0.686371659
## 208   0.031038829
## 210  -0.740539247
## 223  -0.904370288
## 225  -0.850110269
## 233  -0.039046303
## 234  -0.003531855
## 237  -0.923068766
## 243  -1.367402180
## 246  -1.226745660
## 248  -0.988514901
## 257  -0.213275970
## 264  -1.226456447
## 266  -1.387405354
## 269  -1.388555311
## 273  -1.284301249
## 281  -0.816509987
## 284  -0.702841411
## 288  -0.693105305
## 289  -0.740370856
## 290  -0.838018446
## 292  -1.337714364
## 293  -1.414438141
## 310  -0.076545428
## 312   0.009639439
## 313  -0.681143873
## 319  -1.389538971
## 320  -1.392788218
## 322  -1.378983505
## 331  -0.814770602
## 335  -0.322964604
## 338  -0.032895072
## 340  -0.739100594
## 341  -0.912602460
## 342  -1.218956704
## 343  -1.371473992
## 344  -1.392757098
## 349  -1.317960326
## 357  -0.796031342
## 360  -0.389613824
## 373  -1.372370900
## 378  -1.007327492
## 380  -0.868840321
## 386  -0.352863695
## 387  -0.194909833
## 392   0.046636750
## 397   1.724054676
## 401   0.708381601
## 402   0.637619037
## 408   0.370353599
## 410   0.313100765
## 414   0.234541459
## 420  -1.186533544
## 425  -1.364441339
## 430  -0.966902789
## 447  -1.364541970
## 450  -1.379852602
## 451  -1.366411214
## 457  -0.886141948
## 458  -0.859596153
## 459  -0.844814011
## 464  -0.151894356
## 468   0.027752084
## 471  -0.943152386
## 475  -1.388111107
## 483  -0.911379594
## 487  -0.793181835
## 496  -0.667278622
## 498  -0.717059535
## 503  -1.217065618
## 504  -1.105135274
## 506  -0.881927107
## 510  -0.800193100
## 517  -0.063528808
## 519  -0.065988025
## 531  -1.296335033
## 536  -0.864441765
## 538  -0.830586418
## 541  -0.560444855
## 545  -0.053102514
## 555  -1.381578513
## 568  -0.402075071
## 570  -0.120600758
## 571  -0.050576128
## 574  -0.754640453
## 576  -1.258775920
## 578  -1.390309415
## 582  -1.356126868
## 585  -1.156582735
## 593  -0.568718572
## 600  -0.747369657
## 603  -1.371189383
## 608  -1.350549715
## 618  -0.679624630
## 624   0.011814198
## 626  -0.515788349
## 629  -0.174467374
## 631  -0.142254835
## 634  -0.078474446
## 639   0.032150194
## 640   0.011823425
## 647   0.098866508
## 648   0.077290356
## 653  -0.920977508
## 654  -1.225779175
## 655  -1.367105358
## 667  -0.850113595
## 668  -0.830771847
## 673  -0.199846397
## 675  -0.032044067
## 693  -0.853977540
## 696  -0.713461961
## 697  -0.570547015
## 709  -1.390287053
## 711  -1.375058914
## 715  -1.146864930
## 725  -0.266435492
## 730  -0.743595713
## 742  -1.026782064
## 743  -0.924497606
## 754  -0.009535374
## 756  -0.720221236
## 764  -1.307683581
## 765  -1.227932589
## 766  -1.104501491
## 771  -0.831189037
## 777  -0.027544910
## 778   0.004444846
## 788  -1.386199951
## 789  -1.375593950
## 794  -1.033395773
## 796  -0.875016100
## 800  -0.716051081
## 805  -0.050549975
## 808  -0.700571992
## 811  -1.275430175
## 812  -1.367519536
## 814  -1.360559632
## 821  -0.865634278
## 826  -0.609836918
## 830  -0.041833187
## 833  -0.680994854
## 836  -1.252447299
## 839  -1.389917293
## 842  -1.355041214
## 855  -0.239570289
## 858  -0.010354106
## 859  -0.681068864
## 866  -1.384581020
## 867  -1.374073114
## 872  -1.020190711
## 878  -0.711491295
## 882  -0.117447163
## 888  -1.241058013
## 890  -1.388368395
## 892  -1.383032660
## 898  -1.006192592
## 900  -0.869026436
## 906  -0.381545919
## 909  -0.043895754
## 924  -1.015785208
## 926  -0.870955311
## 931  -0.559420619
## 964  -0.756829783
## 969  -1.389516326
## 977  -0.920985882
## 985  -0.229952983
## 986  -0.117860182
## 990  -0.754297285
## 1006 -0.836325902
## 1014 -0.007198644
## 1016 -0.697939448
## 1019 -1.265914917
## 1023 -1.334535664
## 1024 -1.287201253
## 1037 -0.106489301
## 1042 -0.143236420
## 1052 -0.130888804
## 1062 -0.075069673
## 1063 -0.551126858
## 1064 -0.565040773
## 1082  1.536229880
## 1083  1.564244681
## 1084  1.564155804
## 1089 -0.252854647
## 1092 -0.013567383
## 1096 -1.213244739
## 1097 -1.365829292
## 1109 -0.852701437
## 1112 -1.477501187
## 1113 -1.420164431
## 1115 -1.472248562
## 1117 -1.446383735
## 1122 -0.532296776
## 1123 -0.643022870
## 1143 -0.052466849
## 1145 -0.662812839
## 1153 -1.350418929
## 1164 -0.652305017
## 1169 -0.014170461
## 1175 -1.365287562
## 1180 -1.343096819
## 1195 -0.015293128
## 1199 -0.462178573
## 1223 -0.617882028
## 1230 -0.448841764
## 1244 -0.070606883
## 1248 -0.021068896
## 1249 -0.638629479
## 1251 -0.659125568
## 1252 -0.674889152
## 1256 -1.177859981
## 1257 -1.131120171
## 1259 -0.900186655
## 1260 -0.867804445
## 1261 -0.858389396
## 1262 -0.851679941
## 1263 -0.833864280
## 1268 -0.258969320
## 1273 -0.018344927
## 1285 -1.248037411
## 1286 -1.108265074
## 1287 -0.992205569
## 1296 -0.229899664
## 1301 -0.681262241
## 1303 -0.956417866
## 1305 -1.374070524
## 1310 -1.354512557
## 1314 -1.024026919
## 1315 -0.921791148
## 1323 -0.232032508
## 1334 -1.384185747
## 1339 -1.141863796
## 1343 -0.851996200
## 1348 -0.384712082
# Calculate the limits for x and y axes based on the range of predicted and actual values
x_limits <- range(RNSPredictions, testtarget)
y_limits <- range(RNSPredictions, testtarget)

# Add a margin to the limits for better visualization
x_margin <- diff(x_limits) * 0.1  # Adjust the margin as needed (10% in this case)
y_margin <- diff(y_limits) * 0.1

x_limits <- c(x_limits[1] - x_margin, x_limits[2] + x_margin)
y_limits <- c(y_limits[1] - y_margin, y_limits[2] + y_margin)

# Plotting Predictions vs. Actual Values with adjusted limits
plot(RNSPredictions, testtarget, 
     xlab = "Predicted Values", ylab = "Actual Values", 
     main = "Predicted vs Actual", xlim = x_limits, ylim = y_limits)
abline(a = 0, b = 1, col = "red")  # Line representing perfect predictions

RSSnn <- (RNSPredictions - testtarget)^2
sum(RSSnn)/nrow(testtarget)
## [1] 3.025902
1 - sum(RSSnn)/sum((testtarget - mean(trainingtarget))^2)
## [1] -33.65516