library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(keras)
library(reticulate)
library(dplyr)
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(neuralnet)
##
## Attaching package: 'neuralnet'
##
## The following object is masked from 'package:dplyr':
##
## compute
datos_inegi <- read.csv("trafico_choques.csv")
str(datos_inegi)
## 'data.frame': 1352 obs. of 11 variables:
## $ cve_entidad : int 19 19 19 19 19 19 19 19 19 19 ...
## $ desc_entidad : chr "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
## $ cve_municipio : int 1 1 1 1 1 1 1 1 1 1 ...
## $ desc_municipio: chr "Abasolo" "Abasolo" "Abasolo" "Abasolo" ...
## $ id_indicador : num 6.21e+09 6.21e+09 6.21e+09 6.21e+09 6.21e+09 ...
## $ indicador : chr "VehÃculos de motor registrados en circulación" "VehÃculos de motor registrados en circulación" "VehÃculos de motor registrados en circulación" "VehÃculos de motor registrados en circulación" ...
## $ año : int 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 ...
## $ valor : int 216 228 251 300 397 447 471 503 542 600 ...
## $ choque : int 1 1 1 1 1 1 1 1 0 0 ...
## $ valor_c : int 13 11 11 13 7 7 4 1 0 0 ...
## $ unidad_medida : chr "No especificada" "No especificada" "No especificada" "No especificada" ...
datos_procesados <- datos_inegi[, c("valor_c", "año", "valor", "choque")]
colSums(is.na(datos_procesados))
## valor_c año valor choque
## 0 0 0 0
set.seed(123)
train <- createDataPartition(y = datos_procesados$valor_c, p = 0.8, list = FALSE, times = 1)
data_train <- datos_procesados[train, ]
data_test <- datos_procesados[-train, ]
# Ajustar la red neuronal
RNS <- neuralnet(choque ~ año + valor + valor_c,
data = data_train,
hidden = c(12, 7),
linear.output = TRUE,
lifesign = 'full',
threshold = 0.05,
rep = 4)
## hidden: 12, 7 thresh: 0.05 rep: 1/4 steps: 1000 min thresh: 0.147450204391795
## 2000 min thresh: 0.147450204391795
## 3000 min thresh: 0.147450204391795
## 4000 min thresh: 0.147450204391795
## 5000 min thresh: 0.10620165345522
## 6000 min thresh: 0.10620165345522
## 7000 min thresh: 0.0878303558160084
## 8000 min thresh: 0.0830219385703452
## 9000 min thresh: 0.0830219385703452
## 10000 min thresh: 0.0830219385703452
## 11000 min thresh: 0.0830219385703452
## 12000 min thresh: 0.0830219385703452
## 13000 min thresh: 0.0830219385703452
## 14000 min thresh: 0.0830219385703452
## 15000 min thresh: 0.0830219385703452
## 16000 min thresh: 0.0830219385703452
## 17000 min thresh: 0.0830219385703452
## 18000 min thresh: 0.0830219385703452
## 19000 min thresh: 0.0830219385703452
## 20000 min thresh: 0.0830219385703452
## 21000 min thresh: 0.0830219385703452
## 22000 min thresh: 0.0830219385703452
## 23000 min thresh: 0.0830219385703452
## 24000 min thresh: 0.0830219385703452
## 25000 min thresh: 0.0830219385703452
## 26000 min thresh: 0.0830219385703452
## 27000 min thresh: 0.0830219385703452
## 28000 min thresh: 0.0830219385703452
## 29000 min thresh: 0.0830219385703452
## 30000 min thresh: 0.0830219385703452
## 31000 min thresh: 0.0830219385703452
## 32000 min thresh: 0.0830219385703452
## 33000 min thresh: 0.0830219385703452
## 34000 min thresh: 0.0830219385703452
## 35000 min thresh: 0.0830219385703452
## 36000 min thresh: 0.0830219385703452
## 37000 min thresh: 0.0830219385703452
## 38000 min thresh: 0.0782836322906955
## 39000 min thresh: 0.0741519613087895
## 40000 min thresh: 0.0657552135007406
## 40252 error: 42.32756 time: 30.83 secs
## hidden: 12, 7 thresh: 0.05 rep: 2/4 steps: 1000 min thresh: 0.163351991385583
## 2000 min thresh: 0.108612025324469
## 3000 min thresh: 0.0688313559404173
## 4000 min thresh: 0.0688313559404173
## 5000 min thresh: 0.0670005937956245
## 6000 min thresh: 0.0536729546790522
## 7000 min thresh: 0.0536729546790522
## 8000 min thresh: 0.0536729546790522
## 9000 min thresh: 0.0536729546790522
## 10000 min thresh: 0.0536729546790522
## 11000 min thresh: 0.0536729546790522
## 12000 min thresh: 0.0536729546790522
## 13000 min thresh: 0.0536729546790522
## 13871 error: 35.00507 time: 10.64 secs
## hidden: 12, 7 thresh: 0.05 rep: 3/4 steps: 434 error: 42.80205 time: 0.31 secs
## hidden: 12, 7 thresh: 0.05 rep: 4/4 steps: 300 error: 43.82404 time: 0.22 secs
# Train/test split en matrices y separando variable a predecir
training <- as.matrix(data_train[,1:3])
trainingtarget <- as.matrix(data_train[,4])
test <- as.matrix(data_test[,1:3])
testtarget <- as.matrix(data_test[,4])
# Estandarización de variables
m <- colMeans(training)
s <- apply(training, 2, sd)
training <- scale(training, center = m, scale = s)
test <- scale(test, center = m, scale = s)
data_train_S <- as.data.frame(cbind(training, (trainingtarget - mean(trainingtarget))/sd(trainingtarget)))
colnames(data_train_S) <- colnames(data_train)
RNSVALOR <- neuralnet(choque ~ año + valor + valor_c,
data = data_train_S,
hidden = c(12, 7),
linear.output = TRUE,
lifesign = 'full',
rep = 4,
stepmax = 2000)
## hidden: 12, 7 thresh: 0.01 rep: 1/4 steps: 1000 min thresh: 1.29167941533132
## stepmax min thresh: 1.29167941533132
## hidden: 12, 7 thresh: 0.01 rep: 2/4 steps: 1000 min thresh: 1.04390121809272
## stepmax min thresh: 1.04390121809272
## hidden: 12, 7 thresh: 0.01 rep: 3/4 steps: 1000 min thresh: 0.874235861610313
## stepmax min thresh: 0.874235861610313
## hidden: 12, 7 thresh: 0.01 rep: 4/4 steps: 1000 min thresh: 1.45263390011277
## stepmax min thresh: 1.45263390011277
## Warning: Algorithm did not converge in 4 of 4 repetition(s) within the stepmax.
plot(RNS)
data_test_S <- as.data.frame(test)
colnames(data_test) <- colnames(data_test_S)
RNSPredictions <- predict(RNS, newdata = data_test_S)
RNSPredictions
## [,1]
## 3 -0.963565478
## 10 -1.354864578
## 13 -1.152696147
## 15 -0.922730345
## 28 -0.748334389
## 30 -1.241281319
## 31 -1.370358628
## 32 -1.388513929
## 43 -0.851861035
## 51 -0.045255805
## 52 -0.008768220
## 54 -0.707400607
## 59 -1.378305388
## 71 -0.752799827
## 72 -0.627286380
## 75 -0.132033361
## 90 -1.217922132
## 98 -0.697309454
## 100 -0.366896999
## 104 -0.002482500
## 105 -0.641790634
## 107 -0.662642828
## 108 -0.674370093
## 114 -0.876764630
## 119 -0.777436715
## 121 -0.512266664
## 142 -1.272933167
## 149 -0.792020918
## 153 -0.219467728
## 157 -0.681625903
## 160 -1.257249487
## 163 -1.389536309
## 165 -1.374367724
## 179 -0.229896009
## 201 -0.686371659
## 208 0.031038829
## 210 -0.740539247
## 223 -0.904370288
## 225 -0.850110269
## 233 -0.039046303
## 234 -0.003531855
## 237 -0.923068766
## 243 -1.367402180
## 246 -1.226745660
## 248 -0.988514901
## 257 -0.213275970
## 264 -1.226456447
## 266 -1.387405354
## 269 -1.388555311
## 273 -1.284301249
## 281 -0.816509987
## 284 -0.702841411
## 288 -0.693105305
## 289 -0.740370856
## 290 -0.838018446
## 292 -1.337714364
## 293 -1.414438141
## 310 -0.076545428
## 312 0.009639439
## 313 -0.681143873
## 319 -1.389538971
## 320 -1.392788218
## 322 -1.378983505
## 331 -0.814770602
## 335 -0.322964604
## 338 -0.032895072
## 340 -0.739100594
## 341 -0.912602460
## 342 -1.218956704
## 343 -1.371473992
## 344 -1.392757098
## 349 -1.317960326
## 357 -0.796031342
## 360 -0.389613824
## 373 -1.372370900
## 378 -1.007327492
## 380 -0.868840321
## 386 -0.352863695
## 387 -0.194909833
## 392 0.046636750
## 397 1.724054676
## 401 0.708381601
## 402 0.637619037
## 408 0.370353599
## 410 0.313100765
## 414 0.234541459
## 420 -1.186533544
## 425 -1.364441339
## 430 -0.966902789
## 447 -1.364541970
## 450 -1.379852602
## 451 -1.366411214
## 457 -0.886141948
## 458 -0.859596153
## 459 -0.844814011
## 464 -0.151894356
## 468 0.027752084
## 471 -0.943152386
## 475 -1.388111107
## 483 -0.911379594
## 487 -0.793181835
## 496 -0.667278622
## 498 -0.717059535
## 503 -1.217065618
## 504 -1.105135274
## 506 -0.881927107
## 510 -0.800193100
## 517 -0.063528808
## 519 -0.065988025
## 531 -1.296335033
## 536 -0.864441765
## 538 -0.830586418
## 541 -0.560444855
## 545 -0.053102514
## 555 -1.381578513
## 568 -0.402075071
## 570 -0.120600758
## 571 -0.050576128
## 574 -0.754640453
## 576 -1.258775920
## 578 -1.390309415
## 582 -1.356126868
## 585 -1.156582735
## 593 -0.568718572
## 600 -0.747369657
## 603 -1.371189383
## 608 -1.350549715
## 618 -0.679624630
## 624 0.011814198
## 626 -0.515788349
## 629 -0.174467374
## 631 -0.142254835
## 634 -0.078474446
## 639 0.032150194
## 640 0.011823425
## 647 0.098866508
## 648 0.077290356
## 653 -0.920977508
## 654 -1.225779175
## 655 -1.367105358
## 667 -0.850113595
## 668 -0.830771847
## 673 -0.199846397
## 675 -0.032044067
## 693 -0.853977540
## 696 -0.713461961
## 697 -0.570547015
## 709 -1.390287053
## 711 -1.375058914
## 715 -1.146864930
## 725 -0.266435492
## 730 -0.743595713
## 742 -1.026782064
## 743 -0.924497606
## 754 -0.009535374
## 756 -0.720221236
## 764 -1.307683581
## 765 -1.227932589
## 766 -1.104501491
## 771 -0.831189037
## 777 -0.027544910
## 778 0.004444846
## 788 -1.386199951
## 789 -1.375593950
## 794 -1.033395773
## 796 -0.875016100
## 800 -0.716051081
## 805 -0.050549975
## 808 -0.700571992
## 811 -1.275430175
## 812 -1.367519536
## 814 -1.360559632
## 821 -0.865634278
## 826 -0.609836918
## 830 -0.041833187
## 833 -0.680994854
## 836 -1.252447299
## 839 -1.389917293
## 842 -1.355041214
## 855 -0.239570289
## 858 -0.010354106
## 859 -0.681068864
## 866 -1.384581020
## 867 -1.374073114
## 872 -1.020190711
## 878 -0.711491295
## 882 -0.117447163
## 888 -1.241058013
## 890 -1.388368395
## 892 -1.383032660
## 898 -1.006192592
## 900 -0.869026436
## 906 -0.381545919
## 909 -0.043895754
## 924 -1.015785208
## 926 -0.870955311
## 931 -0.559420619
## 964 -0.756829783
## 969 -1.389516326
## 977 -0.920985882
## 985 -0.229952983
## 986 -0.117860182
## 990 -0.754297285
## 1006 -0.836325902
## 1014 -0.007198644
## 1016 -0.697939448
## 1019 -1.265914917
## 1023 -1.334535664
## 1024 -1.287201253
## 1037 -0.106489301
## 1042 -0.143236420
## 1052 -0.130888804
## 1062 -0.075069673
## 1063 -0.551126858
## 1064 -0.565040773
## 1082 1.536229880
## 1083 1.564244681
## 1084 1.564155804
## 1089 -0.252854647
## 1092 -0.013567383
## 1096 -1.213244739
## 1097 -1.365829292
## 1109 -0.852701437
## 1112 -1.477501187
## 1113 -1.420164431
## 1115 -1.472248562
## 1117 -1.446383735
## 1122 -0.532296776
## 1123 -0.643022870
## 1143 -0.052466849
## 1145 -0.662812839
## 1153 -1.350418929
## 1164 -0.652305017
## 1169 -0.014170461
## 1175 -1.365287562
## 1180 -1.343096819
## 1195 -0.015293128
## 1199 -0.462178573
## 1223 -0.617882028
## 1230 -0.448841764
## 1244 -0.070606883
## 1248 -0.021068896
## 1249 -0.638629479
## 1251 -0.659125568
## 1252 -0.674889152
## 1256 -1.177859981
## 1257 -1.131120171
## 1259 -0.900186655
## 1260 -0.867804445
## 1261 -0.858389396
## 1262 -0.851679941
## 1263 -0.833864280
## 1268 -0.258969320
## 1273 -0.018344927
## 1285 -1.248037411
## 1286 -1.108265074
## 1287 -0.992205569
## 1296 -0.229899664
## 1301 -0.681262241
## 1303 -0.956417866
## 1305 -1.374070524
## 1310 -1.354512557
## 1314 -1.024026919
## 1315 -0.921791148
## 1323 -0.232032508
## 1334 -1.384185747
## 1339 -1.141863796
## 1343 -0.851996200
## 1348 -0.384712082
# Calculate the limits for x and y axes based on the range of predicted and actual values
x_limits <- range(RNSPredictions, testtarget)
y_limits <- range(RNSPredictions, testtarget)
# Add a margin to the limits for better visualization
x_margin <- diff(x_limits) * 0.1 # Adjust the margin as needed (10% in this case)
y_margin <- diff(y_limits) * 0.1
x_limits <- c(x_limits[1] - x_margin, x_limits[2] + x_margin)
y_limits <- c(y_limits[1] - y_margin, y_limits[2] + y_margin)
# Plotting Predictions vs. Actual Values with adjusted limits
plot(RNSPredictions, testtarget,
xlab = "Predicted Values", ylab = "Actual Values",
main = "Predicted vs Actual", xlim = x_limits, ylim = y_limits)
abline(a = 0, b = 1, col = "red") # Line representing perfect predictions

RSSnn <- (RNSPredictions - testtarget)^2
sum(RSSnn)/nrow(testtarget)
## [1] 3.025902
1 - sum(RSSnn)/sum((testtarget - mean(trainingtarget))^2)
## [1] -33.65516