Simulación
options(scipen = 999999)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Warning: package 'caret' was built under R version 4.0.5
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.5
library(DescTools)
## Warning: package 'DescTools' was built under R version 4.0.5
##
## Attaching package: 'DescTools'
## The following objects are masked from 'package:caret':
##
## MAE, RMSE
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.0.5
set.seed(50)
numero_de_muestras<-1000
data(BostonHousing) #Para llamar datos de una biblioteca (como mlblench), usamos "data()"
housing <- BostonHousing #Para usar la base de datos le asignamos a la variable "housing"
stargazer(housing,title = "datos BostonHousing",type = "html") # Obtenemos un data frame con la informacion de la base Bostonhousing
datos BostonHousing
|
|
|
Statistic
|
N
|
Mean
|
St. Dev.
|
Min
|
Pctl(25)
|
Pctl(75)
|
Max
|
|
|
|
crim
|
506
|
3.614
|
8.602
|
0.006
|
0.082
|
3.677
|
88.976
|
|
zn
|
506
|
11.364
|
23.322
|
0
|
0
|
12.5
|
100
|
|
indus
|
506
|
11.137
|
6.860
|
0.460
|
5.190
|
18.100
|
27.740
|
|
nox
|
506
|
0.555
|
0.116
|
0.385
|
0.449
|
0.624
|
0.871
|
|
rm
|
506
|
6.285
|
0.703
|
3.561
|
5.886
|
6.624
|
8.780
|
|
age
|
506
|
68.575
|
28.149
|
2.900
|
45.025
|
94.075
|
100.000
|
|
dis
|
506
|
3.795
|
2.106
|
1.130
|
2.100
|
5.188
|
12.126
|
|
rad
|
506
|
9.549
|
8.707
|
1
|
4
|
24
|
24
|
|
tax
|
506
|
408.237
|
168.537
|
187
|
279
|
666
|
711
|
|
ptratio
|
506
|
18.456
|
2.165
|
12.600
|
17.400
|
20.200
|
22.000
|
|
b
|
506
|
356.674
|
91.295
|
0.320
|
375.378
|
396.225
|
396.900
|
|
lstat
|
506
|
12.653
|
7.141
|
1.730
|
6.950
|
16.955
|
37.970
|
|
medv
|
506
|
22.533
|
9.197
|
5
|
17.0
|
25
|
50
|
|
|
muestras<- BostonHousing$medv %>%
createDataPartition(p = 0.8,
times = numero_de_muestras,
list = TRUE)
Modelos_Entrenamiento<-vector(mode = "list",
length = numero_de_muestras)
Pronostico_Prueba<-vector(mode = "list",
length = numero_de_muestras)
Resultados_Performance_data_entrenamiento<-vector(mode = "list",
length = numero_de_muestras)
Resultados_Performance<-vector(mode = "list",
length = numero_de_muestras)
for(j in 1:numero_de_muestras){
Datos_Entrenamiento<- BostonHousing[muestras[[j]], ]
Datos_Prueba<- BostonHousing[-muestras[[j]], ]
Modelos_Entrenamiento[[j]]<-lm(formula = medv~.,data=Datos_Entrenamiento)
Pronostico_Prueba[[j]]<-Modelos_Entrenamiento[[j]] %>% predict(Datos_Prueba)
Resultados_Performance_data_entrenamiento[[j]]<-data.frame(
R2 = R2(Modelos_Entrenamiento[[j]]$fitted.values,
Datos_Entrenamiento$medv),
RMSE = RMSE(Modelos_Entrenamiento[[j]]$fitted.values,
Datos_Entrenamiento$medv),
MAE = MAE(Modelos_Entrenamiento[[j]]$fitted.values,
Datos_Entrenamiento$medv),
MAPE= MAPE(Modelos_Entrenamiento[[j]]$fitted.values,
Datos_Entrenamiento$medv)*100,
THEIL=TheilU(Modelos_Entrenamiento[[j]]$fitted.values,
Datos_Entrenamiento$medv,type = 1),
Um=Um(Modelos_Entrenamiento[[j]]$fitted.values,
Datos_Entrenamiento$medv),
Us=Us(Modelos_Entrenamiento[[j]]$fitted.values,
Datos_Entrenamiento$medv),
Uc=Uc(Modelos_Entrenamiento[[j]]$fitted.values,
Datos_Entrenamiento$medv)
)
Resultados_Performance[[j]]<-data.frame(
R2 = R2(Pronostico_Prueba[[j]], Datos_Prueba$medv),
RMSE = RMSE(Pronostico_Prueba[[j]], Datos_Prueba$medv),
MAE = MAE(Pronostico_Prueba[[j]], Datos_Prueba$medv),
MAPE= MAPE(Pronostico_Prueba[[j]], Datos_Prueba$medv)*100,
THEIL=TheilU(Pronostico_Prueba[[j]], Datos_Prueba$medv,
type = 1), # También se puede usar la función que creamos: THEIL_U
Um=Um(Pronostico_Prueba[[j]], Datos_Prueba$medv),
Us=Us(Pronostico_Prueba[[j]], Datos_Prueba$medv),
Uc=Uc(Pronostico_Prueba[[j]], Datos_Prueba$medv)
)
}
#Resultados
bind_rows(Resultados_Performance_data_entrenamiento) %>%
stargazer(title = "Medidas de Performance Datos del Modelo",
type = "html",
digits = 3)
Medidas de Performance Datos del Modelo
|
|
|
Statistic
|
N
|
Mean
|
St. Dev.
|
Min
|
Pctl(25)
|
Pctl(75)
|
Max
|
|
|
|
R2
|
1,000
|
0.743
|
0.013
|
0.713
|
0.734
|
0.751
|
0.794
|
|
RMSE
|
1,000
|
4.653
|
0.141
|
4.177
|
4.565
|
4.759
|
4.948
|
|
MAE
|
1,000
|
3.265
|
0.095
|
2.905
|
3.204
|
3.332
|
3.512
|
|
MAPE
|
1,000
|
16.387
|
0.464
|
14.813
|
16.085
|
16.718
|
17.691
|
|
THEIL
|
1,000
|
0.096
|
0.003
|
0.087
|
0.095
|
0.099
|
0.102
|
|
Um
|
1,000
|
0.000
|
0.000
|
0
|
0
|
0
|
0
|
|
Us
|
1,000
|
0.074
|
0.004
|
0.058
|
0.072
|
0.077
|
0.085
|
|
Uc
|
1,000
|
0.928
|
0.004
|
0.918
|
0.925
|
0.931
|
0.945
|
|
|
Resultados