Modelo Original
options(scipen = 99999)
load("C:/Users/Economia/Desktop/smoke.RData") # en otros ejercicios podría ser necesario importar un csv o un archivo de Excel
data<-data #aquí se puede cambiar el dataframe de manera sencilla
equation<-as.formula("cigs~cigpric+lcigpric+income+lincome+age+agesq+educ+white+restaurn") #Guardar la fórmula del modelo para no tener que estar escribiendola en cada ocasión
endogena<-data$cigs #aquí designamos la variable endógena del modelo, esto es necesario para el script de simulación.
Modelo_smoke<-lm(formula =equation,data = data)
stargazer::stargazer(Modelo_smoke,
title = "Modelo Estimado",
type = "html",
digits = 8)
Modelo Estimado
|
|
|
|
Dependent variable:
|
|
|
|
|
|
cigs
|
|
|
|
cigpric
|
2.00226800
|
|
|
(1.49283100)
|
|
|
|
|
lcigpric
|
-115.27350000
|
|
|
(85.42432000)
|
|
|
|
|
income
|
-0.00004619
|
|
|
(0.00013349)
|
|
|
|
|
lincome
|
1.40406100
|
|
|
(1.70816600)
|
|
|
|
|
age
|
0.77835900***
|
|
|
(0.16055560)
|
|
|
|
|
agesq
|
-0.00915035***
|
|
|
(0.00174929)
|
|
|
|
|
educ
|
-0.49478060***
|
|
|
(0.16818020)
|
|
|
|
|
white
|
-0.53105160
|
|
|
(1.46072200)
|
|
|
|
|
restaurn
|
-2.64424100**
|
|
|
(1.12999900)
|
|
|
|
|
Constant
|
340.80440000
|
|
|
(260.01560000)
|
|
|
|
|
|
|
Observations
|
807
|
|
R2
|
0.05515398
|
|
Adjusted R2
|
0.04448445
|
|
Residual Std. Error
|
13.41285000 (df = 797)
|
|
F Statistic
|
5.16929800*** (df = 9; 797)
|
|
|
|
Note:
|
p<0.1; p<0.05;
p<0.01
|
#Funciones para la desigualdad de Theil
Um<-function(pronosticado,observado){
library(DescTools)
((mean(pronosticado)-mean(observado))^2)/MSE(pronosticado,observado)
}
Us<-function(pronosticado,observado){
library(DescTools)
((sd(pronosticado)-sd(observado))^2)/MSE(pronosticado,observado)
}
Uc<-function(pronosticado,observado){
library(DescTools)
(2*(1-cor(pronosticado,observado))*sd(pronosticado)*sd(observado))/MSE(pronosticado,observado)}
THEIL_U<-function(pronosticado,observado){
library(DescTools)
RMSE(pronosticado,observado)/(sqrt(mean(pronosticado^2))+sqrt(mean(observado^2)))
}
Simulación
#Script de Simulación versión 2
options(scipen = 999999)
library(dplyr)
library(caret)
library(DescTools)
library(stargazer)
set.seed(50)
numero_de_muestras<-500
proporcion_entrenamiento<-0.75
# Creación de las muestras, aquí usamos la variable endógena que definimos con anterioridad
muestras<- endogena %>%
createDataPartition(p = proporcion_entrenamiento,
times = numero_de_muestras,
list = TRUE)
#Listas vacias para la simulación
Modelos_Entrenamiento<-vector(mode = "list",
length = numero_de_muestras)
Pronostico_Prueba<-vector(mode = "list",
length = numero_de_muestras)
Resultados_Performance_data_entrenamiento<-vector(mode = "list",
length = numero_de_muestras)
Resultados_Performance<-vector(mode = "list",
length = numero_de_muestras)
# Estimar los modelos de cada muestra y sus medidas de desempeño predictivo
for(j in 1:numero_de_muestras){
Datos_Entrenamiento<- data[muestras[[j]], ]
Datos_Prueba<- data[-muestras[[j]], ]
Modelos_Entrenamiento[[j]]<-lm(formula = equation,data=Datos_Entrenamiento)
Pronostico_Prueba[[j]]<-Modelos_Entrenamiento[[j]] %>% predict(Datos_Prueba)
Fe<-Modelos_Entrenamiento[[j]]$fitted.values
Ye<-Datos_Entrenamiento$cigs
Resultados_Performance_data_entrenamiento[[j]]<-data.frame(
R2 = R2(Fe,Ye),
RMSE = RMSE(Fe,Ye),
MAE = MAE(Fe,Ye),
MAPE= MAPE(Fe,Ye)*100,
THEIL=TheilU(Fe,Ye,type = 1),
Um=Um(Fe,Ye),
Us=Us(Fe,Ye),
Uc=Uc(Fe,Ye)
)
Fp<-Pronostico_Prueba[[j]]
Yp<-Datos_Prueba$cigs
Resultados_Performance[[j]]<-data.frame(
R2 = R2(Fp,Yp ),
RMSE = RMSE(Fp, Yp),
MAE = MAE(Fp,Yp),
MAPE= MAPE(Fp,Yp)*100,
THEIL=TheilU(Fp,Yp,type = 1),
Um=Um(Fp,Yp),
Us=Us(Fp,Yp),
Uc=Uc(Fp,Yp)
)
}
Desempeño con los datos de entrenamiento
library(dplyr)
bind_rows(Resultados_Performance_data_entrenamiento) %>%
stargazer(title= "Medidas de Performance Datos del Modelo",
type = "html",
digits = 3,
summary.stat = c("n","mean","sd","min","p25","p75","max"))
Medidas de Performance Datos del Modelo
|
|
|
Statistic
|
N
|
Mean
|
St. Dev.
|
Min
|
Pctl(25)
|
Pctl(75)
|
Max
|
|
|
|
R2
|
500
|
0.058
|
0.007
|
0.040
|
0.054
|
0.063
|
0.083
|
|
RMSE
|
500
|
13.304
|
0.198
|
12.629
|
13.175
|
13.439
|
13.828
|
|
MAE
|
500
|
10.562
|
0.133
|
10.068
|
10.474
|
10.654
|
10.969
|
|
MAPE
|
500
|
Inf.000
|
|
Inf
|
Inf
|
Inf
|
Inf
|
|
THEIL
|
500
|
0.522
|
0.006
|
0.505
|
0.517
|
0.526
|
0.541
|
|
Um
|
500
|
0.000
|
0.000
|
0
|
0
|
0
|
0
|
|
Us
|
500
|
0.613
|
0.020
|
0.554
|
0.601
|
0.625
|
0.669
|
|
Uc
|
500
|
0.389
|
0.020
|
0.332
|
0.376
|
0.401
|
0.448
|
|
|
Desempeño con los datos de prueba (los simulados)
bind_rows(Resultados_Performance) %>%
stargazer(title = "Medidas de Performance Simulación",
type = "html",
digits = 3,
summary.stat = c("n","mean","sd","min","p25","p75","max"))
Medidas de Performance Simulación
|
|
|
Statistic
|
N
|
Mean
|
St. Dev.
|
Min
|
Pctl(25)
|
Pctl(75)
|
Max
|
|
|
|
R2
|
500
|
0.039
|
0.018
|
0.002
|
0.027
|
0.051
|
0.099
|
|
RMSE
|
500
|
13.479
|
0.595
|
11.734
|
13.064
|
13.853
|
15.361
|
|
MAE
|
500
|
10.728
|
0.290
|
9.834
|
10.528
|
10.904
|
11.626
|
|
MAPE
|
500
|
Inf.000
|
|
Inf
|
Inf
|
Inf
|
Inf
|
|
THEIL
|
500
|
0.528
|
0.013
|
0.491
|
0.519
|
0.536
|
0.564
|
|
Um
|
500
|
0.002
|
0.003
|
0.00000
|
0.0003
|
0.003
|
0.021
|
|
Us
|
500
|
0.597
|
0.051
|
0.476
|
0.563
|
0.636
|
0.751
|
|
Uc
|
500
|
0.405
|
0.051
|
0.253
|
0.367
|
0.441
|
0.529
|
|
|
LS0tDQp0aXRsZTogIkFjdHVhbGl6YWNpw7NuIGRlbCBzY3JpcHQgZGUgc2ltdWxhY2nDs24iDQphdXRob3I6ICJBZGVtaXIgUMOpcmV6Ig0KZGF0ZTogImByIFN5cy5EYXRlKClgIg0Kb3V0cHV0OiANCiAgaHRtbF9kb2N1bWVudDogDQogICAgdG9jOiB0cnVlDQogICAgY29kZV9mb2xkaW5nOiBoaWRlDQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0KYGBgDQoNCiMjIyBNb2RlbG8gT3JpZ2luYWwNCg0KYGBge3IscmVzdWx0cz0nYXNpcycsd2FybmluZz1GQUxTRSxtZXNzYWdlPUZBTFNFfQ0Kb3B0aW9ucyhzY2lwZW4gPSA5OTk5OSkNCmxvYWQoIkM6L1VzZXJzL0Vjb25vbWlhL0Rlc2t0b3Avc21va2UuUkRhdGEiKSAjIGVuIG90cm9zIGVqZXJjaWNpb3MgcG9kcsOtYSBzZXIgbmVjZXNhcmlvIGltcG9ydGFyIHVuIGNzdiBvIHVuIGFyY2hpdm8gZGUgRXhjZWwNCmRhdGE8LWRhdGEgI2FxdcOtIHNlIHB1ZWRlIGNhbWJpYXIgZWwgZGF0YWZyYW1lIGRlIG1hbmVyYSBzZW5jaWxsYQ0KZXF1YXRpb248LWFzLmZvcm11bGEoImNpZ3N+Y2lncHJpYytsY2lncHJpYytpbmNvbWUrbGluY29tZSthZ2UrYWdlc3ErZWR1Yyt3aGl0ZStyZXN0YXVybiIpICNHdWFyZGFyIGxhIGbDs3JtdWxhIGRlbCBtb2RlbG8gcGFyYSBubyB0ZW5lciBxdWUgZXN0YXIgZXNjcmliaWVuZG9sYSBlbiBjYWRhIG9jYXNpw7NuDQplbmRvZ2VuYTwtZGF0YSRjaWdzICNhcXXDrSBkZXNpZ25hbW9zIGxhIHZhcmlhYmxlIGVuZMOzZ2VuYSBkZWwgbW9kZWxvLCBlc3RvIGVzIG5lY2VzYXJpbyBwYXJhIGVsIHNjcmlwdCBkZSBzaW11bGFjacOzbi4NCk1vZGVsb19zbW9rZTwtbG0oZm9ybXVsYSA9ZXF1YXRpb24sZGF0YSA9IGRhdGEpDQpzdGFyZ2F6ZXI6OnN0YXJnYXplcihNb2RlbG9fc21va2UsDQogICAgICAgICAgICAgICAgICAgICB0aXRsZSA9ICJNb2RlbG8gRXN0aW1hZG8iLA0KICAgICAgICAgICAgICAgICAgICAgdHlwZSA9ICJodG1sIiwNCiAgICAgICAgICAgICAgICAgICAgIGRpZ2l0cyA9IDgpDQpgYGANCg0KDQpgYGB7cix3YXJuaW5nPUZBTFNFLG1lc3NhZ2U9RkFMU0UscmVzdWx0cz0nYXNpcyd9DQojRnVuY2lvbmVzIHBhcmEgbGEgZGVzaWd1YWxkYWQgZGUgVGhlaWwNClVtPC1mdW5jdGlvbihwcm9ub3N0aWNhZG8sb2JzZXJ2YWRvKXsNCiAgbGlicmFyeShEZXNjVG9vbHMpDQogICgobWVhbihwcm9ub3N0aWNhZG8pLW1lYW4ob2JzZXJ2YWRvKSleMikvTVNFKHByb25vc3RpY2FkbyxvYnNlcnZhZG8pIA0KfQ0KVXM8LWZ1bmN0aW9uKHByb25vc3RpY2FkbyxvYnNlcnZhZG8pew0KICBsaWJyYXJ5KERlc2NUb29scykNCiAgKChzZChwcm9ub3N0aWNhZG8pLXNkKG9ic2VydmFkbykpXjIpL01TRShwcm9ub3N0aWNhZG8sb2JzZXJ2YWRvKQ0KfQ0KVWM8LWZ1bmN0aW9uKHByb25vc3RpY2FkbyxvYnNlcnZhZG8pew0KICBsaWJyYXJ5KERlc2NUb29scykNCiAgKDIqKDEtY29yKHByb25vc3RpY2FkbyxvYnNlcnZhZG8pKSpzZChwcm9ub3N0aWNhZG8pKnNkKG9ic2VydmFkbykpL01TRShwcm9ub3N0aWNhZG8sb2JzZXJ2YWRvKX0NClRIRUlMX1U8LWZ1bmN0aW9uKHByb25vc3RpY2FkbyxvYnNlcnZhZG8pew0KICAgbGlicmFyeShEZXNjVG9vbHMpDQogIFJNU0UocHJvbm9zdGljYWRvLG9ic2VydmFkbykvKHNxcnQobWVhbihwcm9ub3N0aWNhZG9eMikpK3NxcnQobWVhbihvYnNlcnZhZG9eMikpKQ0KfQ0KYGBgDQoNCiMjIyBTaW11bGFjacOzbg0KDQpgYGB7cix3YXJuaW5nPUZBTFNFLG1lc3NhZ2U9RkFMU0UscmVzdWx0cz0nYXNpcyd9DQojU2NyaXB0IGRlIFNpbXVsYWNpw7NuIHZlcnNpw7NuIDINCm9wdGlvbnMoc2NpcGVuID0gOTk5OTk5KSANCmxpYnJhcnkoZHBseXIpIA0KbGlicmFyeShjYXJldCkNCmxpYnJhcnkoRGVzY1Rvb2xzKSANCmxpYnJhcnkoc3RhcmdhemVyKSANCnNldC5zZWVkKDUwKSANCm51bWVyb19kZV9tdWVzdHJhczwtNTAwDQpwcm9wb3JjaW9uX2VudHJlbmFtaWVudG88LTAuNzUNCiMgQ3JlYWNpw7NuIGRlIGxhcyBtdWVzdHJhcywgYXF1w60gdXNhbW9zIGxhIHZhcmlhYmxlIGVuZMOzZ2VuYSBxdWUgZGVmaW5pbW9zIGNvbiBhbnRlcmlvcmlkYWQgDQptdWVzdHJhczwtIGVuZG9nZW5hICU+JQ0KICBjcmVhdGVEYXRhUGFydGl0aW9uKHAgPSBwcm9wb3JjaW9uX2VudHJlbmFtaWVudG8sDQogICAgICAgICAgICAgICAgICAgICAgdGltZXMgPSBudW1lcm9fZGVfbXVlc3RyYXMsDQogICAgICAgICAgICAgICAgICAgICAgbGlzdCA9IFRSVUUpDQojTGlzdGFzIHZhY2lhcyBwYXJhIGxhIHNpbXVsYWNpw7NuDQpNb2RlbG9zX0VudHJlbmFtaWVudG88LXZlY3Rvcihtb2RlID0gImxpc3QiLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbGVuZ3RoID0gbnVtZXJvX2RlX211ZXN0cmFzKQ0KUHJvbm9zdGljb19QcnVlYmE8LXZlY3Rvcihtb2RlID0gImxpc3QiLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbGVuZ3RoID0gbnVtZXJvX2RlX211ZXN0cmFzKQ0KUmVzdWx0YWRvc19QZXJmb3JtYW5jZV9kYXRhX2VudHJlbmFtaWVudG88LXZlY3Rvcihtb2RlID0gImxpc3QiLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgbGVuZ3RoID0gbnVtZXJvX2RlX211ZXN0cmFzKQ0KUmVzdWx0YWRvc19QZXJmb3JtYW5jZTwtdmVjdG9yKG1vZGUgPSAibGlzdCIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsZW5ndGggPSBudW1lcm9fZGVfbXVlc3RyYXMpDQojIEVzdGltYXIgbG9zIG1vZGVsb3MgZGUgY2FkYSBtdWVzdHJhIHkgc3VzIG1lZGlkYXMgZGUgZGVzZW1wZcOxbyBwcmVkaWN0aXZvDQpmb3IoaiBpbiAxOm51bWVyb19kZV9tdWVzdHJhcyl7DQpEYXRvc19FbnRyZW5hbWllbnRvPC0gZGF0YVttdWVzdHJhc1tbal1dLCBdDQpEYXRvc19QcnVlYmE8LSBkYXRhWy1tdWVzdHJhc1tbal1dLCBdDQpNb2RlbG9zX0VudHJlbmFtaWVudG9bW2pdXTwtbG0oZm9ybXVsYSA9IGVxdWF0aW9uLGRhdGE9RGF0b3NfRW50cmVuYW1pZW50bykNClByb25vc3RpY29fUHJ1ZWJhW1tqXV08LU1vZGVsb3NfRW50cmVuYW1pZW50b1tbal1dICU+JSBwcmVkaWN0KERhdG9zX1BydWViYSkNCkZlPC1Nb2RlbG9zX0VudHJlbmFtaWVudG9bW2pdXSRmaXR0ZWQudmFsdWVzDQpZZTwtRGF0b3NfRW50cmVuYW1pZW50byRjaWdzDQpSZXN1bHRhZG9zX1BlcmZvcm1hbmNlX2RhdGFfZW50cmVuYW1pZW50b1tbal1dPC1kYXRhLmZyYW1lKCANCiAgICAgICAgICAgIFIyID0gUjIoRmUsWWUpLA0KICAgICAgICAgICAgUk1TRSA9IFJNU0UoRmUsWWUpLA0KICAgICAgICAgICAgTUFFID0gTUFFKEZlLFllKSwNCiAgICAgICAgICAgIE1BUEU9IE1BUEUoRmUsWWUpKjEwMCwNCiAgICAgICAgICAgIFRIRUlMPVRoZWlsVShGZSxZZSx0eXBlID0gMSksDQogICAgICAgICAgICBVbT1VbShGZSxZZSksDQogICAgICAgICAgICBVcz1VcyhGZSxZZSksDQogICAgICAgICAgICBVYz1VYyhGZSxZZSkNCiAgICAgICAgICAgICkNCkZwPC1Qcm9ub3N0aWNvX1BydWViYVtbal1dDQpZcDwtRGF0b3NfUHJ1ZWJhJGNpZ3MNClJlc3VsdGFkb3NfUGVyZm9ybWFuY2VbW2pdXTwtZGF0YS5mcmFtZSggDQogICAgICAgICAgICBSMiA9IFIyKEZwLFlwICksDQogICAgICAgICAgICBSTVNFID0gUk1TRShGcCwgWXApLA0KICAgICAgICAgICAgTUFFID0gTUFFKEZwLFlwKSwNCiAgICAgICAgICAgIE1BUEU9IE1BUEUoRnAsWXApKjEwMCwNCiAgICAgICAgICAgIFRIRUlMPVRoZWlsVShGcCxZcCx0eXBlID0gMSksIA0KICAgICAgICAgICAgVW09VW0oRnAsWXApLA0KICAgICAgICAgICAgVXM9VXMoRnAsWXApLA0KICAgICAgICAgICAgVWM9VWMoRnAsWXApDQogICAgICAgICAgICApDQp9DQoNCmBgYA0KDQoNCiMjIyBEZXNlbXBlw7FvIGNvbiBsb3MgZGF0b3MgZGUgZW50cmVuYW1pZW50bw0KDQpgYGB7cixyZXN1bHRzPSdhc2lzJ30NCmxpYnJhcnkoZHBseXIpDQpiaW5kX3Jvd3MoUmVzdWx0YWRvc19QZXJmb3JtYW5jZV9kYXRhX2VudHJlbmFtaWVudG8pICU+JSANCiAgc3RhcmdhemVyKHRpdGxlPSAiTWVkaWRhcyBkZSBQZXJmb3JtYW5jZSBEYXRvcyBkZWwgTW9kZWxvIiwNCiAgICAgICAgICAgIHR5cGUgPSAiaHRtbCIsDQogICAgICAgICAgICBkaWdpdHMgPSAzLA0KICAgICAgICAgICAgc3VtbWFyeS5zdGF0ID0gYygibiIsIm1lYW4iLCJzZCIsIm1pbiIsInAyNSIsInA3NSIsIm1heCIpKQ0KYGBgDQojIyMgRGVzZW1wZcOxbyBjb24gbG9zIGRhdG9zIGRlIHBydWViYSAobG9zIHNpbXVsYWRvcykNCg0KYGBge3IscmVzdWx0cz0nYXNpcyd9DQpiaW5kX3Jvd3MoUmVzdWx0YWRvc19QZXJmb3JtYW5jZSkgJT4lIA0KICBzdGFyZ2F6ZXIodGl0bGUgPSAiTWVkaWRhcyBkZSBQZXJmb3JtYW5jZSBTaW11bGFjacOzbiIsDQogICAgICAgICAgICB0eXBlID0gImh0bWwiLA0KICAgICAgICAgICAgZGlnaXRzID0gMywNCiAgICAgICAgICAgIHN1bW1hcnkuc3RhdCA9IGMoIm4iLCJtZWFuIiwic2QiLCJtaW4iLCJwMjUiLCJwNzUiLCJtYXgiKSkNCmBgYA0KDQoNCg==