Carga de librerías y opciones de sistema.
Carga de datos
data_tractos <-read.csv("C:/Users/MSingla/Documents/(1) Present studies/GC (2020) - Navistar-International Mexico/(3) WIP/mexico_tractos_data.csv")
head(data_tractos)
## year_quarter tractos_ventas pib_real pib_manufacturing
## 1 2002 Q1 1181 12 415 541 2 184 801
## 2 2002 Q2 1129 13 112 363 2 272 069
## 3 2002 Q3 1191 12 889 950 2 265 685
## 4 2002 Q4 1494 13 084 104 2 264 165
## 5 2003 Q1 1383 12 803 327 2 189 099
## 6 2003 Q2 1811 13 182 932 2 233 791
## industrial_activity_index gov_consumption investment interest_rate
## 1 86.38 1,604,492 2,341,839 7.537
## 2 86.95 1,646,079 2,371,656 6.757
## 3 87.47 1,593,893 2,499,619 7.370
## 4 88.29 1,624,808 2,442,262 7.110
## 5 88.42 1,622,763 2,460,229 8.937
## 6 88.52 1,668,969 2,379,630 5.583
## exchange_rate retail_sales_index q2 q3 q4 emission_norm18
## 1 9.090 NA 0 0 0 0
## 2 9.632 NA 1 0 0 0
## 3 9.927 NA 0 1 0 0
## 4 10.197 NA 0 0 1 0
## 5 10.929 NA 0 0 0 0
## 6 10.411 NA 1 0 0 0
Limpieza y ordenamiento de dataset
data_tractos$year_quarter <- as.yearqtr(data_tractos$year_quarter)
data_tractos$pib_real<- as.numeric(str_replace_all(data_tractos$pib_real, " ", ""))
data_tractos$pib_manufacturing <- as.numeric(str_replace_all(data_tractos$pib_manufacturing, " ",""))
data_tractos$gov_consumption <- as.numeric(str_replace_all(data_tractos$gov_consumption, ",", ""))
data_tractos$investment <- as.numeric(str_replace_all(data_tractos$investment, ",",""))
MODELOS SELECCIONADOS
MODELO A: PIB real + Índice retail + Tipo de cambio -> Serie corta 2008-2019
tractos_regA <- lm(tractos_ventas ~ pib_real + retail_sales_index + exchange_rate + emission_norm18 + q2 + q3 +q4, data= data_tractos)
summary(tractos_regA)
##
## Call:
## lm(formula = tractos_ventas ~ pib_real + retail_sales_index +
## exchange_rate + emission_norm18 + q2 + q3 + q4, data = data_tractos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1276.4 -355.2 -5.5 352.3 1361.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -16449.3809733 2367.9042656 -6.947 0.0000000222 ***
## pib_real 0.0003305 0.0002139 1.545 0.13028
## retail_sales_index 191.7149521 55.5400042 3.452 0.00133 **
## exchange_rate -382.8045249 78.7458432 -4.861 0.0000184038 ***
## emission_norm18 1181.5489051 419.3489718 2.818 0.00748 **
## q2 164.2595799 251.9054034 0.652 0.51809
## q3 284.5755583 239.8517637 1.186 0.24244
## q4 712.1879781 321.3873265 2.216 0.03245 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 563.3 on 40 degrees of freedom
## (48 observations deleted due to missingness)
## Multiple R-squared: 0.8084, Adjusted R-squared: 0.7748
## F-statistic: 24.11 on 7 and 40 DF, p-value: 0.000000000001787
tractos_regA_result <- as.data.frame(residuals(tractos_regA))
tractos_regA_result$residuos <- tractos_regA_result[,1]
tractos_regA_result$`residuals(tractos_regA)` <- NULL
tractos_regA_result$predicts <- predict(tractos_regA)
tractos_regA_result$year_quarter <- data_tractos[25:72,1]
tractos_regA_result$observado <- data_tractos[25:72, 2]
tractos_regA_result$ratio_error <- (tractos_regA_result[,1] / tractos_regA_result[,4] * 100)
ggplot() +
geom_line(data= tractos_regA_result, aes(x= year_quarter, y= observado, color= "Ventas históricas"), color = "black") +
geom_line(data= tractos_regA_result, aes(x= year_quarter, y= predicts, color= "Ventas modeladas")) +
labs(title = "Venta de tractocamiones en México (2008-2019)",
subtitle = "MODELO A: ventas históricas vs. modeladas",
y= "ventas tractocamiones",
x= "Año / trimestre",
color = "Series",
caption = "Fuente: Frost & Sullivan") +
theme(axis.text.x = element_text(angle = 45, hjust =1))
MODELO A: Prueba de capacidad predictiva -> retro-proyección 2018-2019
data_tractos_histA <- data_tractos[25:64,]
data_tractos_futureA <- data_tractos [65:72,]
tractos_regA_prueba <- lm(tractos_ventas ~ pib_real + retail_sales_index + exchange_rate + emission_norm18 + q2 + q3 +q4, data= data_tractos_histA)
summary(tractos_regA_prueba)
##
## Call:
## lm(formula = tractos_ventas ~ pib_real + retail_sales_index +
## exchange_rate + emission_norm18 + q2 + q3 + q4, data = data_tractos_histA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -812.74 -346.72 -65.02 300.31 927.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -21982.83875505 2496.18100369 -8.807 0.000000000461 ***
## pib_real 0.00000692 0.00021204 0.033 0.97417
## retail_sales_index 309.37088787 57.96322916 5.337 0.000007444860 ***
## exchange_rate -472.32447317 73.78390679 -6.401 0.000000341249 ***
## emission_norm18 1236.90261626 535.41146197 2.310 0.02748 *
## q2 286.16590788 241.30086340 1.186 0.24438
## q3 536.90858117 235.28215181 2.282 0.02928 *
## q4 1118.57672807 310.74791970 3.600 0.00106 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 492.3 on 32 degrees of freedom
## Multiple R-squared: 0.8546, Adjusted R-squared: 0.8228
## F-statistic: 26.87 on 7 and 32 DF, p-value: 0.000000000011
tractos_regA_result_prueba <- as.data.frame(residuals(tractos_regA_prueba))
tractos_regA_result_prueba$residuos <- tractos_regA_result_prueba[,1]
tractos_regA_result_prueba$`residuals(tractos_regA_prueba)` <- NULL
tractos_regA_result_prueba$predicts <- predict(tractos_regA_prueba)
tractos_regA_result_prueba$year_quarter <- data_tractos_histA[,1]
tractos_regA_result_prueba$observados <- data_tractos_histA[,2]
tractos_regA_result_prueba[65:72, 4] <- data_tractos_futureA[,2]
tractos_regA_result_prueba[65:72, 3] <- data_tractos_futureA[,1]
tractos_regA_result_prueba1 <- predict.lm(tractos_regA_prueba, data_tractos_futureA, interval = "confidence", level = 0.9)
tractos_regA_result_prueba$forecast <- NA
tractos_regA_result_prueba[65:72, 5] <- tractos_regA_result_prueba1[,1]
tractos_regA_result_prueba$lower <- NA
tractos_regA_result_prueba[65:72, 6] <- tractos_regA_result_prueba1[1:8,2]
tractos_regA_result_prueba$upper <- NA
tractos_regA_result_prueba[65:72, 7] <- tractos_regA_result_prueba1[1:8,3]
tractos_regA_result_prueba = tractos_regA_result_prueba[-c(41.1:64.1),]
ggplot()+
geom_line(data= tractos_regA_result_prueba, aes(x= year_quarter, y= observados), color = "gray20") +
geom_line(data= tractos_regA_result_prueba, aes(x= year_quarter, y= predicts, color = "Ventas modeladas")) +
geom_line(data= tractos_regA_result_prueba, aes(x= year_quarter, y= forecast, color = "proyección media"), linetype = "dotdash") +
geom_line(data= tractos_regA_result_prueba, aes(x= year_quarter, y= lower, color = "Lower confidence"), linetype = "dotdash") +
geom_line(data= tractos_regA_result_prueba, aes(x= year_quarter, y= upper, color = "Upper confidence"), linetype = "dotdash") +
labs( title = "Ventas de tractocamiones en México",
subtitle = "Modelo A: ventas 2008-2017 c/ proyección 2018-2019",
y= "Ventas tractocamiones",
x= "Año / trimestre",
color= "Series",
caption= "Fuente: Frost & Sullivan") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 8 row(s) containing missing values (geom_path).
## Warning: Removed 40 row(s) containing missing values (geom_path).
## Warning: Removed 40 row(s) containing missing values (geom_path).
## Warning: Removed 40 row(s) containing missing values (geom_path).
MODELO B: PIB manufacturero + Consumo de gobierno + Inversión + tipo de cambio -> Serie larga 2002-2019
tractos_regB <- lm(tractos_ventas ~ pib_manufacturing + gov_consumption + investment + exchange_rate + emission_norm18 + q2 + q3 +q4, data= data_tractos)
summary(tractos_regB)
##
## Call:
## lm(formula = tractos_ventas ~ pib_manufacturing + gov_consumption +
## investment + exchange_rate + emission_norm18 + q2 + q3 +
## q4, data = data_tractos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -888.41 -345.77 6.56 326.24 1273.84
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6370.2857956 1052.1660249 -6.054 0.00000008574645 ***
## pib_manufacturing 0.0055068 0.0006456 8.530 0.00000000000424 ***
## gov_consumption -0.0054912 0.0011063 -4.964 0.00000555727491 ***
## investment 0.0018024 0.0003512 5.131 0.00000297670411 ***
## exchange_rate -15.2320276 49.5054002 -0.308 0.7593
## emission_norm18 1262.1702022 275.9738064 4.574 0.00002301013938 ***
## q2 271.2684939 179.6445072 1.510 0.1360
## q3 -95.1434592 182.1998994 -0.522 0.6034
## q4 366.6215834 178.1678341 2.058 0.0438 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 516.4 on 63 degrees of freedom
## (24 observations deleted due to missingness)
## Multiple R-squared: 0.8519, Adjusted R-squared: 0.8331
## F-statistic: 45.29 on 8 and 63 DF, p-value: < 0.00000000000000022
tractos_regB_result <- as.data.frame(residuals(tractos_regB))
tractos_regB_result$residuos <- tractos_regB_result[,1]
tractos_regB_result$`residuals(tractos_regB)` <- NULL
tractos_regB_result$predicts <- predict(tractos_regB)
tractos_regB_result$year_quarter <- data_tractos[1:72,1]
tractos_regB_result$observado <- data_tractos[1:72, 2]
tractos_regB_result$ratio_error <- (tractos_regB_result[,1] / tractos_regB_result[,4] * 100)
ggplot() +
geom_line(data= tractos_regB_result, aes(x= year_quarter, y= observado, color= "Ventas históricas"), color = "black") +
geom_line(data= tractos_regB_result, aes(x= year_quarter, y= predicts, color= "Ventas modeladas")) +
labs(title = "Venta de tractocamiones en México (2002-2019)",
subtitle = "MODELO B: ventas históricas vs. modeladas",
y= "ventas tractocamiones",
x= "Año / trimestre",
color = "Series",
caption = "Fuente: Frost & Sullivan") +
theme(axis.text.x = element_text(angle = 45, hjust =1))
MODELO B: Prueba de capacidad predictiva -> retro-proyección 2017-2019
data_tractos_histB <- data_tractos[1:60,]
data_tractos_futureB <- data_tractos [61:72,]
tractos_regB_prueba <- lm(tractos_ventas ~ pib_manufacturing + gov_consumption + investment + exchange_rate + emission_norm18 + q2 + q3 +q4, data= data_tractos_histB)
summary(tractos_regB_prueba)
##
## Call:
## lm(formula = tractos_ventas ~ pib_manufacturing + gov_consumption +
## investment + exchange_rate + emission_norm18 + q2 + q3 +
## q4, data = data_tractos_histB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -947.18 -325.64 -12.01 320.86 1287.74
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6489.7305102 1293.2894173 -5.018 0.00000671685 ***
## pib_manufacturing 0.0056644 0.0007817 7.246 0.00000000222 ***
## gov_consumption -0.0058246 0.0011801 -4.936 0.00000893806 ***
## investment 0.0018811 0.0004367 4.308 0.00007521693 ***
## exchange_rate -8.8067958 56.4230514 -0.156 0.87658
## emission_norm18 1091.9457835 348.2445069 3.136 0.00285 **
## q2 319.0898791 201.7781173 1.581 0.11997
## q3 7.3179259 204.4790973 0.036 0.97159
## q4 311.0141919 203.9873984 1.525 0.13352
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 532.7 on 51 degrees of freedom
## Multiple R-squared: 0.8353, Adjusted R-squared: 0.8094
## F-statistic: 32.32 on 8 and 51 DF, p-value: < 0.00000000000000022
tractos_regB_result_prueba <- as.data.frame(residuals(tractos_regB_prueba))
tractos_regB_result_prueba$residuos <- tractos_regB_result_prueba[,1]
tractos_regB_result_prueba$`residuals(tractos_regB_prueba)` <- NULL
tractos_regB_result_prueba$predicts <- predict(tractos_regB_prueba)
tractos_regB_result_prueba$year_quarter <- data_tractos_histB[,1]
tractos_regB_result_prueba$observados <- data_tractos_histB[,2]
tractos_regB_result_prueba[61:72, 4] <- data_tractos_futureB[,2]
tractos_regB_result_prueba[61:72, 3] <- data_tractos_futureB[,1]
tractos_regB_result_prueba1 <- predict.lm(tractos_regB_prueba, data_tractos_futureB, interval = "confidence", level = 0.9)
tractos_regB_result_prueba$forecast <- NA
tractos_regB_result_prueba[61:72, 5] <- tractos_regB_result_prueba1[1:12,1]
tractos_regB_result_prueba$lower <- NA
tractos_regB_result_prueba[61:72, 6] <- tractos_regB_result_prueba1[1:12,2]
tractos_regB_result_prueba$upper <- NA
tractos_regB_result_prueba[61:72, 7] <- tractos_regB_result_prueba1[1:12,3]
ggplot()+
geom_line(data= tractos_regB_result_prueba, aes(x= year_quarter, y= observados), color = "gray20") +
geom_line(data= tractos_regB_result_prueba, aes(x= year_quarter, y= predicts, color = "Ventas modeladas")) +
geom_line(data= tractos_regB_result_prueba, aes(x= year_quarter, y= forecast, color = "proyección media"), linetype = "dotdash") +
geom_line(data= tractos_regB_result_prueba, aes(x= year_quarter, y= lower, color = "Lower confidence"), linetype = "dotdash") +
geom_line(data= tractos_regB_result_prueba, aes(x= year_quarter, y= upper, color = "Upper confidence"), linetype = "dotdash") +
labs( title = "Ventas de tractocamiones en México",
subtitle = "Modelo A: ventas 2002-2016 c/ proyección 2017-2019",
y= "Ventas tractocamiones",
x= "Año / trimestre",
color= "Series",
caption= "Fuente: Frost & Sullivan") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 60 row(s) containing missing values (geom_path).
## Warning: Removed 60 row(s) containing missing values (geom_path).
## Warning: Removed 60 row(s) containing missing values (geom_path).