Carga de librerías y opciones de sistema.
Carga de datos
data <-read.csv("C:/Users/MSingla/Documents/(1) Present studies/GC (2020) - Navistar-International Mexico/(3) WIP/mexico_tractos_data.csv")
Limpieza y ordenamiento de dataset
data$year_quarter <- as.yearqtr(data$year_quarter)
data$pib_real<- as.numeric(str_replace_all(data$pib_real, ",", ""))
data$pib_manufacturing <- as.numeric(str_replace_all(data$pib_manufacturing, ",",""))
data$gov_consumption <- as.numeric(str_replace_all(data$gov_consumption, ",", ""))
data$investment <- as.numeric(str_replace_all(data$investment, ",",""))
data$tractos_ventas <- as.numeric(str_replace_all(data$tractos_ventas, ",",""))
data$class6_7_ventas <- data$class6_ventas + data$class7_ventas
MODELO: PIB Manufacturero + Gasto de gobierno + Inversión + tipo de cambio MODELO: Prueba de capacidad predictiva -> proyección 2020-2021
SEGMENTO: TRACTOCAMIONES
data_hist <- data[1:73,]
data_fut <- data[74:80,]
reg_tractos <- lm(log(tractos_ventas) ~ log(pib_manufacturing) + log(gov_consumption)+ log(investment) + q2 +q3 +q4 + emission_norm18, data= data_hist)
summary(reg_tractos)
##
## Call:
## lm(formula = log(tractos_ventas) ~ log(pib_manufacturing) + log(gov_consumption) +
## log(investment) + q2 + q3 + q4 + emission_norm18, data = data_hist)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64194 -0.10206 0.00464 0.09203 0.41705
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -45.26885 3.60942 -12.542 < 0.0000000000000002 ***
## log(pib_manufacturing) 5.58098 0.52811 10.568 0.000000000000000946 ***
## log(gov_consumption) -4.37681 0.52541 -8.330 0.000000000007567232 ***
## log(investment) 2.27519 0.33581 6.775 0.000000004320710832 ***
## q2 0.10586 0.06261 1.691 0.09565 .
## q3 -0.06161 0.06597 -0.934 0.35381
## q4 0.12508 0.06445 1.941 0.05662 .
## emission_norm18 0.27584 0.10084 2.735 0.00802 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1891 on 65 degrees of freedom
## Multiple R-squared: 0.8594, Adjusted R-squared: 0.8443
## F-statistic: 56.78 on 7 and 65 DF, p-value: < 0.00000000000000022
reg_tractos_results <- as.data.frame(residuals(reg_tractos))
reg_tractos_results$residuos <- reg_tractos_results[,1]
reg_tractos_results$`residuals(reg_tractos)` <- NULL
reg_tractos_results$predicts <- predict(reg_tractos)
reg_tractos_results$year_quarter <- data_hist[,1]
reg_tractos_results$observados <- data_hist[,2]
reg_tractos_results[74:80, 4] <- data_fut[,2]
reg_tractos_results[74:80, 3] <- data_fut[,1]
reg_tractos_results1 <- predict.lm(reg_tractos, data_fut, interval = "confidence", level = 0.95)
reg_tractos_results$forecast <- NA
reg_tractos_results[74:80, 5] <- reg_tractos_results1[1:7,1]
reg_tractos_results[73, 5] <- log(reg_tractos_results[73,4])
reg_tractos_results$lower <- NA
reg_tractos_results[74:80, 6] <- reg_tractos_results1[1:7,2]
reg_tractos_results[73, 6] <- log(reg_tractos_results[73,4])
reg_tractos_results$upper <- NA
reg_tractos_results[74:80, 7] <- reg_tractos_results1[1:7,3]
reg_tractos_results[73, 7] <- log(reg_tractos_results[73,4])
reg_tractos_results$forecast_exp <- NA
reg_tractos_results[74:80, 8] <- trunc(exp(reg_tractos_results1[1:7,1]))
reg_tractos_results[73, 8] <- reg_tractos_results[73,4]
reg_tractos_results$lower_exp <- NA
reg_tractos_results[74:80, 9] <- trunc(exp(reg_tractos_results1[1:7,2]))
reg_tractos_results[73, 9] <- reg_tractos_results[73,4]
reg_tractos_results$upper_exp <- NA
reg_tractos_results[74:80, 10] <- trunc(exp(reg_tractos_results1[1:7,3]))
reg_tractos_results[73, 10] <- reg_tractos_results[73,4]
ggplot()+
geom_line(data= reg_tractos_results, aes(x= year_quarter, y= observados), color = "gray20") +
geom_line(data= reg_tractos_results, aes(x= year_quarter, y= exp(predicts), color = "Ventas modeladas")) +
geom_line(data= reg_tractos_results, aes(x= year_quarter, y= exp(forecast), color = "proyección media"), linetype = "dotdash") +
geom_line(data= reg_tractos_results, aes(x= year_quarter, y= exp(lower), color = "Lower confidence"), linetype = "dotdash") +
geom_line(data= reg_tractos_results, aes(x= year_quarter, y= exp(upper), color = "Upper confidence"), linetype = "dotdash") +
labs( title = "Ventas de tractocamiones en México",
y= "Ventas tractocamiones",
x= "Año / trimestre",
color= "Series",
caption= "Fuente: Frost & Sullivan") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 7 row(s) containing missing values (geom_path).
## Warning: Removed 7 row(s) containing missing values (geom_path).
## Warning: Removed 72 row(s) containing missing values (geom_path).
## Warning: Removed 72 row(s) containing missing values (geom_path).
## Warning: Removed 72 row(s) containing missing values (geom_path).
reg_tractos_results %>%
filter(year_quarter > "2018 Q1") %>%
select(year_quarter, observados, forecast_exp, upper_exp, lower_exp)
## year_quarter observados forecast_exp upper_exp lower_exp
## 1 2018 Q2 4055 NA NA NA
## 2 2018 Q3 3963 NA NA NA
## 3 2018 Q4 4913 NA NA NA
## 4 2019 Q1 4430 NA NA NA
## 5 2019 Q2 5950 NA NA NA
## 6 2019 Q3 4020 NA NA NA
## 7 2019 Q4 4076 NA NA NA
## 8 2020 Q1 2653 2653 2653 2653
## 9 2020 Q2 NA 1448 1708 1227
## 10 2020 Q3 NA 1378 1682 1129
## 11 2020 Q4 NA 1598 2009 1271
## 12 2021 Q1 NA 1523 1821 1274
## 13 2021 Q2 NA 2567 3395 1941
## 14 2021 Q3 NA 1857 2242 1538
## 15 2021 Q4 NA 2965 3589 2450
SEGMENTO: CLASE 8
reg_clase8 <- lm(log(class8_ventas) ~ log(pib_real) + log(gov_consumption)+ log(investment) + q2 +q3 +q4 + emission_norm18, data= data_hist)
summary(reg_clase8)
##
## Call:
## lm(formula = log(class8_ventas) ~ log(pib_real) + log(gov_consumption) +
## log(investment) + q2 + q3 + q4 + emission_norm18, data = data_hist)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41106 -0.10527 -0.00894 0.12629 0.33251
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -34.74667 3.26646 -10.637 0.00000000000000072 ***
## log(pib_real) 1.04292 0.79314 1.315 0.193
## log(gov_consumption) -1.76573 0.71361 -2.474 0.016 *
## log(investment) 3.36106 0.34143 9.844 0.00000000000001658 ***
## q2 0.05811 0.05751 1.011 0.316
## q3 -0.03562 0.06109 -0.583 0.562
## q4 -0.05838 0.06044 -0.966 0.338
## emission_norm18 -0.07262 0.09197 -0.790 0.433
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1722 on 65 degrees of freedom
## Multiple R-squared: 0.8737, Adjusted R-squared: 0.8601
## F-statistic: 64.22 on 7 and 65 DF, p-value: < 0.00000000000000022
reg_clase8_results <- as.data.frame(residuals(reg_clase8))
reg_clase8_results$residuos <- reg_clase8_results[,1]
reg_clase8_results$`residuals(reg_clase8)` <- NULL
reg_clase8_results$predicts <- predict(reg_clase8)
reg_clase8_results$year_quarter <- data_hist[,1]
reg_clase8_results$observados <- data_hist[,15]
reg_clase8_results[74:80, 4] <- data_fut[,15]
reg_clase8_results[74:80, 3] <- data_fut[,1]
reg_clase8_results1 <- predict.lm(reg_clase8, data_fut, interval = "confidence", level = 0.95)
reg_clase8_results$forecast <- NA
reg_clase8_results[74:80, 5] <- reg_clase8_results1[1:7,1]
reg_clase8_results[73, 5] <- log(reg_clase8_results[73,4])
reg_clase8_results$lower <- NA
reg_clase8_results[74:80, 6] <- reg_clase8_results1[1:7,2]
reg_clase8_results[73, 6] <- log(reg_clase8_results[73,4])
reg_clase8_results$upper <- NA
reg_clase8_results[74:80, 7] <- reg_clase8_results1[1:7,3]
reg_clase8_results[73, 7] <- log(reg_clase8_results[73,4])
reg_clase8_results$forecast_exp <- NA
reg_clase8_results[74:80, 8] <- trunc(exp(reg_clase8_results1[1:7,1]))
reg_clase8_results[73, 8] <- reg_clase8_results[73,4]
reg_clase8_results$lower_exp <- NA
reg_clase8_results[74:80, 9] <- trunc(exp(reg_clase8_results1[1:7,2]))
reg_clase8_results[73, 9] <- reg_clase8_results[73,4]
reg_clase8_results$upper_exp <- NA
reg_clase8_results[74:80, 10] <- trunc(exp(reg_clase8_results1[1:7,3]))
reg_clase8_results[73, 10] <- reg_clase8_results[73,4]
ggplot()+
geom_line(data= reg_clase8_results, aes(x= year_quarter, y= observados), color = "gray20") +
geom_line(data= reg_clase8_results, aes(x= year_quarter, y= exp(predicts), color = "Ventas modeladas")) +
geom_line(data= reg_clase8_results, aes(x= year_quarter, y= exp(forecast), color = "proyección media"), linetype = "dotdash") +
geom_line(data= reg_clase8_results, aes(x= year_quarter, y= exp(lower), color = "Lower confidence"), linetype = "dotdash") +
geom_line(data= reg_clase8_results, aes(x= year_quarter, y= exp(upper), color = "Upper confidence"), linetype = "dotdash") +
labs( title = "Ventas de Clase 8 en México",
y= "Ventas clase 8",
x= "Año / trimestre",
color= "Series",
caption= "Fuente: Frost & Sullivan") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 7 row(s) containing missing values (geom_path).
## Warning: Removed 7 row(s) containing missing values (geom_path).
## Warning: Removed 72 row(s) containing missing values (geom_path).
## Warning: Removed 72 row(s) containing missing values (geom_path).
## Warning: Removed 72 row(s) containing missing values (geom_path).
reg_clase8_results %>%
filter(year_quarter > "2017 Q1") %>%
select(year_quarter, observados, forecast_exp, upper_exp, lower_exp)
## year_quarter observados forecast_exp upper_exp lower_exp
## 1 2017 Q2 2352 NA NA NA
## 2 2017 Q3 2111 NA NA NA
## 3 2017 Q4 2443 NA NA NA
## 4 2018 Q1 2274 NA NA NA
## 5 2018 Q2 1926 NA NA NA
## 6 2018 Q3 2151 NA NA NA
## 7 2018 Q4 2234 NA NA NA
## 8 2019 Q1 2497 NA NA NA
## 9 2019 Q2 2250 NA NA NA
## 10 2019 Q3 2080 NA NA NA
## 11 2019 Q4 2218 NA NA NA
## 12 2020 Q1 1876 1876 1876 1876
## 13 2020 Q2 NA 928 1075 801
## 14 2020 Q3 NA 980 1167 823
## 15 2020 Q4 NA 1031 1266 839
## 16 2021 Q1 NA 1043 1229 885
## 17 2021 Q2 NA 1103 1422 856
## 18 2021 Q3 NA 1168 1386 985
## 19 2021 Q4 NA 1284 1524 1083
SEGMENTO: CLASE 6 y 7
data_hist_c7 <- data_hist [29:73,]
reg_clase7 <- lm(log(class6_7_ventas) ~ log(pib_real) + log(gov_consumption) + log(investment) + q2 +q3 +q4 + emission_norm18, data= data_hist_c7)
summary(reg_clase7)
##
## Call:
## lm(formula = log(class6_7_ventas) ~ log(pib_real) + log(gov_consumption) +
## log(investment) + q2 + q3 + q4 + emission_norm18, data = data_hist_c7)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.49812 -0.11343 0.01754 0.09776 0.44667
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -30.69585 8.63616 -3.554 0.00106 **
## log(pib_real) 6.72351 2.24260 2.998 0.00483 **
## log(gov_consumption) -6.44883 2.76433 -2.333 0.02520 *
## log(investment) 1.25831 0.76016 1.655 0.10632
## q2 0.05037 0.08925 0.564 0.57592
## q3 -0.19086 0.11243 -1.698 0.09799 .
## q4 -0.04535 0.10383 -0.437 0.66482
## emission_norm18 -0.16669 0.15510 -1.075 0.28945
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2082 on 37 degrees of freedom
## Multiple R-squared: 0.6673, Adjusted R-squared: 0.6044
## F-statistic: 10.6 on 7 and 37 DF, p-value: 0.0000003088
reg_clase7_results <- as.data.frame(residuals(reg_clase7))
reg_clase7_results$residuos <- reg_clase7_results[,1]
reg_clase7_results$`residuals(reg_clase7)` <- NULL
reg_clase7_results$predicts <- predict(reg_clase7)
reg_clase7_results$year_quarter <- data_hist_c7[,1]
reg_clase7_results$observados <- data_hist_c7[,19]
reg_clase7_results[74:80, 4] <- data_fut[,19]
reg_clase7_results[74:80, 3] <- data_fut[,1]
reg_clase7_results1 <- predict.lm(reg_clase7, data_fut, interval = "confidence", level = 0.95)
reg_clase7_results$forecast <- NA
reg_clase7_results[74:80, 5] <- reg_clase7_results1[1:7,1]
reg_clase7_results[73, 5] <- log(reg_clase7_results[73,4])
reg_clase7_results$lower <- NA
reg_clase7_results[74:80, 6] <- reg_clase7_results1[1:7,2]
reg_clase7_results[73, 6] <- log(reg_clase7_results[73,4])
reg_clase7_results$upper <- NA
reg_clase7_results[74:80, 7] <- reg_clase7_results1[1:7,3]
reg_clase7_results[73, 7] <- log(reg_clase7_results[73,4])
reg_clase7_results$forecast_exp <- NA
reg_clase7_results[74:80, 8] <- trunc(exp(reg_clase7_results1[1:7,1]))
reg_clase7_results[73, 8] <- reg_clase7_results[73,4]
reg_clase7_results$lower_exp <- NA
reg_clase7_results[74:80, 9] <- trunc(exp(reg_clase7_results1[1:7,2]))
reg_clase7_results[73, 9] <- reg_clase7_results[73,4]
reg_clase7_results$upper_exp <- NA
reg_clase7_results[74:80, 10] <- trunc(exp(reg_clase7_results1[1:7,3]))
reg_clase7_results[73, 10] <- reg_clase7_results[73,4]
ggplot()+
geom_line(data= reg_clase7_results, aes(x= year_quarter, y= observados), color = "gray20") +
geom_line(data= reg_clase7_results, aes(x= year_quarter, y= exp(predicts), color = "Ventas modeladas")) +
geom_line(data= reg_clase7_results, aes(x= year_quarter, y= exp(forecast), color = "proyección media"), linetype = "dotdash") +
geom_line(data= reg_clase7_results, aes(x= year_quarter, y= exp(lower), color = "Lower confidence"), linetype = "dotdash") +
geom_line(data= reg_clase7_results, aes(x= year_quarter, y= exp(upper), color = "Upper confidence"), linetype = "dotdash") +
labs( title = "Ventas de Clases 6 y 7 en México",
y= "Ventas clase 6 y 7",
x= "Año / trimestre",
color= "Series",
caption= "Fuente: Frost & Sullivan") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 35 row(s) containing missing values (geom_path).
## Warning: Removed 35 row(s) containing missing values (geom_path).
## Warning: Removed 73 row(s) containing missing values (geom_path).
## Warning: Removed 73 row(s) containing missing values (geom_path).
## Warning: Removed 73 row(s) containing missing values (geom_path).
reg_clase7_results %>%
filter(year_quarter > "2017 Q1") %>%
select(year_quarter, observados, forecast_exp, upper_exp, lower_exp)
## year_quarter observados forecast_exp upper_exp lower_exp
## 1 2017 Q2 886 NA NA NA
## 2 2017 Q3 553 NA NA NA
## 3 2017 Q4 723 NA NA NA
## 4 2018 Q1 493 NA NA NA
## 5 2018 Q2 892 NA NA NA
## 6 2018 Q3 788 NA NA NA
## 7 2018 Q4 1085 NA NA NA
## 8 2019 Q1 589 NA NA NA
## 9 2019 Q2 625 NA NA NA
## 10 2019 Q3 784 NA NA NA
## 11 2019 Q4 768 NA NA NA
## 12 2020 Q1 488 NA NA NA
## 13 2020 Q2 NA 359 505 255
## 14 2020 Q3 NA 322 477 217
## 15 2020 Q4 NA 398 644 246
## 16 2021 Q1 NA 398 550 288
## 17 2021 Q2 NA 368 578 234
## 18 2021 Q3 NA 388 558 269
## 19 2021 Q4 NA 482 699 332
SEGMENTO: CLASE 4 y 5
reg_clase45 <- lm(log(class4_5_ventas) ~ log(exchange_rate)+ log(interest_rate) + q2 +q3 +q4, data= data_hist_c7)
summary(reg_clase45)
##
## Call:
## lm(formula = log(class4_5_ventas) ~ log(exchange_rate) + log(interest_rate) +
## q2 + q3 + q4, data = data_hist_c7)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34399 -0.13810 0.02559 0.13289 0.30391
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.03078 0.45753 21.924 < 0.0000000000000002 ***
## log(exchange_rate) -1.40949 0.19887 -7.087 0.0000000163 ***
## log(interest_rate) 0.43405 0.11137 3.897 0.000372 ***
## q2 0.05470 0.08046 0.680 0.500611
## q3 0.10467 0.08046 1.301 0.200957
## q4 0.27394 0.08061 3.399 0.001573 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1924 on 39 degrees of freedom
## Multiple R-squared: 0.6062, Adjusted R-squared: 0.5557
## F-statistic: 12.01 on 5 and 39 DF, p-value: 0.000000451
reg_clase45_results <- as.data.frame(residuals(reg_clase45))
reg_clase45_results$residuos <- reg_clase45_results[,1]
reg_clase45_results$`residuals(reg_clase45)` <- NULL
reg_clase45_results$predicts <- predict(reg_clase45)
reg_clase45_results$year_quarter <- data_hist_c7[,1]
reg_clase45_results$observados <- data_hist_c7[,18]
reg_clase45_results[74:80, 4] <- data_fut[,18]
reg_clase45_results[74:80, 3] <- data_fut[,1]
reg_clase45_results1 <- predict.lm(reg_clase45, data_fut, interval = "confidence", level = 0.95)
reg_clase45_results$forecast <- NA
reg_clase45_results[74:80, 5] <- reg_clase45_results1[1:7,1]
reg_clase45_results[73, 5] <- log(reg_clase45_results[73,4])
reg_clase45_results$lower <- NA
reg_clase45_results[74:80, 6] <- reg_clase45_results1[1:7,2]
reg_clase45_results[73, 6] <- log(reg_clase45_results[73,4])
reg_clase45_results$upper <- NA
reg_clase45_results[74:80, 7] <- reg_clase45_results1[1:7,3]
reg_clase45_results[73, 7] <- log(reg_clase45_results[73,4])
reg_clase45_results$forecast_exp <- NA
reg_clase45_results[74:80, 8] <- trunc(exp(reg_clase45_results1[1:7,1]))
reg_clase45_results[73, 8] <- reg_clase45_results[73,4]
reg_clase45_results$lower_exp <- NA
reg_clase45_results[74:80, 9] <- trunc(exp(reg_clase45_results1[1:7,2]))
reg_clase45_results[73, 9] <- reg_clase45_results[73,4]
reg_clase45_results$upper_exp <- NA
reg_clase45_results[74:80, 10] <- trunc(exp(reg_clase45_results1[1:7,3]))
reg_clase45_results[73, 10] <- reg_clase45_results[73,4]
ggplot()+
geom_line(data= reg_clase45_results, aes(x= year_quarter, y= observados), color = "gray20") +
geom_line(data= reg_clase45_results, aes(x= year_quarter, y= exp(predicts), color = "Ventas modeladas")) +
geom_line(data= reg_clase45_results, aes(x= year_quarter, y= exp(forecast), color = "proyección media"), linetype = "dotdash") +
geom_line(data= reg_clase45_results, aes(x= year_quarter, y= exp(lower), color = "Lower confidence"), linetype = "dotdash") +
geom_line(data= reg_clase45_results, aes(x= year_quarter, y= exp(upper), color = "Upper confidence"), linetype = "dotdash") +
labs( title = "Ventas de Clases 4 y 5 en México",
y= "Ventas clase 4 y 5",
x= "Año / trimestre",
color= "Series",
caption= "Fuente: Frost & Sullivan") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 35 row(s) containing missing values (geom_path).
## Warning: Removed 35 row(s) containing missing values (geom_path).
## Warning: Removed 73 row(s) containing missing values (geom_path).
## Warning: Removed 73 row(s) containing missing values (geom_path).
## Warning: Removed 73 row(s) containing missing values (geom_path).