Carga de librerías y opciones de sistema.

Carga de datos

data_tractos <-read.csv("C:/Users/MSingla/Documents/(1) Present studies/GC (2020) - Navistar-International Mexico/(3) WIP/mexico_tractos_data.csv")
head(data_tractos)
##   year_quarter tractos_ventas     pib_real pib_manufacturing
## 1      2002 Q1           1181   12 415 541         2 184 801
## 2      2002 Q2           1129   13 112 363         2 272 069
## 3      2002 Q3           1191   12 889 950         2 265 685
## 4      2002 Q4           1494   13 084 104         2 264 165
## 5      2003 Q1           1383   12 803 327         2 189 099
## 6      2003 Q2           1811   13 182 932         2 233 791
##   industrial_activity_index gov_consumption investment interest_rate
## 1                     86.38       1,604,492  2,341,839         7.537
## 2                     86.95       1,646,079  2,371,656         6.757
## 3                     87.47       1,593,893  2,499,619         7.370
## 4                     88.29       1,624,808  2,442,262         7.110
## 5                     88.42       1,622,763  2,460,229         8.937
## 6                     88.52       1,668,969  2,379,630         5.583
##   exchange_rate retail_sales_index q2 q3 q4 emission_norm18
## 1         9.090                 NA  0  0  0               0
## 2         9.632                 NA  1  0  0               0
## 3         9.927                 NA  0  1  0               0
## 4        10.197                 NA  0  0  1               0
## 5        10.929                 NA  0  0  0               0
## 6        10.411                 NA  1  0  0               0

Limpieza y ordenamiento de dataset

data_tractos$year_quarter <- as.yearqtr(data_tractos$year_quarter)

data_tractos$pib_real<- as.numeric(str_replace_all(data_tractos$pib_real, " ", ""))
data_tractos$pib_manufacturing <- as.numeric(str_replace_all(data_tractos$pib_manufacturing, " ",""))
data_tractos$gov_consumption <- as.numeric(str_replace_all(data_tractos$gov_consumption, ",", ""))
data_tractos$investment <- as.numeric(str_replace_all(data_tractos$investment, ",",""))

MODELOS SELECCIONADOS

MODELO A: PIB real + Índice retail + Tipo de cambio -> Serie corta 2008-2019

tractos_regA <- lm(tractos_ventas ~ pib_real + retail_sales_index + exchange_rate + emission_norm18 + q2 + q3 +q4, data= data_tractos)

summary(tractos_regA)
## 
## Call:
## lm(formula = tractos_ventas ~ pib_real + retail_sales_index + 
##     exchange_rate + emission_norm18 + q2 + q3 + q4, data = data_tractos)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1276.4  -355.2    -5.5   352.3  1361.7 
## 
## Coefficients:
##                          Estimate     Std. Error t value     Pr(>|t|)    
## (Intercept)        -16449.3809733   2367.9042656  -6.947 0.0000000222 ***
## pib_real                0.0003305      0.0002139   1.545      0.13028    
## retail_sales_index    191.7149521     55.5400042   3.452      0.00133 ** 
## exchange_rate        -382.8045249     78.7458432  -4.861 0.0000184038 ***
## emission_norm18      1181.5489051    419.3489718   2.818      0.00748 ** 
## q2                    164.2595799    251.9054034   0.652      0.51809    
## q3                    284.5755583    239.8517637   1.186      0.24244    
## q4                    712.1879781    321.3873265   2.216      0.03245 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 563.3 on 40 degrees of freedom
##   (48 observations deleted due to missingness)
## Multiple R-squared:  0.8084, Adjusted R-squared:  0.7748 
## F-statistic: 24.11 on 7 and 40 DF,  p-value: 0.000000000001787
tractos_regA_result <- as.data.frame(residuals(tractos_regA))
tractos_regA_result$residuos <- tractos_regA_result[,1]
tractos_regA_result$`residuals(tractos_regA)` <- NULL

tractos_regA_result$predicts <- predict(tractos_regA)

tractos_regA_result$year_quarter <- data_tractos[25:72,1]

tractos_regA_result$observado <- data_tractos[25:72, 2]

tractos_regA_result$ratio_error <- (tractos_regA_result[,1] / tractos_regA_result[,4] * 100)

ggplot() +
  geom_line(data= tractos_regA_result, aes(x= year_quarter, y= observado, color= "Ventas históricas"), color = "black") +
  geom_line(data= tractos_regA_result, aes(x= year_quarter, y= predicts, color= "Ventas modeladas")) +
    labs(title = "Venta de tractocamiones en México (2008-2019)",
       subtitle = "MODELO A: ventas históricas vs. modeladas",
       y= "ventas tractocamiones",
       x= "Año / trimestre",
       color = "Series",
       caption = "Fuente: Frost & Sullivan") +
  theme(axis.text.x = element_text(angle = 45, hjust =1))

MODELO A: Prueba de capacidad predictiva -> retro-proyección 2018-2019

data_tractos_histA <- data_tractos[25:64,]
data_tractos_futureA <- data_tractos [65:72,]

tractos_regA_prueba <- lm(tractos_ventas ~ pib_real + retail_sales_index + exchange_rate + emission_norm18 + q2 + q3 +q4, data= data_tractos_histA)
summary(tractos_regA_prueba)
## 
## Call:
## lm(formula = tractos_ventas ~ pib_real + retail_sales_index + 
##     exchange_rate + emission_norm18 + q2 + q3 + q4, data = data_tractos_histA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -812.74 -346.72  -65.02  300.31  927.64 
## 
## Coefficients:
##                           Estimate      Std. Error t value       Pr(>|t|)    
## (Intercept)        -21982.83875505   2496.18100369  -8.807 0.000000000461 ***
## pib_real                0.00000692      0.00021204   0.033        0.97417    
## retail_sales_index    309.37088787     57.96322916   5.337 0.000007444860 ***
## exchange_rate        -472.32447317     73.78390679  -6.401 0.000000341249 ***
## emission_norm18      1236.90261626    535.41146197   2.310        0.02748 *  
## q2                    286.16590788    241.30086340   1.186        0.24438    
## q3                    536.90858117    235.28215181   2.282        0.02928 *  
## q4                   1118.57672807    310.74791970   3.600        0.00106 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 492.3 on 32 degrees of freedom
## Multiple R-squared:  0.8546, Adjusted R-squared:  0.8228 
## F-statistic: 26.87 on 7 and 32 DF,  p-value: 0.000000000011
tractos_regA_result_prueba <- as.data.frame(residuals(tractos_regA_prueba))
tractos_regA_result_prueba$residuos <- tractos_regA_result_prueba[,1]
tractos_regA_result_prueba$`residuals(tractos_regA_prueba)` <- NULL

tractos_regA_result_prueba$predicts <- predict(tractos_regA_prueba)
tractos_regA_result_prueba$year_quarter <- data_tractos_histA[,1]
tractos_regA_result_prueba$observados <- data_tractos_histA[,2]
tractos_regA_result_prueba[65:72, 4] <- data_tractos_futureA[,2] 
tractos_regA_result_prueba[65:72, 3] <- data_tractos_futureA[,1]

tractos_regA_result_prueba1 <- predict.lm(tractos_regA_prueba, data_tractos_futureA, interval = "confidence", level = 0.9)

tractos_regA_result_prueba$forecast <- NA
tractos_regA_result_prueba[65:72, 5] <- tractos_regA_result_prueba1[,1]
tractos_regA_result_prueba$lower <- NA
tractos_regA_result_prueba[65:72, 6] <- tractos_regA_result_prueba1[1:8,2]
tractos_regA_result_prueba$upper <- NA
tractos_regA_result_prueba[65:72, 7] <- tractos_regA_result_prueba1[1:8,3]

tractos_regA_result_prueba = tractos_regA_result_prueba[-c(41.1:64.1),]

ggplot()+
  geom_line(data= tractos_regA_result_prueba, aes(x= year_quarter, y= observados), color = "gray20") +
  geom_line(data= tractos_regA_result_prueba, aes(x= year_quarter, y= predicts, color = "Ventas modeladas")) +
  geom_line(data= tractos_regA_result_prueba, aes(x= year_quarter, y= forecast, color = "proyección media"), linetype = "dotdash") +
  geom_line(data= tractos_regA_result_prueba, aes(x= year_quarter, y= lower, color = "Lower confidence"), linetype = "dotdash") +
  geom_line(data= tractos_regA_result_prueba, aes(x= year_quarter, y= upper, color = "Upper confidence"), linetype = "dotdash") +
  labs( title = "Ventas de tractocamiones en México",
        subtitle = "Modelo A: ventas 2008-2017 c/ proyección 2018-2019",
        y= "Ventas tractocamiones",
        x= "Año / trimestre",
        color= "Series",
        caption= "Fuente: Frost & Sullivan") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 8 row(s) containing missing values (geom_path).
## Warning: Removed 40 row(s) containing missing values (geom_path).

## Warning: Removed 40 row(s) containing missing values (geom_path).

## Warning: Removed 40 row(s) containing missing values (geom_path).

MODELO B: PIB manufacturero + Consumo de gobierno + Inversión + tipo de cambio -> Serie larga 2002-2019

tractos_regB <- lm(tractos_ventas ~ pib_manufacturing + gov_consumption + investment + exchange_rate + emission_norm18 + q2 + q3 +q4, data= data_tractos)

summary(tractos_regB)
## 
## Call:
## lm(formula = tractos_ventas ~ pib_manufacturing + gov_consumption + 
##     investment + exchange_rate + emission_norm18 + q2 + q3 + 
##     q4, data = data_tractos)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -888.41 -345.77    6.56  326.24 1273.84 
## 
## Coefficients:
##                        Estimate    Std. Error t value         Pr(>|t|)    
## (Intercept)       -6370.2857956  1052.1660249  -6.054 0.00000008574645 ***
## pib_manufacturing     0.0055068     0.0006456   8.530 0.00000000000424 ***
## gov_consumption      -0.0054912     0.0011063  -4.964 0.00000555727491 ***
## investment            0.0018024     0.0003512   5.131 0.00000297670411 ***
## exchange_rate       -15.2320276    49.5054002  -0.308           0.7593    
## emission_norm18    1262.1702022   275.9738064   4.574 0.00002301013938 ***
## q2                  271.2684939   179.6445072   1.510           0.1360    
## q3                  -95.1434592   182.1998994  -0.522           0.6034    
## q4                  366.6215834   178.1678341   2.058           0.0438 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 516.4 on 63 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.8519, Adjusted R-squared:  0.8331 
## F-statistic: 45.29 on 8 and 63 DF,  p-value: < 0.00000000000000022
tractos_regB_result <- as.data.frame(residuals(tractos_regB))
tractos_regB_result$residuos <- tractos_regB_result[,1]
tractos_regB_result$`residuals(tractos_regB)` <- NULL

tractos_regB_result$predicts <- predict(tractos_regB)

tractos_regB_result$year_quarter <- data_tractos[1:72,1]

tractos_regB_result$observado <- data_tractos[1:72, 2]

tractos_regB_result$ratio_error <- (tractos_regB_result[,1] / tractos_regB_result[,4] * 100)

ggplot() +
  geom_line(data= tractos_regB_result, aes(x= year_quarter, y= observado, color= "Ventas históricas"), color = "black") +
  geom_line(data= tractos_regB_result, aes(x= year_quarter, y= predicts, color= "Ventas modeladas")) +
    labs(title = "Venta de tractocamiones en México (2002-2019)",
       subtitle = "MODELO B: ventas históricas vs. modeladas",
       y= "ventas tractocamiones",
       x= "Año / trimestre",
       color = "Series",
       caption = "Fuente: Frost & Sullivan") +
  theme(axis.text.x = element_text(angle = 45, hjust =1))

MODELO B: Prueba de capacidad predictiva -> retro-proyección 2017-2019

data_tractos_histB <- data_tractos[1:60,]
data_tractos_futureB <- data_tractos [61:72,]

tractos_regB_prueba <- lm(tractos_ventas ~ pib_manufacturing + gov_consumption + investment + exchange_rate + emission_norm18 + q2 + q3 +q4, data= data_tractos_histB)
summary(tractos_regB_prueba)
## 
## Call:
## lm(formula = tractos_ventas ~ pib_manufacturing + gov_consumption + 
##     investment + exchange_rate + emission_norm18 + q2 + q3 + 
##     q4, data = data_tractos_histB)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -947.18 -325.64  -12.01  320.86 1287.74 
## 
## Coefficients:
##                        Estimate    Std. Error t value      Pr(>|t|)    
## (Intercept)       -6489.7305102  1293.2894173  -5.018 0.00000671685 ***
## pib_manufacturing     0.0056644     0.0007817   7.246 0.00000000222 ***
## gov_consumption      -0.0058246     0.0011801  -4.936 0.00000893806 ***
## investment            0.0018811     0.0004367   4.308 0.00007521693 ***
## exchange_rate        -8.8067958    56.4230514  -0.156       0.87658    
## emission_norm18    1091.9457835   348.2445069   3.136       0.00285 ** 
## q2                  319.0898791   201.7781173   1.581       0.11997    
## q3                    7.3179259   204.4790973   0.036       0.97159    
## q4                  311.0141919   203.9873984   1.525       0.13352    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 532.7 on 51 degrees of freedom
## Multiple R-squared:  0.8353, Adjusted R-squared:  0.8094 
## F-statistic: 32.32 on 8 and 51 DF,  p-value: < 0.00000000000000022
tractos_regB_result_prueba <- as.data.frame(residuals(tractos_regB_prueba))
tractos_regB_result_prueba$residuos <- tractos_regB_result_prueba[,1]
tractos_regB_result_prueba$`residuals(tractos_regB_prueba)` <- NULL

tractos_regB_result_prueba$predicts <- predict(tractos_regB_prueba)
tractos_regB_result_prueba$year_quarter <- data_tractos_histB[,1]
tractos_regB_result_prueba$observados <- data_tractos_histB[,2]
tractos_regB_result_prueba[61:72, 4] <- data_tractos_futureB[,2] 
tractos_regB_result_prueba[61:72, 3] <- data_tractos_futureB[,1]

tractos_regB_result_prueba1 <- predict.lm(tractos_regB_prueba, data_tractos_futureB, interval = "confidence", level = 0.9)

tractos_regB_result_prueba$forecast <- NA
tractos_regB_result_prueba[61:72, 5] <- tractos_regB_result_prueba1[1:12,1]
tractos_regB_result_prueba$lower <- NA
tractos_regB_result_prueba[61:72, 6] <- tractos_regB_result_prueba1[1:12,2]
tractos_regB_result_prueba$upper <- NA
tractos_regB_result_prueba[61:72, 7] <- tractos_regB_result_prueba1[1:12,3]

ggplot()+
  geom_line(data= tractos_regB_result_prueba, aes(x= year_quarter, y= observados), color = "gray20") +
  geom_line(data= tractos_regB_result_prueba, aes(x= year_quarter, y= predicts, color = "Ventas modeladas")) +
  geom_line(data= tractos_regB_result_prueba, aes(x= year_quarter, y= forecast, color = "proyección media"), linetype = "dotdash") +
  geom_line(data= tractos_regB_result_prueba, aes(x= year_quarter, y= lower, color = "Lower confidence"), linetype = "dotdash") +
  geom_line(data= tractos_regB_result_prueba, aes(x= year_quarter, y= upper, color = "Upper confidence"), linetype = "dotdash") +
  labs( title = "Ventas de tractocamiones en México",
        subtitle = "Modelo A: ventas 2002-2016 c/ proyección 2017-2019",
        y= "Ventas tractocamiones",
        x= "Año / trimestre",
        color= "Series",
        caption= "Fuente: Frost & Sullivan") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 60 row(s) containing missing values (geom_path).

## Warning: Removed 60 row(s) containing missing values (geom_path).

## Warning: Removed 60 row(s) containing missing values (geom_path).