data_tractos <-read.csv("/Users/martinsingla/Desktop/Mexico_tractos_data.csv")

#DATOS

data_tractos$year_quarter <- as.yearqtr(data_tractos$year_quarter)

#MODELO DE REGRESIÓN (A: Serie 2008-2019)

tractos_reg1 <- lm(tracto_sales_quarterly ~ gdp_manufacturing + gdp_current_prices + gdp_foodnbev + construction_prod_value + wholesale_sales_index + retail_sales_index + exports + imports + private_consumption + gov_consumption + investment + inflation_rate + interest_rate + exchange_rate + stock_change_index + q2 + q3 + q4 + emission_norm18, data = data_tractos)

summary(tractos_reg1)
## 
## Call:
## lm(formula = tracto_sales_quarterly ~ gdp_manufacturing + gdp_current_prices + 
##     gdp_foodnbev + construction_prod_value + wholesale_sales_index + 
##     retail_sales_index + exports + imports + private_consumption + 
##     gov_consumption + investment + inflation_rate + interest_rate + 
##     exchange_rate + stock_change_index + q2 + q3 + q4 + emission_norm18, 
##     data = data_tractos)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1016.22  -229.80    22.55   261.33   664.32 
## 
## Coefficients:
##                               Estimate     Std. Error t value Pr(>|t|)    
## (Intercept)             -25440.7876633   6253.9685980  -4.068 0.000350 ***
## gdp_manufacturing           -0.0018829      0.0015522  -1.213 0.235257    
## gdp_current_prices          -0.0006963      0.0005829  -1.195 0.242290    
## gdp_foodnbev                 0.0007140      0.0091479   0.078 0.938343    
## construction_prod_value      0.0709499      0.0341852   2.075 0.047245 *  
## wholesale_sales_index      -54.8279836     71.1682588  -0.770 0.447515    
## retail_sales_index         297.9535121     79.9801882   3.725 0.000873 ***
## exports                      0.0264182      0.0587248   0.450 0.656272    
## imports                      0.0556521      0.0638588   0.871 0.390900    
## private_consumption          0.0010825      0.0008981   1.205 0.238200    
## gov_consumption             -0.0042136      0.0049976  -0.843 0.406302    
## investment                  -0.0006863      0.0014246  -0.482 0.633707    
## inflation_rate           13883.0468667  20314.0113094   0.683 0.499957    
## interest_rate              -80.9868598    208.8367754  -0.388 0.701097    
## exchange_rate              113.3186516    216.1278931   0.524 0.604187    
## stock_change_index           0.0733078      0.0427849   1.713 0.097691 .  
## q2                         309.1066638    339.1210398   0.911 0.369816    
## q3                        -295.6913081    359.9437669  -0.821 0.418306    
## q4                         -34.0060563    672.5621112  -0.051 0.960034    
## emission_norm18           1445.3266585    553.4044754   2.612 0.014319 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 483.4 on 28 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.9012, Adjusted R-squared:  0.8342 
## F-statistic: 13.45 on 19 and 28 DF,  p-value: 0.00000000191
reg1_results <- as.data.frame(residuals(tractos_reg1))
reg1_results$residuos <- reg1_results$`residuals(tractos_reg1)`
reg1_results$`residuals(tractos_reg1)` <- NULL
reg1_results$predicts <- predict(tractos_reg1)
reg1_results$valores_observados <- data_tractos[25:72, 24]
reg1_results$year_quarter <- data_tractos[25:72, 3]

#VISUALIZACIÓN DE MODELO de REG. MÚLTIPLE (A)

ggplot() +
  geom_line(data = reg1_results, aes(x= year_quarter,y= valores_observados, color = "Ventas históricas")) +
  geom_line(data = reg1_results, aes(x= year_quarter, y= predicts, color = "Ventas modeladas")) +
  labs(title= "(A) Ventas de tractocamiones en México (2008-2019)",
       subtitle= "Ventas históricas observadas vs. modeladas (serie trimestral)",
       y = "ventas tractocamiones",
       x = "Año / Trimestre",
       color = "Series") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#TEST DE AUTOCORRELACIÓN DE RESIDUOS (A)

checkresiduals(tractos_reg1)

## 
##  Breusch-Godfrey test for serial correlation of order up to 23
## 
## data:  Residuals
## LM test = 41.641, df = 23, p-value = 0.009993

Los residuos se distribuyen normalmente y el test ACF muestra que no hay auto-correlación de residuos. Esto nos indica que el comportamiento de los residuos es aleatorio y no responde al de alguna variable que no se esté contemplando.

#MODELO DE REGRESIÓN (B: Serie 2002-2019)

tractos_reg2 <- lm(tracto_sales_quarterly ~ gdp_manufacturing + gdp_current_prices + gdp_foodnbev + exports + imports + private_consumption + gov_consumption + investment + inflation_rate + interest_rate + exchange_rate + stock_change_index + q2 + q3 + q4 + emission_norm18, data = data_tractos)

summary(tractos_reg2)
## 
## Call:
## lm(formula = tracto_sales_quarterly ~ gdp_manufacturing + gdp_current_prices + 
##     gdp_foodnbev + exports + imports + private_consumption + 
##     gov_consumption + investment + inflation_rate + interest_rate + 
##     exchange_rate + stock_change_index + q2 + q3 + q4 + emission_norm18, 
##     data = data_tractos)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1115.23  -318.36    47.04   346.57  1139.06 
## 
## Coefficients:
##                          Estimate    Std. Error t value Pr(>|t|)   
## (Intercept)         -3423.4103213  1543.6306859  -2.218  0.03072 * 
## gdp_manufacturing       0.0021729     0.0010962   1.982  0.05245 . 
## gdp_current_prices     -0.0011564     0.0005297  -2.183  0.03332 * 
## gdp_foodnbev           -0.0170306     0.0064738  -2.631  0.01103 * 
## exports                 0.1218323     0.0513239   2.374  0.02113 * 
## imports                -0.0629820     0.0563860  -1.117  0.26886   
## private_consumption     0.0020060     0.0006500   3.086  0.00317 **
## gov_consumption        -0.0023959     0.0030965  -0.774  0.44239   
## investment              0.0006543     0.0009752   0.671  0.50504   
## inflation_rate      21390.6646044 16681.7259997   1.282  0.20512   
## interest_rate         111.1524355   109.3106697   1.017  0.31368   
## exchange_rate         243.8185438   189.8433552   1.284  0.20442   
## stock_change_index      0.0872203     0.0299745   2.910  0.00521 **
## q2                    730.8545629   299.4110771   2.441  0.01789 * 
## q3                    480.6158720   278.5527104   1.725  0.09007 . 
## q4                    821.3682950   375.2273307   2.189  0.03286 * 
## emission_norm18      1355.3493204   526.0293239   2.577  0.01269 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 571 on 55 degrees of freedom
## Multiple R-squared:  0.8419, Adjusted R-squared:  0.7959 
## F-statistic:  18.3 on 16 and 55 DF,  p-value: < 0.00000000000000022

Se quitó de la serie los indicadores “construction_prod_value”, “wholesale_sales_index” y “retail_sales_index”, debido a la falta de datos para toda la serie histórica de “tracto_sales_quarterly”. Esto nos permite abarcar el período 2001-2019.

##VISUALIZACIÓN DE MODELO de REG. MÚLTIPLE (B)

reg2_results <- as.data.frame(residuals(tractos_reg2))
reg2_results$residuos <- reg2_results$`residuals(tractos_reg2)`
reg2_results$`residuals(tractos_reg2)` <- NULL
reg2_results$predicts <- predict(tractos_reg2)
reg2_results$valores_observados <- data_tractos[0:72, 24]
reg2_results$year_quarter <- data_tractos[0:72, 3]
ggplot() +
  geom_line(data = reg2_results, aes(x= year_quarter,y= valores_observados, color = "Ventas históricas")) +
  geom_line(data = reg2_results, aes(x= year_quarter, y= predicts, color = "Ventas modeladas")) +
  labs(title= "(B) Ventas de tractocamiones en México (2002-2019)",
       subtitle= "Ventas históricas observadas vs. modeladas (serie trimestral)",
       y = "ventas tractocamiones",
       x = "Año / Trimestre",
       color = "Series") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#TEST DE AUTOCORRELACIÓN DE RESIDUOS (B)

checkresiduals(tractos_reg2)

## 
##  Breusch-Godfrey test for serial correlation of order up to 20
## 
## data:  Residuals
## LM test = 30.375, df = 20, p-value = 0.064

Los residuos se distribuyen de forma normal, sin embargo el test ACF indica valores extremos en el Lag 1, lo cual puede estar indicando cierto nivel de auto-correlación, que disminuía cuando incluíamos las variables eliminadas, por lo que puede ser desestimado.

#MODELO DE REGRESIÓN AJUSTADO (C: Serie 2008-2019)

tractos_reg3 <- lm(tracto_sales_quarterly ~ gdp_manufacturing + gdp_current_prices + construction_prod_value + retail_sales_index + exports + imports + private_consumption + gov_consumption + stock_change_index + q2 + q3 + q4 + emission_norm18, data = data_tractos)

summary(tractos_reg3)
## 
## Call:
## lm(formula = tracto_sales_quarterly ~ gdp_manufacturing + gdp_current_prices + 
##     construction_prod_value + retail_sales_index + exports + 
##     imports + private_consumption + gov_consumption + stock_change_index + 
##     q2 + q3 + q4 + emission_norm18, data = data_tractos)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -990.3 -252.3  -31.7  287.6  897.6 
## 
## Coefficients:
##                               Estimate     Std. Error t value    Pr(>|t|)    
## (Intercept)             -23752.8679962   3957.0316540  -6.003 0.000000854 ***
## gdp_manufacturing           -0.0016397      0.0012653  -1.296     0.20373    
## gdp_current_prices          -0.0008258      0.0004783  -1.727     0.09333 .  
## construction_prod_value      0.0509530      0.0204019   2.497     0.01751 *  
## retail_sales_index         243.8758001     47.0655256   5.182 0.000009984 ***
## exports                      0.0257459      0.0378076   0.681     0.50050    
## imports                      0.0269743      0.0402577   0.670     0.50736    
## private_consumption          0.0012233      0.0006191   1.976     0.05633 .  
## gov_consumption             -0.0031089      0.0013248  -2.347     0.02490 *  
## stock_change_index           0.0722213      0.0232456   3.107     0.00380 ** 
## q2                         397.0323008    236.4208940   1.679     0.10225    
## q3                          18.9736443    231.2908390   0.082     0.93510    
## q4                         640.3295705    338.0081906   1.894     0.06670 .  
## emission_norm18           1479.2112119    421.7208142   3.508     0.00129 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 457.4 on 34 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.8926, Adjusted R-squared:  0.8516 
## F-statistic: 21.74 on 13 and 34 DF,  p-value: 0.0000000000009764

#Quito “GDP_foodnbev”, “investment”, “exchange_rate”, “interest_rate” , “wholesales_index” e “inflation”

reg3_results <- as.data.frame(residuals(tractos_reg3))
reg3_results$residuos <- reg3_results$`residuals(tractos_reg3)`
reg3_results$`residuals(tractos_reg3)` <- NULL
reg3_results$predicts <- predict(tractos_reg3)
reg3_results$valores_observados <- data_tractos[25:72, 24]
reg3_results$year_quarter <- data_tractos[25:72, 3]

#VISUALIZACIÓN DE MODELO de REG. MÚLTIPLE (C)

ggplot() +
  geom_line(data = reg3_results, aes(x= year_quarter,y= valores_observados, color = "Ventas históricas")) +
  geom_line(data = reg3_results, aes(x= year_quarter, y= predicts, color = "Ventas modeladas")) +
  labs(title= "(C) Ventas de tractocamiones en México (2008-2019)",
       subtitle= "Ventas históricas observadas vs. modeladas (serie trimestral)",
       y = "ventas tractocamiones",
       x = "Año / Trimestre",
       color = "Series") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#TEST DE AUTOCORRELACIÓN DE RESIDUOS (C)

checkresiduals(tractos_reg3)

## 
##  Breusch-Godfrey test for serial correlation of order up to 17
## 
## data:  Residuals
## LM test = 27.261, df = 17, p-value = 0.05436