file_path <- "Caso Analitica Estrategica.xlsx"
sheet_names <- excel_sheets(file_path)
for (sheet in sheet_names) {
df_name <- paste0("df_", sheet)
assign(df_name, read_excel(file_path, sheet = sheet))
}
ls(pattern = "^df_")## [1] "df_name" "df_Production Cost" "df_Raw Material"
## [4] "df_Sales" "df_SGA" "df_Transport Costs"
# Correción de Hora
df_Sales$Date<-as.Date(df_Sales$Date)
df_Sales$Date <- format(df_Sales$Date, "%Y-%m")ventas <- df_Sales %>%
group_by(Product, Region, Date) %>%
summarise(Price = sum(Price), Volume = sum(Volume))## `summarise()` has grouped output by 'Product', 'Region'. You can override using
## the `.groups` argument.
# Transformar de formato ancho a formato largo
Raw_Material <- `df_Raw Material` %>%
pivot_longer(cols = `1`:`12`,
names_to = "Month",
values_to = "Cost")
# Combinar Año y Mes en una nueva columna Fecha y formatear adecuadamente
Raw_Material <- Raw_Material %>%
mutate(Date = paste(Year, Month, "01", sep = "-"),
Date = as.Date(Date, format = "%Y-%m-%d"),
Date = format(Date, "%Y-%m")) %>% #
select(Product, Date, Cost) # Transformar de formato ancho a formato largo
Production_Cost <- `df_Production Cost` %>%
pivot_longer(cols = `1`:`12`,
names_to = "Month",
values_to = "Cost")
# Combinar Año y Mes en una nueva columna Fecha y formatear adecuadamente
Production_Cost <- Production_Cost %>%
mutate(Date = paste(Year, Month, "01", sep = "-"),
Date = as.Date(Date, format = "%Y-%m-%d"),
Date = format(Date, "%Y-%m")) %>% #
select(Product, Date, Cost)# Transformar de formato ancho a formato largo
SGA <- `df_SGA` %>%
pivot_longer(cols = `1`:`12`,
names_to = "Month",
values_to = "Cost")
# Combinar Año y Mes en una nueva columna Fecha y formatear adecuadamente
SGA <- SGA %>%
mutate(Date = paste(Year, Month, "01", sep = "-"),
Date = as.Date(Date, format = "%Y-%m-%d"),
Date = format(Date, "%Y-%m")) %>% #
select(Product, Date, Cost)ventas_con_raw_material <- ventas %>%
left_join(Raw_Material, by = c("Product", "Date")) %>%
rename(`Raw Material` = Cost)
ventas_con_volumen_total_rm <- ventas_con_raw_material %>%
group_by(Product, Date) %>%
summarise(Volumen_Total = sum(Volume))## `summarise()` has grouped output by 'Product'. You can override using the
## `.groups` argument.
ventas_con_product_costs <- ventas %>%
left_join(Production_Cost, by = c("Product", "Date")) %>%
rename(`Production Cost` = Cost)
ventas_con_volumen_total_pc <- ventas_con_product_costs %>%
group_by(Product, Date) %>%
summarise(Volumen_Total = sum(Volume))## `summarise()` has grouped output by 'Product'. You can override using the
## `.groups` argument.
ventas_con_sga <- ventas %>%
left_join(SGA, by = c("Product", "Date")) %>%
rename(`SGA` = Cost)
ventas_con_volumen_total_sga <- ventas_con_sga %>%
group_by(Product, Date) %>%
summarise(Volumen_Total = sum(Volume))## `summarise()` has grouped output by 'Product'. You can override using the
## `.groups` argument.
#wb <- createWorkbook()
#addWorksheet(wb, "Sales")
#writeData(wb, sheet = "Sales", ventas_completa)
#addWorksheet(wb, "Raw Material")
#writeData(wb, sheet = "Raw Material", Raw_Material)
#addWorksheet(wb, "SGA")
#writeData(wb, sheet = "SGA", SGA)
#addWorksheet(wb, "Production Cost")
#writeData(wb, sheet = "Production Cost", Production_Cost)
#addWorksheet(wb, "Transport Cost")
#writeData(wb, sheet = "Transport Cost", `df_Transport Costs`)
#saveWorkbook(wb, "Entregable DE ACERO.xlsx", overwrite = TRUE)
## Correlacion
## 1 features with more than 20 categories ignored!
## Date: 36 categories
## Heterogeneidad
Podemos ver que comparando las medias tanto entre los diferentes meses
Debido a la estructura de los datos de la base de ventas, considero que lo más apropiado es trabajar los datos como una modelo de datos panel
## Warning in pdata.frame(ventas_completa, index = c("Product", "Date")): duplicate couples (id-time) in resulting pdata.frame
## to find out which, use, e.g., table(index(your_pdataframe), useNA = "ifany")
pooled<-plm(log(Price)~Region+Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
data = ventas_pd,
model = "pooling")
summary(pooled)## Pooling Model
##
## Call:
## plm(formula = log(Price) ~ Region + Volume + Transport.Cost +
## Raw.Material + Production.Cost + SGA, data = ventas_pd, model = "pooling")
##
## Balanced Panel: n = 4, T = 36, N = 550
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.954708 -0.190381 0.006152 0.209539 0.928920
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 8.3380e+00 1.4700e-01 56.7210 < 2.2e-16 ***
## RegionNorte -2.7430e-01 4.6535e-02 -5.8944 6.621e-09 ***
## RegionOccidente -1.3200e+00 1.2109e-01 -10.9009 < 2.2e-16 ***
## RegionSur -8.5154e-01 8.5184e-02 -9.9965 < 2.2e-16 ***
## Volume 3.1524e-04 1.6695e-05 18.8822 < 2.2e-16 ***
## Transport.Cost 3.4766e-02 3.0410e-03 11.4323 < 2.2e-16 ***
## Raw.Material 1.0050e-05 3.9640e-05 0.2535 0.8000
## Production.Cost 4.2576e-05 1.5800e-04 0.2695 0.7877
## SGA -2.0386e-05 1.4813e-04 -0.1376 0.8906
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 171.86
## Residual Sum of Squares: 56.993
## R-Squared: 0.66837
## Adj. R-Squared: 0.66346
## F-statistic: 136.291 on 8 and 541 DF, p-value: < 2.22e-16
within<-plm(log(Price)~Region+Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
data = ventas_pd,
model = "within")
summary(within)## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = log(Price) ~ Region + Volume + Transport.Cost +
## Raw.Material + Production.Cost + SGA, data = ventas_pd, model = "within")
##
## Balanced Panel: n = 4, T = 36, N = 550
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.8882486 -0.1858114 0.0055747 0.2077419 0.8785219
##
## Coefficients: (1 dropped because of singularities)
## Estimate Std. Error t-value Pr(>|t|)
## RegionNorte 1.7665e-02 3.8559e-02 0.4581 0.6470
## RegionOccidente -1.7114e-02 3.8692e-02 -0.4423 0.6584
## RegionSur 1.7331e-02 3.8635e-02 0.4486 0.6539
## Volume 3.3070e-04 1.6792e-05 19.6942 <2e-16 ***
## Raw.Material -2.9629e-05 3.9848e-05 -0.7436 0.4575
## Production.Cost 6.2841e-06 1.5544e-04 0.0404 0.9678
## SGA 9.7336e-05 1.4811e-04 0.6572 0.5113
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 156.37
## Residual Sum of Squares: 54.774
## R-Squared: 0.64972
## Adj. R-Squared: 0.64322
## F-statistic: 142.824 on 7 and 539 DF, p-value: < 2.22e-16
##
## F test for individual effects
##
## data: log(Price) ~ Region + Volume + Transport.Cost + Raw.Material + ...
## F = 10.92, df1 = 2, df2 = 539, p-value = 2.244e-05
## alternative hypothesis: significant effects
Una vez planteado los dos modelos y haciendo un pFtest para poder determinar que modelo es mejor pudimos obtener un p-value menor a 0.05. Dado esto rechazamos nuestra Ho concluyendo que nuestro modelo de efectos fijos es mejor al modelo agrupado. Sin embargo aun tenemos otro tipo de modelos de regresion para datos panel que se pueden testear.
walhus<-plm(log(Price)~Region+Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
data = ventas_pd,
model = "random",
random.method = "walhus")
summary(walhus)## Oneway (individual) effect Random Effect Model
## (Wallace-Hussain's transformation)
##
## Call:
## plm(formula = log(Price) ~ Region + Volume + Transport.Cost +
## Raw.Material + Production.Cost + SGA, data = ventas_pd, model = "random",
## random.method = "walhus")
##
## Balanced Panel: n = 4, T = 36, N = 550
##
## Effects:
## var std.dev share
## idiosyncratic 0.10070 0.31733 0.9
## individual 0.01119 0.10577 0.1
## theta: 0.5528
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.9148843 -0.1853130 0.0096639 0.2108817 0.8884935
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 8.2960e+00 2.9977e-01 27.6747 < 2.2e-16 ***
## RegionNorte -2.8030e-01 6.4832e-02 -4.3236 1.535e-05 ***
## RegionOccidente -1.3556e+00 2.3843e-01 -5.6855 1.304e-08 ***
## RegionSur -8.7506e-01 1.6119e-01 -5.4286 5.679e-08 ***
## Volume 3.2734e-04 1.6749e-05 19.5437 < 2.2e-16 ***
## Transport.Cost 3.5698e-02 6.2672e-03 5.6960 1.226e-08 ***
## Raw.Material -2.1060e-05 3.9752e-05 -0.5298 0.5963
## Production.Cost 1.4077e-05 1.5577e-04 0.0904 0.9280
## SGA 7.2032e-05 1.4793e-04 0.4869 0.6263
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 159.47
## Residual Sum of Squares: 55.253
## R-Squared: 0.65352
## Adj. R-Squared: 0.64839
## Chisq: 1020.4 on 8 DF, p-value: < 2.22e-16
#ameniya<-plm(Price~Region+Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
#data = ventas_pd,
#model = "random",
#random.method = "ameniya")
#summary(ameniya)Método no relevante
nerlove<-plm(log(Price)~Region+Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
data = ventas_pd,
model = "random",
random.method = "nerlove")
summary(nerlove)## Oneway (individual) effect Random Effect Model
## (Nerlove's transformation)
##
## Call:
## plm(formula = log(Price) ~ Region + Volume + Transport.Cost +
## Raw.Material + Production.Cost + SGA, data = ventas_pd, model = "random",
## random.method = "nerlove")
##
## Balanced Panel: n = 4, T = 36, N = 550
##
## Effects:
## var std.dev share
## idiosyncratic 0.09959 0.31558 0.673
## individual 0.04829 0.21976 0.327
## theta: 0.7672
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.901207 -0.183329 0.008965 0.207520 0.884683
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 8.2876e+00 5.6505e-01 14.6669 < 2.2e-16 ***
## RegionNorte -2.8152e-01 1.0583e-01 -2.6602 0.007810 **
## RegionOccidente -1.3628e+00 4.4576e-01 -3.0572 0.002234 **
## RegionSur -8.7980e-01 2.9837e-01 -2.9487 0.003191 **
## Volume 3.2977e-04 1.6758e-05 19.6791 < 2.2e-16 ***
## Transport.Cost 3.5886e-02 1.1839e-02 3.0312 0.002436 **
## Raw.Material -2.7274e-05 3.9769e-05 -0.6858 0.492833
## Production.Cost 8.4240e-06 1.5532e-04 0.0542 0.956747
## SGA 9.0386e-05 1.4786e-04 0.6113 0.541009
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 157.21
## Residual Sum of Squares: 54.906
## R-Squared: 0.65075
## Adj. R-Squared: 0.64559
## Chisq: 1008.04 on 8 DF, p-value: < 2.22e-16
##
## Hausman Test
##
## data: log(Price) ~ Region + Volume + Transport.Cost + Raw.Material + ...
## chisq = 6.0576, df = 7, p-value = 0.533
## alternative hypothesis: one model is inconsistent
##
## Hausman Test
##
## data: log(Price) ~ Region + Volume + Transport.Cost + Raw.Material + ...
## chisq = 0.24856, df = 7, p-value = 0.9999
## alternative hypothesis: one model is inconsistent
Una vez hecha la comprobación entre los modelos de efectos fijos y aleatorios, podemos concluis que el modelo más adecuado es el modelo de efectos aleatorios (ya sea walhus o nerlove) en lugar del modelo de efectos fijos.
Para poder decidir con que modelo quedarnos entre Walhus o Nerlove usare las métricas del R Squared. Observando en los summary anteriores podemos observar que el modelo que mejor explica la variable dependiente es el metodo walhus
Basados en nuestro modelo seleccionado, podemos ver que las variables estadisticamente significantes son Intercepto, Region, Volumen y Transporte, siendo volumen la cual tiene un nivel de significancia mayor a 99% y el resto de variables mencionadas entre 99% y 99.9%.
Lo interesante viene siendo que el la relacion en cuanto a transporte es positiva, lo que puede significar que aquellas lugares donde se gasta más en transporte generan mejores niveles ventas.
ProductoA<-ventas_completa%>%filter(Product == "A")
ProductoB<-ventas_completa%>%filter(Product == "B")
ProductoC<-ventas_completa%>%filter(Product == "C")
ProductoD<-ventas_completa%>%filter(Product == "D")par(mfrow=c(2, 2))
hist(ProductoA$Price)
hist(ProductoB$Price)
hist(ProductoC$Price)
hist(ProductoD$Price)pooled_A<-plm(log(Price)~Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
data = A_pd,
model = "pooling")
summary(pooled_A)## Pooling Model
##
## Call:
## plm(formula = log(Price) ~ Volume + Transport.Cost + Raw.Material +
## Production.Cost + SGA, data = A_pd, model = "pooling")
##
## Unbalanced Panel: n = 4, T = 33-35, N = 136
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.718451 -0.225426 0.038262 0.188019 0.891902
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 9.7496e+00 1.2404e-01 78.6032 <2e-16 ***
## Volume 3.4566e-04 3.0401e-05 11.3698 <2e-16 ***
## Transport.Cost 1.3229e-03 1.8901e-03 0.6999 0.4852
## Raw.Material -6.5596e-05 1.0808e-04 -0.6069 0.5450
## Production.Cost 2.0538e-05 4.0459e-04 0.0508 0.9596
## SGA 1.4400e-04 3.5949e-04 0.4006 0.6894
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 42.476
## Residual Sum of Squares: 13.111
## R-Squared: 0.69133
## Adj. R-Squared: 0.67946
## F-statistic: 58.232 on 5 and 130 DF, p-value: < 2.22e-16
within_A<-plm(log(Price)~Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
data = A_pd,
model = "within")
summary(within_A)## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = log(Price) ~ Volume + Transport.Cost + Raw.Material +
## Production.Cost + SGA, data = A_pd, model = "within")
##
## Unbalanced Panel: n = 4, T = 33-35, N = 136
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.699226 -0.220617 0.026781 0.197826 0.869013
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## Volume 3.4661e-04 3.0571e-05 11.3378 <2e-16 ***
## Raw.Material -7.0374e-05 1.0930e-04 -0.6439 0.5208
## Production.Cost 1.7719e-05 4.0781e-04 0.0434 0.9654
## SGA 1.5709e-04 3.6188e-04 0.4341 0.6650
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 42.014
## Residual Sum of Squares: 13.037
## R-Squared: 0.6897
## Adj. R-Squared: 0.67274
## F-statistic: 71.1275 on 4 and 128 DF, p-value: < 2.22e-16
##
## F test for individual effects
##
## data: log(Price) ~ Volume + Transport.Cost + Raw.Material + Production.Cost + ...
## F = 0.36534, df1 = 2, df2 = 128, p-value = 0.6947
## alternative hypothesis: significant effects
Dada esta comprobacion nos quedamos con el metodo de efectos agrupados
Basados en el modelo escogido, las variables estadisticamente significativas son Volumey el intercepto con nivel de confianza mayor 99%
pooled_B<-plm(log(Price)~Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
data = B_pd,
model = "pooling")
summary(pooled_B)## Pooling Model
##
## Call:
## plm(formula = log(Price) ~ Volume + Transport.Cost + Raw.Material +
## Production.Cost + SGA, data = B_pd, model = "pooling")
##
## Unbalanced Panel: n = 4, T = 32-36, N = 136
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.7202611 -0.1640933 -0.0042783 0.1849294 0.7399327
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 9.8102e+00 1.1923e-01 82.2771 < 2.2e-16 ***
## Volume 3.3800e-04 3.8671e-05 8.7403 1.013e-14 ***
## Transport.Cost 1.0370e-04 1.7471e-03 0.0594 0.9528
## Raw.Material -3.5747e-05 1.1434e-04 -0.3126 0.7551
## Production.Cost -3.4812e-04 4.2301e-04 -0.8230 0.4120
## SGA 3.2620e-04 3.6585e-04 0.8916 0.3743
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 33.815
## Residual Sum of Squares: 11.225
## R-Squared: 0.66804
## Adj. R-Squared: 0.65528
## F-statistic: 52.3237 on 5 and 130 DF, p-value: < 2.22e-16
within_B<-plm(log(Price)~Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
data = B_pd,
model = "within")
summary(within_B)## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = log(Price) ~ Volume + Transport.Cost + Raw.Material +
## Production.Cost + SGA, data = B_pd, model = "within")
##
## Unbalanced Panel: n = 4, T = 32-36, N = 136
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.7533357 -0.1424054 0.0035632 0.1804691 0.7808767
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## Volume 3.3601e-04 3.8874e-05 8.6437 1.904e-14 ***
## Raw.Material -3.5898e-05 1.1489e-04 -0.3125 0.7552
## Production.Cost -2.9911e-04 4.2716e-04 -0.7002 0.4851
## SGA 3.2123e-04 3.6707e-04 0.8751 0.3831
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 32.912
## Residual Sum of Squares: 11.123
## R-Squared: 0.66204
## Adj. R-Squared: 0.64355
## F-statistic: 62.685 on 4 and 128 DF, p-value: < 2.22e-16
##
## F test for individual effects
##
## data: log(Price) ~ Volume + Transport.Cost + Raw.Material + Production.Cost + ...
## F = 0.58773, df1 = 2, df2 = 128, p-value = 0.5571
## alternative hypothesis: significant effects
Dada esta comprobacion nos quedamos con el metodo de efectos agrupados
Basados en el modelo escogido, las variables estadisticamente significativas son Volume y el intercepto con nivel de confianza mayor 99%
pooled_C<-plm(log(Price)~Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
data = C_pd,
model = "pooling")
summary(pooled_C)## Pooling Model
##
## Call:
## plm(formula = log(Price) ~ Volume + Transport.Cost + Raw.Material +
## Production.Cost + SGA, data = C_pd, model = "pooling")
##
## Unbalanced Panel: n = 4, T = 35-36, N = 141
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.796882 -0.183194 0.010502 0.198157 0.839158
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 1.0310e+01 1.4104e-01 73.1008 < 2.2e-16 ***
## Volume 3.5465e-04 5.0497e-05 7.0233 9.645e-11 ***
## Transport.Cost -1.8529e-03 1.8939e-03 -0.9784 0.32965
## Raw.Material -3.2348e-05 6.7488e-05 -0.4793 0.63249
## Production.Cost 4.9817e-04 2.9349e-04 1.6974 0.09192 .
## SGA -3.9967e-04 2.7825e-04 -1.4364 0.15321
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 38.915
## Residual Sum of Squares: 14.306
## R-Squared: 0.63237
## Adj. R-Squared: 0.61875
## F-statistic: 46.4434 on 5 and 135 DF, p-value: < 2.22e-16
within_C<-plm(log(Price)~Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
data = C_pd,
model = "within")
summary(within_C)## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = log(Price) ~ Volume + Transport.Cost + Raw.Material +
## Production.Cost + SGA, data = C_pd, model = "within")
##
## Unbalanced Panel: n = 4, T = 35-36, N = 141
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.808701 -0.175632 0.027536 0.201136 0.824960
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## Volume 3.5501e-04 5.0844e-05 6.9823 1.251e-10 ***
## Raw.Material -3.4456e-05 6.8082e-05 -0.5061 0.61363
## Production.Cost 5.0418e-04 2.9580e-04 1.7044 0.09063 .
## SGA -3.9862e-04 2.8062e-04 -1.4205 0.15781
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 38.492
## Residual Sum of Squares: 14.28
## R-Squared: 0.62901
## Adj. R-Squared: 0.60948
## F-statistic: 56.3745 on 4 and 133 DF, p-value: < 2.22e-16
##
## F test for individual effects
##
## data: log(Price) ~ Volume + Transport.Cost + Raw.Material + Production.Cost + ...
## F = 0.12026, df1 = 2, df2 = 133, p-value = 0.8868
## alternative hypothesis: significant effects
Dada esta comprobacion nos quedamos con el metodo de efectos agrupados
Basados en el modelo escogido, las variables estadisticamente significativas son Volume e Intercepto con nivel de confianza mayor 99% de confianza y Production Cost con nivel de confianza de 90% a 95%
pooled_D<-plm(log(Price)~Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
data = D_pd,
model = "pooling")
summary(pooled_D)## Pooling Model
##
## Call:
## plm(formula = log(Price) ~ Volume + Transport.Cost + Raw.Material +
## Production.Cost + SGA, data = D_pd, model = "pooling")
##
## Unbalanced Panel: n = 4, T = 32-36, N = 137
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.933980 -0.148718 0.030107 0.213680 0.726757
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 1.0242e+01 1.7024e-01 60.1582 < 2.2e-16 ***
## Volume 3.0681e-04 3.4821e-05 8.8110 6.508e-15 ***
## Transport.Cost -1.1012e-03 2.0521e-03 -0.5366 0.5924
## Raw.Material -2.8760e-05 8.1266e-05 -0.3539 0.7240
## Production.Cost -1.0130e-04 2.7369e-04 -0.3701 0.7119
## SGA 2.8273e-04 2.7611e-04 1.0239 0.3077
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 41.166
## Residual Sum of Squares: 15.286
## R-Squared: 0.62867
## Adj. R-Squared: 0.61449
## F-statistic: 44.3568 on 5 and 131 DF, p-value: < 2.22e-16
within_D<-plm(log(Price)~Volume+Transport.Cost+Raw.Material+Production.Cost+SGA,
data = D_pd,
model = "within")
summary(within_D)## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = log(Price) ~ Volume + Transport.Cost + Raw.Material +
## Production.Cost + SGA, data = D_pd, model = "within")
##
## Unbalanced Panel: n = 4, T = 32-36, N = 137
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.942963 -0.159131 0.030261 0.211090 0.724040
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## Volume 3.0768e-04 3.5286e-05 8.7198 1.191e-14 ***
## Raw.Material -2.9046e-05 8.1940e-05 -0.3545 0.7236
## Production.Cost -1.0180e-04 2.7587e-04 -0.3690 0.7127
## SGA 2.8047e-04 2.7935e-04 1.0040 0.3173
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 38.507
## Residual Sum of Squares: 15.28
## R-Squared: 0.6032
## Adj. R-Squared: 0.58167
## F-statistic: 49.0256 on 4 and 129 DF, p-value: < 2.22e-16
##
## F test for individual effects
##
## data: log(Price) ~ Volume + Transport.Cost + Raw.Material + Production.Cost + ...
## F = 0.028547, df1 = 2, df2 = 129, p-value = 0.9719
## alternative hypothesis: significant effects
Dada esta comprobacion nos quedamos con el metodo de efectos agrupados
Basados en el modelo escogido, las variables estadisticamente significativas son Volume y el intercepto con nivel de confianza mayor 99%
##
## Augmented Dickey-Fuller Test
##
## data: ProductoA_total$Price
## Dickey-Fuller = -2.8298, Lag order = 3, p-value = 0.2503
## alternative hypothesis: stationary
# Debido a que tenemos una serie no estacionaria buscaremos una forma de hacerla estacionaria
adf.test(log(ProductoA_total$Price)) # El valor es mayor a 0.05##
## Augmented Dickey-Fuller Test
##
## data: log(ProductoA_total$Price)
## Dickey-Fuller = -3.4393, Lag order = 3, p-value = 0.06755
## alternative hypothesis: stationary
Debido a que obtuvimos valores mayores a 0.05, optamos por usar ARIMA ya que este modelo se ocupa de la no estacionaridad en la serie de tiempo
ProductA_ts<-ts(ProductoA_total$Price,frequency=12,start=c(2019,1))
ProductA_dec<-decompose(ProductA_ts)
plot(ProductA_dec)
### ARIMA
##
## Call:
## arima(x = ProductA_ts, order = c(1, 1, 1))
##
## Coefficients:
## ar1 ma1
## -0.1992 -0.7505
## s.e. 0.2256 0.2039
##
## sigma^2 estimated as 3.573e+09: log likelihood = -435.18, aic = 876.36
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -809.9205 58937.09 46878.88 -15.42174 34.91697 0.8034097
## ACF1
## Training set 0.03004559
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)
## Q* = 4.5048, df = 5, p-value = 0.4792
##
## Model df: 2. Total lags used: 7
##
## Augmented Dickey-Fuller Test
##
## data: A_Arima$residuals
## Dickey-Fuller = -3.821, Lag order = 3, p-value = 0.03023
## alternative hypothesis: stationary
Los residuales del modelo son estacionarios y no muestran autocorrelacion serial
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2022 203022.6 126420.3 279624.9 85869.47 320175.7
## Feb 2022 217463.6 140764.4 294162.8 100162.34 334764.9
## Mar 2022 214587.3 135725.0 293449.5 93977.85 335196.7
## Apr 2022 215160.2 134797.3 295523.0 92255.78 338064.6
## May 2022 215046.1 133099.6 296992.5 89719.72 340372.4
## Jun 2022 215068.8 131590.6 298547.0 87399.93 342737.6
##
## Augmented Dickey-Fuller Test
##
## data: ProductoD_total$Price
## Dickey-Fuller = -4.0222, Lag order = 3, p-value = 0.02002
## alternative hypothesis: stationary
Obtuvimos valores con series estacionarias
ProductD_ts<-ts(ProductoD_total$Price,frequency=12,start=c(2019,1))
ProductD_dec<-decompose(ProductD_ts)
plot(ProductD_dec)
### ARIMA
##
## Call:
## arima(x = ProductD_ts, order = c(1, 1, 1))
##
## Coefficients:
## ar1 ma1
## -0.0252 -0.9999
## s.e. 0.1711 0.0856
##
## sigma^2 estimated as 5.562e+09: log likelihood = -444.16, aic = 894.33
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -3529.927 73536.3 58347.9 -11.56031 27.37968 0.6481655 -0.01538862
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)
## Q* = 6.0858, df = 5, p-value = 0.298
##
## Model df: 2. Total lags used: 7
##
## Augmented Dickey-Fuller Test
##
## data: D_ARIMA$residuals
## Dickey-Fuller = -4.0733, Lag order = 3, p-value = 0.01809
## alternative hypothesis: stationary
Los residuales del modelo son estacionarios y no muestran autocorrelacion serial
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2022 256038.7 159144.2 352933.2 107851.3 404226.0
## Feb 2022 255064.4 158205.2 351923.6 106931.0 403197.7
## Mar 2022 255088.9 158228.1 351949.8 106953.1 403224.8
## Apr 2022 255088.3 158227.5 351949.1 106952.5 403224.1
## May 2022 255088.3 158227.6 351949.1 106952.6 403224.1
## Jun 2022 255088.3 158227.6 351949.1 106952.6 403224.1