# Load the raw Arca Continental sales export into `bd`.
# The location defaults to the original author's desktop path; set the
# ARCA_DATOS_CSV environment variable to read the file from somewhere else
# without editing this script.
ruta_csv <- Sys.getenv(
  "ARCA_DATOS_CSV",
  unset = "/Users/marcogonzalez/Desktop/Programacion Bootcamp/Datos Arca Continental Original.csv"
)
bd <- read.csv(ruta_csv)

# Per-column overview: quartiles for numeric columns, length/class for text.
summary(bd)
## ID Año Territorio Sub.Territorio
## Min. : 1 Min. :2016 Length:466509 Length:466509
## 1st Qu.:116628 1st Qu.:2017 Class :character Class :character
## Median :233255 Median :2018 Mode :character Mode :character
## Mean :233255 Mean :2018
## 3rd Qu.:349882 3rd Qu.:2019
## Max. :466509 Max. :2019
## CEDI Cliente Nombre Tamaño.Cte.Industria
## Length:466509 Length:466509 Length:466509 Length:466509
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Segmento.Det Marca Presentacion Tamaño
## Length:466509 Length:466509 Length:466509 Length:466509
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Retornable_NR Enero Febrero Marzo
## Length:466509 Length:466509 Length:466509 Length:466509
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Abril Mayo Junio Julio
## Length:466509 Length:466509 Length:466509 Length:466509
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Agosto Septiembre Octubre Noviembre
## Length:466509 Length:466509 Length:466509 Length:466509
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Diciembre
## Length:466509
## Class :character
## Mode :character
##
##
##
# Compact structure dump: one line per column with its type and first values.
utils::str(bd)
## 'data.frame': 466509 obs. of 25 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Año : int 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 ...
## $ Territorio : chr "Guadalajara" "Guadalajara" "Guadalajara" "Guadalajara" ...
## $ Sub.Territorio : chr "Belenes" "Belenes" "Belenes" "Belenes" ...
## $ CEDI : chr "Suc. Belenes" "Suc. Belenes" "Suc. Belenes" "Suc. Belenes" ...
## $ Cliente : chr "77737" "77737" "77737" "77737" ...
## $ Nombre : chr "ABARR" "ABARR" "ABARR" "ABARR" ...
## $ Tamaño.Cte.Industria: chr "Extra Grande" "Extra Grande" "Extra Grande" "Extra Grande" ...
## $ Segmento.Det : chr "Agua Mineral" "Agua Purificada" "Agua Purificada" "Agua Saborizada" ...
## $ Marca : chr "Topo Chico A.M." "Ciel Agua Purificada" "Ciel Agua Purificada" "Ciel Exprim" ...
## $ Presentacion : chr "600 ml NR" "1 Ltro. N.R." "1.5 Lts. NR" "600 ml NR" ...
## $ Tamaño : chr "Individual" "Individual" "Individual" "Individual" ...
## $ Retornable_NR : chr "No Retornable" "No Retornable" "No Retornable" "No Retornable" ...
## $ Enero : chr "" "" "" "" ...
## $ Febrero : chr "" "2" "" "" ...
## $ Marzo : chr "" "8" "3" "" ...
## $ Abril : chr "" "4" "6" "" ...
## $ Mayo : chr "" "4" "3" "" ...
## $ Junio : chr "" "2" "3" "" ...
## $ Julio : chr "" "2" "3" "" ...
## $ Agosto : chr "" "2" "3" "" ...
## $ Septiembre : chr "" "2" "3" "" ...
## $ Octubre : chr "" "2" "3" "" ...
## $ Noviembre : chr "" "4" "3" "" ...
## $ Diciembre : chr "1" "2" "3" "1" ...
# Convert the customer id and the twelve monthly volume columns from text to
# integer in a single pass instead of thirteen copy-pasted assignments.
# as.integer() emits "NAs introduced by coercion" for a column when some of
# its values cannot be parsed; in the original run every one of these
# columns triggered that warning.
# NOTE(review): the unparseable cells become NA, and rows with NA in a model
# variable are dropped by lm() further down. Inspect the raw values (e.g.
# thousands separators or stray text) to confirm that no real sales figures
# are being discarded.
columnas_enteras <- c(
  "Cliente",
  "Enero", "Febrero", "Marzo", "Abril", "Mayo", "Junio",
  "Julio", "Agosto", "Septiembre", "Octubre", "Noviembre", "Diciembre"
)
bd[columnas_enteras] <- lapply(bd[columnas_enteras], as.integer)
# Optional step, left disabled: restrict the analysis to "Micro" customers.
# library(dplyr)
# bd2 <- filter(bd, Tamaño.Cte.Industria == "Micro")
# summary(bd2)

# First model: January volume explained by segment, presentation, size and
# returnability, fitted on the full data set.
# The size column is written `Tamaño`; the earlier spelling carried a
# mis-encoded "ñ" (NOTE(review): confirm against names(bd)).
# In the original run Retornable_NR got an NA coefficient ("not defined
# because of singularities").
regresion <- lm(
  Enero ~ Segmento.Det + Presentacion + Tamaño + Retornable_NR,
  data = bd
)
summary(regresion)
##
## Call:
## lm(formula = Enero ~ Segmento.Det + Presentacion + Tamaño +
## Retornable_NR, data = bd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -71.26 -5.95 -0.86 1.33 966.00
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.15243 0.63718 0.239 0.810933
## Segmento.DetAgua Purificada 4.66404 0.70937 6.575 4.88e-11 ***
## Segmento.DetAgua Saborizada -1.42197 0.74162 -1.917 0.055191 .
## Segmento.DetBebidas de Fruta 0.98787 0.63032 1.567 0.117056
## Segmento.DetBebidas de Soya 0.94708 2.50157 0.379 0.704989
## Segmento.DetBebidas Energeticas 11.90635 0.93125 12.785 < 2e-16 ***
## Segmento.DetCafe Listo Para Bebe -1.71183 2.02938 -0.844 0.398937
## Segmento.DetColas Light 7.50047 0.63610 11.791 < 2e-16 ***
## Segmento.DetColas Regular 21.27155 0.61327 34.686 < 2e-16 ***
## Segmento.DetIsotónicos Light -2.19258 1.58286 -1.385 0.165992
## Segmento.DetIsotónicos Regular -0.71862 0.69519 -1.034 0.301277
## Segmento.DetJugos y Néctares 2.15427 0.77962 2.763 0.005723 **
## Segmento.DetLeche UHT Especializ -1.26888 1.74705 -0.726 0.467657
## Segmento.DetLeche UHT Regular -1.37030 1.72970 -0.792 0.428234
## Segmento.DetLeche UHT Saborizada 0.99574 2.78360 0.358 0.720557
## Segmento.DetPolvos 1.21821 19.83101 0.061 0.951017
## Segmento.DetSabores Light -0.97784 0.75695 -1.292 0.196418
## Segmento.DetSabores Regular -2.05300 0.58646 -3.501 0.000464 ***
## Segmento.DetTé Light -1.73539 3.44607 -0.504 0.614553
## Segmento.DetTé Regular -0.39498 0.72665 -0.544 0.586743
## Presentacion1 Ltro. Tetra 3.13124 1.49935 2.088 0.036763 *
## Presentacion1.250 Lts NR 0.26963 0.57907 0.466 0.641486
## Presentacion1.5 Lts. NR 2.56855 0.33239 7.727 1.10e-14 ***
## Presentacion1.5 Lts. Ret 11.58011 0.47716 24.269 < 2e-16 ***
## Presentacion1.750 Lts NR -8.64830 0.71312 -12.127 < 2e-16 ***
## Presentacion100 ml NR Tetra -3.98617 2.42464 -1.644 0.100172
## Presentacion12 Oz. NR Pet -12.69190 0.59197 -21.440 < 2e-16 ***
## Presentacion12 Oz. NR Vidrio 2.71821 5.88685 0.462 0.644266
## Presentacion12 Oz. Ret -3.78984 0.71010 -5.337 9.46e-08 ***
## Presentacion125 ml NR Tetra -4.05397 1.79426 -2.259 0.023859 *
## Presentacion2 Lts. NR 5.57947 0.33367 16.721 < 2e-16 ***
## Presentacion2 Lts. Ret -4.07079 0.76137 -5.347 8.97e-08 ***
## Presentacion2.5 Lts. NR 12.85447 0.41190 31.208 < 2e-16 ***
## Presentacion2.5 Lts. Ret Pet 49.83402 0.46392 107.420 < 2e-16 ***
## Presentacion200 ml Tetra -3.39505 2.77434 -1.224 0.221055
## Presentacion235 ml NR Vid -15.64521 0.61869 -25.288 < 2e-16 ***
## Presentacion237 ml NR Pet -4.92657 1.25527 -3.925 8.69e-05 ***
## Presentacion237 ml NR Vid -9.62600 3.54052 -2.719 0.006552 **
## Presentacion250 ml Tetra -4.00227 0.78509 -5.098 3.44e-07 ***
## Presentacion250 ml. NR PET -1.26010 0.53056 -2.375 0.017549 *
## Presentacion250 ML. NR VID -4.02729 0.96716 -4.164 3.13e-05 ***
## Presentacion3 Lts. NR 7.89587 3.38598 2.332 0.019705 *
## Presentacion300 ML. NR PET -3.53675 0.84617 -4.180 2.92e-05 ***
## Presentacion350 ML NR PET -7.66011 10.59819 -0.723 0.469819
## Presentacion355 Ml NR Pet -5.25305 2.19872 -2.389 0.016889 *
## Presentacion400 ml NR -0.77727 0.52836 -1.471 0.141267
## Presentacion413 ml NR VId -1.59460 0.73113 -2.181 0.029184 *
## Presentacion473 ml NR -3.13329 2.31070 -1.356 0.175102
## Presentacion5 Lts. NR 4.87934 1.31673 3.706 0.000211 ***
## Presentacion500 ml NR PET -7.85446 0.62317 -12.604 < 2e-16 ***
## Presentacion500 ml NR Vidrio -4.88632 0.54094 -9.033 < 2e-16 ***
## Presentacion500 ml Ret 9.63179 0.48915 19.691 < 2e-16 ***
## Presentacion6.5 Oz. Ret -22.22859 2.06983 -10.739 < 2e-16 ***
## Presentacion600 ml NR -0.01699 0.42048 -0.040 0.967778
## Presentacion710 ml NR -13.09879 2.48719 -5.267 1.39e-07 ***
## Presentacion8 Oz. NR -15.54148 0.73295 -21.204 < 2e-16 ***
## Presentacion946 ml NR Tetra 0.74832 2.19366 0.341 0.733007
## PresentacionBag In Box 0.43004 16.30382 0.026 0.978957
## PresentacionLata -14.03884 0.60528 -23.194 < 2e-16 ***
## PresentacionLata 16 Oz. -18.02910 0.71216 -25.316 < 2e-16 ***
## PresentacionLata 222 ml -2.52967 7.51573 -0.337 0.736431
## PresentacionLata 235 ml -11.85441 0.56203 -21.092 < 2e-16 ***
## PresentacionLata 237 ml -14.96592 9.38096 -1.595 0.110635
## PresentacionLata 335 ml -3.95048 2.05074 -1.926 0.054060 .
## PresentacionLata 340 ml -14.90506 1.77138 -8.414 < 2e-16 ***
## PresentacionLata 450 ml -14.05582 1.55716 -9.027 < 2e-16 ***
## PresentacionLata 453 ml -3.29515 2.14134 -1.539 0.123848
## PresentacionLATA 680 ML -1.29590 3.50407 -0.370 0.711512
## PresentacionLata 8 OZ. -0.66468 2.28915 -0.290 0.771540
## PresentacionSobres 907 grs NA NA NA NA
## TamañoIndividual 3.12936 0.49310 6.346 2.21e-10 ***
## Retornable_NRRetornable NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.03 on 232887 degrees of freedom
## (233552 observations deleted due to missingness)
## Multiple R-squared: 0.2019, Adjusted R-squared: 0.2016
## F-statistic: 853.7 on 69 and 232887 DF, p-value: < 2.2e-16
# Adjusted model: same response and predictors as the first fit, minus
# Retornable_NR, whose coefficient could not be estimated there.
# The size column is written `Tamaño`; the earlier spelling carried a
# mis-encoded "ñ" (NOTE(review): confirm against names(bd)).
regresion_ajustada <- lm(
  Enero ~ Segmento.Det + Presentacion + Tamaño,
  data = bd
)
summary(regresion_ajustada)
##
## Call:
## lm(formula = Enero ~ Segmento.Det + Presentacion + Tamaño, data = bd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -71.26 -5.95 -0.86 1.33 966.00
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.15243 0.63718 0.239 0.810933
## Segmento.DetAgua Purificada 4.66404 0.70937 6.575 4.88e-11 ***
## Segmento.DetAgua Saborizada -1.42197 0.74162 -1.917 0.055191 .
## Segmento.DetBebidas de Fruta 0.98787 0.63032 1.567 0.117056
## Segmento.DetBebidas de Soya 0.94708 2.50157 0.379 0.704989
## Segmento.DetBebidas Energeticas 11.90635 0.93125 12.785 < 2e-16 ***
## Segmento.DetCafe Listo Para Bebe -1.71183 2.02938 -0.844 0.398937
## Segmento.DetColas Light 7.50047 0.63610 11.791 < 2e-16 ***
## Segmento.DetColas Regular 21.27155 0.61327 34.686 < 2e-16 ***
## Segmento.DetIsotónicos Light -2.19258 1.58286 -1.385 0.165992
## Segmento.DetIsotónicos Regular -0.71862 0.69519 -1.034 0.301277
## Segmento.DetJugos y Néctares 2.15427 0.77962 2.763 0.005723 **
## Segmento.DetLeche UHT Especializ -1.26888 1.74705 -0.726 0.467657
## Segmento.DetLeche UHT Regular -1.37030 1.72970 -0.792 0.428234
## Segmento.DetLeche UHT Saborizada 0.99574 2.78360 0.358 0.720557
## Segmento.DetPolvos 1.21821 19.83101 0.061 0.951017
## Segmento.DetSabores Light -0.97784 0.75695 -1.292 0.196418
## Segmento.DetSabores Regular -2.05300 0.58646 -3.501 0.000464 ***
## Segmento.DetTé Light -1.73539 3.44607 -0.504 0.614553
## Segmento.DetTé Regular -0.39498 0.72665 -0.544 0.586743
## Presentacion1 Ltro. Tetra 3.13124 1.49935 2.088 0.036763 *
## Presentacion1.250 Lts NR 0.26963 0.57907 0.466 0.641486
## Presentacion1.5 Lts. NR 2.56855 0.33239 7.727 1.10e-14 ***
## Presentacion1.5 Lts. Ret 11.58011 0.47716 24.269 < 2e-16 ***
## Presentacion1.750 Lts NR -8.64830 0.71312 -12.127 < 2e-16 ***
## Presentacion100 ml NR Tetra -3.98617 2.42464 -1.644 0.100172
## Presentacion12 Oz. NR Pet -12.69190 0.59197 -21.440 < 2e-16 ***
## Presentacion12 Oz. NR Vidrio 2.71821 5.88685 0.462 0.644266
## Presentacion12 Oz. Ret -3.78984 0.71010 -5.337 9.46e-08 ***
## Presentacion125 ml NR Tetra -4.05397 1.79426 -2.259 0.023859 *
## Presentacion2 Lts. NR 5.57947 0.33367 16.721 < 2e-16 ***
## Presentacion2 Lts. Ret -4.07079 0.76137 -5.347 8.97e-08 ***
## Presentacion2.5 Lts. NR 12.85447 0.41190 31.208 < 2e-16 ***
## Presentacion2.5 Lts. Ret Pet 49.83402 0.46392 107.420 < 2e-16 ***
## Presentacion200 ml Tetra -3.39505 2.77434 -1.224 0.221055
## Presentacion235 ml NR Vid -15.64521 0.61869 -25.288 < 2e-16 ***
## Presentacion237 ml NR Pet -4.92657 1.25527 -3.925 8.69e-05 ***
## Presentacion237 ml NR Vid -9.62600 3.54052 -2.719 0.006552 **
## Presentacion250 ml Tetra -4.00227 0.78509 -5.098 3.44e-07 ***
## Presentacion250 ml. NR PET -1.26010 0.53056 -2.375 0.017549 *
## Presentacion250 ML. NR VID -4.02729 0.96716 -4.164 3.13e-05 ***
## Presentacion3 Lts. NR 7.89587 3.38598 2.332 0.019705 *
## Presentacion300 ML. NR PET -3.53675 0.84617 -4.180 2.92e-05 ***
## Presentacion350 ML NR PET -7.66011 10.59819 -0.723 0.469819
## Presentacion355 Ml NR Pet -5.25305 2.19872 -2.389 0.016889 *
## Presentacion400 ml NR -0.77727 0.52836 -1.471 0.141267
## Presentacion413 ml NR VId -1.59460 0.73113 -2.181 0.029184 *
## Presentacion473 ml NR -3.13329 2.31070 -1.356 0.175102
## Presentacion5 Lts. NR 4.87934 1.31673 3.706 0.000211 ***
## Presentacion500 ml NR PET -7.85446 0.62317 -12.604 < 2e-16 ***
## Presentacion500 ml NR Vidrio -4.88632 0.54094 -9.033 < 2e-16 ***
## Presentacion500 ml Ret 9.63179 0.48915 19.691 < 2e-16 ***
## Presentacion6.5 Oz. Ret -22.22859 2.06983 -10.739 < 2e-16 ***
## Presentacion600 ml NR -0.01699 0.42048 -0.040 0.967778
## Presentacion710 ml NR -13.09879 2.48719 -5.267 1.39e-07 ***
## Presentacion8 Oz. NR -15.54148 0.73295 -21.204 < 2e-16 ***
## Presentacion946 ml NR Tetra 0.74832 2.19366 0.341 0.733007
## PresentacionBag In Box 0.43004 16.30382 0.026 0.978957
## PresentacionLata -14.03884 0.60528 -23.194 < 2e-16 ***
## PresentacionLata 16 Oz. -18.02910 0.71216 -25.316 < 2e-16 ***
## PresentacionLata 222 ml -2.52967 7.51573 -0.337 0.736431
## PresentacionLata 235 ml -11.85441 0.56203 -21.092 < 2e-16 ***
## PresentacionLata 237 ml -14.96592 9.38096 -1.595 0.110635
## PresentacionLata 335 ml -3.95048 2.05074 -1.926 0.054060 .
## PresentacionLata 340 ml -14.90506 1.77138 -8.414 < 2e-16 ***
## PresentacionLata 450 ml -14.05582 1.55716 -9.027 < 2e-16 ***
## PresentacionLata 453 ml -3.29515 2.14134 -1.539 0.123848
## PresentacionLATA 680 ML -1.29590 3.50407 -0.370 0.711512
## PresentacionLata 8 OZ. -0.66468 2.28915 -0.290 0.771540
## PresentacionSobres 907 grs NA NA NA NA
## TamañoIndividual 3.12936 0.49310 6.346 2.21e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.03 on 232887 degrees of freedom
## (233552 observations deleted due to missingness)
## Multiple R-squared: 0.2019, Adjusted R-squared: 0.2016
## F-statistic: 853.7 on 69 and 232887 DF, p-value: < 2.2e-16
# Standard lm diagnostic plots for the adjusted model.
plot(regresion_ajustada)

### Build a prediction model
# Predict January volume for one hypothetical product. The column names must
# match the model terms exactly, so the size column is written `Tamaño`
# (the earlier spelling carried a mis-encoded "ñ").
# predict() warns that the fit is rank-deficient; the model summary shows one
# Presentacion level without an estimable coefficient, which is presumably
# the cause.
datos_nuevos <- data.frame(
  Segmento.Det = "Colas Regular",
  Presentacion = "2.5 Lts. Ret Pet",
  Tamaño = "Familiar"
)
predict(regresion_ajustada, datos_nuevos)
## Warning in predict.lm(regresion_ajustada, datos_nuevos): prediction from a
## rank-deficient fit may be misleading
## 1
## 71.258
En conclusión, podemos utilizar regresiones lineales para generar modelos predictivos de variables de interés, como las ventas. En este caso, se recomienda incluir más variables, y de mayor impacto sobre las ventas, para que el modelo sea más confiable (actualmente el R² es de aproximadamente 0.20, es decir, el modelo explica cerca del 20% de la variabilidad).