bd <- read.csv("/Users/hugoenrique/Desktop/4to semestre/Semanas Tec/Bootcamp de programación/Datos Arca Continental Original.csv")
#Convertir tipos de variables
bd$Cliente <- as.integer(bd$Cliente)
## Warning: NAs introduced by coercion
bd$Enero <- as.integer(bd$Enero)
## Warning: NAs introduced by coercion
bd$Febrero <- as.integer(bd$Febrero)
## Warning: NAs introduced by coercion
bd$Marzo <- as.integer(bd$Marzo)
## Warning: NAs introduced by coercion
bd$Abril <- as.integer(bd$Abril)
## Warning: NAs introduced by coercion
bd$Mayo <- as.integer(bd$Mayo)
## Warning: NAs introduced by coercion
bd$Junio <- as.integer(bd$Junio)
## Warning: NAs introduced by coercion
bd$Julio <- as.integer(bd$Julio)
## Warning: NAs introduced by coercion
bd$Agosto <- as.integer(bd$Agosto)
## Warning: NAs introduced by coercion
bd$Septiembre <- as.integer(bd$Septiembre)
## Warning: NAs introduced by coercion
bd$Octubre <- as.integer(bd$Octubre)
## Warning: NAs introduced by coercion
bd$Noviembre <- as.integer(bd$Noviembre)
## Warning: NAs introduced by coercion
bd$Diciembre <- as.integer(bd$Diciembre)
## Warning: NAs introduced by coercion
library(dplyr) #Librería que contiene la función filter
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
bd2 <- filter(bd, Tamaño.Cte.Industria == "Extra Grande")
summary(bd2)
## ID Año Territorio Sub.Territorio
## Min. : 1 Min. :2016 Length:230190 Length:230190
## 1st Qu.: 57548 1st Qu.:2016 Class :character Class :character
## Median :147870 Median :2017 Mode :character Mode :character
## Mean :161295 Mean :2017
## 3rd Qu.:240001 3rd Qu.:2018
## Max. :317230 Max. :2019
##
## CEDI Cliente Nombre Tamaño.Cte.Industria
## Length:230190 Min. : 4 Length:230190 Length:230190
## Class :character 1st Qu.: 2859 Class :character Class :character
## Mode :character Median : 6251 Mode :character Mode :character
## Mean :22028
## 3rd Qu.: 9743
## Max. :99998
##
## Segmento.Det Marca Presentacion Tamaño
## Length:230190 Length:230190 Length:230190 Length:230190
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Retornable_NR Enero Febrero Marzo
## Length:230190 Min. :-19.00 Min. :-11.00 Min. :-32.00
## Class :character 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00
## Mode :character Median : 3.00 Median : 3.00 Median : 3.00
## Mean : 12.52 Mean : 12.03 Mean : 13.88
## 3rd Qu.: 7.00 3rd Qu.: 7.00 3rd Qu.: 8.00
## Max. :999.00 Max. :986.00 Max. :986.00
## NA's :100876 NA's :98670 NA's :95607
## Abril Mayo Junio Julio
## Min. :-70.00 Min. :-106.00 Min. :-211.00 Min. :-60.00
## 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00
## Median : 3.00 Median : 3.00 Median : 3.00 Median : 3.00
## Mean : 13.99 Mean : 14.97 Mean : 14.51 Mean : 14.03
## 3rd Qu.: 8.00 3rd Qu.: 8.00 3rd Qu.: 8.00 3rd Qu.: 8.00
## Max. :993.00 Max. : 991.00 Max. : 998.00 Max. :993.00
## NA's :93209 NA's :89243 NA's :87624 NA's :91887
## Agosto Septiembre Octubre Noviembre
## Min. :-211.00 Min. :-527.00 Min. :-14 Min. :-25.00
## 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1 1st Qu.: 1.00
## Median : 3.00 Median : 3.00 Median : 3 Median : 3.00
## Mean : 14.43 Mean : 13.82 Mean : 14 Mean : 13.63
## 3rd Qu.: 8.00 3rd Qu.: 8.00 3rd Qu.: 8 3rd Qu.: 8.00
## Max. : 999.00 Max. : 993.00 Max. :998 Max. :991.00
## NA's :91145 NA's :127280 NA's :128038 NA's :127216
## Diciembre
## Min. :-28.00
## 1st Qu.: 1.00
## Median : 3.00
## Mean : 14.97
## 3rd Qu.: 8.00
## Max. :997.00
## NA's :129456
regresion <- lm(Enero ~ Marca + Presentacion + Tamaño + Retornable_NR, data=bd2)
summary(regresion)
##
## Call:
## lm(formula = Enero ~ Marca + Presentacion + Tamaño + Retornable_NR,
## data = bd2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -134.74 -7.44 -0.81 2.22 946.88
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.66259 4.64269 1.004 0.315243
## MarcaAdeS Lácteo -0.17188 3.03047 -0.057 0.954770
## MarcaBarista Bros -9.72333 5.89913 -1.648 0.099301 .
## MarcaBebere -8.90925 5.58393 -1.596 0.110599
## MarcaBurn 6.71679 4.97761 1.349 0.177211
## MarcaCafé Blak -5.63888 24.12629 -0.234 0.815200
## MarcaCiel Agua Purificada 1.29928 4.77803 0.272 0.785678
## MarcaCiel Exprim -8.61296 4.76977 -1.806 0.070962 .
## MarcaCiel Mineralizada -7.19574 4.96258 -1.450 0.147061
## MarcaCiel Mini -0.18236 5.07645 -0.036 0.971344
## MarcaCiel Saborizada -7.63824 5.24853 -1.455 0.145586
## MarcaCoca-Cola 26.98998 4.65596 5.797 6.77e-09 ***
## MarcaCoca-Cola Life -0.43330 4.87337 -0.089 0.929153
## MarcaCoca-Cola Light 5.54925 4.66428 1.190 0.234153
## MarcaCoca-Cola Light Sin -4.65286 13.47638 -0.345 0.729899
## MarcaCoca-Cola Sin Azúcar -3.93691 4.73680 -0.831 0.405901
## MarcaCoca-Cola Zero 0.51419 4.70044 0.109 0.912891
## MarcaDel Valle 0.40688 4.26362 0.095 0.923973
## MarcaDel Valle Bits -10.06815 33.76845 -0.298 0.765587
## MarcaDel Valle Blends -9.40293 5.54052 -1.697 0.089676 .
## MarcaDel Valle Nutridefen -8.07959 5.14415 -1.571 0.116270
## MarcaDel Valle Reserva -1.14813 4.73348 -0.243 0.808350
## MarcaDel Valle y Nada -4.38352 4.70899 -0.931 0.351915
## MarcaDelaware Punch -7.80462 4.72515 -1.652 0.098594 .
## MarcaFanta -9.29700 4.66647 -1.992 0.046340 *
## MarcaFanta Zero -7.48322 9.57833 -0.781 0.434648
## MarcaFresca -10.35976 4.67393 -2.216 0.026659 *
## MarcaFresca Zero -8.12905 14.43054 -0.563 0.573216
## MarcaFrutsi -6.95392 4.75257 -1.463 0.143418
## MarcaFuze Tea -5.86448 4.68003 -1.253 0.210177
## MarcaFuze Tea Light -8.18555 6.38765 -1.281 0.200032
## MarcaGlacéau 2.60462 5.08122 0.513 0.608234
## MarcaManzana Lift -10.65445 4.67212 -2.280 0.022584 *
## MarcaManzana Lift Zero -7.93857 13.47638 -0.589 0.555813
## MarcaMonster Energy 21.96235 4.93550 4.450 8.60e-06 ***
## MarcaPowerade -6.80672 4.73565 -1.437 0.150625
## MarcaPowerade Zero -8.71238 5.04294 -1.728 0.084055 .
## MarcaPulpy -9.15891 4.95080 -1.850 0.064318 .
## MarcaSanta Clara Deslacto -3.25577 4.70386 -0.692 0.488845
## MarcaSanta Clara Entera -3.40974 4.71455 -0.723 0.469536
## MarcaSanta Clara Light -3.48830 4.97293 -0.701 0.483019
## MarcaSanta Clara Saboriza 0.02659 2.24657 0.012 0.990556
## MarcaSenzao -7.43947 4.76920 -1.560 0.118787
## MarcaSidral Mundet -9.65667 4.68580 -2.061 0.039321 *
## MarcaSprite -8.25354 4.66490 -1.769 0.076848 .
## MarcaSprite Zero -6.60379 4.76965 -1.385 0.166194
## MarcaTopo Chico A.M. -5.26667 4.76989 -1.104 0.269531
## MarcaValle Frut -2.21865 4.67676 -0.474 0.635216
## Presentacion1 Ltro. Tetra 0.72937 2.61558 0.279 0.780357
## Presentacion1.250 Lts NR 4.77344 1.03636 4.606 4.11e-06 ***
## Presentacion1.5 Lts. NR 4.21685 0.52865 7.977 1.52e-15 ***
## Presentacion1.5 Lts. Ret 25.06474 0.82998 30.199 < 2e-16 ***
## Presentacion1.750 Lts NR -9.20094 1.45005 -6.345 2.23e-10 ***
## Presentacion100 ml NR Tetra -8.20390 3.51815 -2.332 0.019709 *
## Presentacion12 Oz. NR Pet -18.57227 0.93891 -19.781 < 2e-16 ***
## Presentacion12 Oz. NR Vidrio 4.35856 9.35280 0.466 0.641204
## Presentacion12 Oz. Ret -5.17095 1.08271 -4.776 1.79e-06 ***
## Presentacion125 ml NR Tetra -8.14949 2.73268 -2.982 0.002862 **
## Presentacion2 Lts. NR 7.47221 0.50629 14.759 < 2e-16 ***
## Presentacion2 Lts. Ret -9.58378 1.05712 -9.066 < 2e-16 ***
## Presentacion2.5 Lts. NR 20.13858 0.63760 31.585 < 2e-16 ***
## Presentacion2.5 Lts. Ret Pet 107.08248 0.83579 128.121 < 2e-16 ***
## Presentacion200 ml Tetra -8.86744 5.06137 -1.752 0.079779 .
## Presentacion235 ml NR Vid -19.66433 1.03838 -18.938 < 2e-16 ***
## Presentacion237 ml NR Pet -8.43601 2.35904 -3.576 0.000349 ***
## Presentacion237 ml NR Vid -8.51690 4.87278 -1.748 0.080492 .
## Presentacion250 ml Tetra -8.34515 2.12419 -3.929 8.55e-05 ***
## Presentacion250 ml. NR PET -0.07073 0.87469 -0.081 0.935555
## Presentacion250 ML. NR VID -8.86827 2.27036 -3.906 9.38e-05 ***
## Presentacion3 Lts. NR 11.11463 4.33186 2.566 0.010295 *
## Presentacion300 ML. NR PET -8.49065 1.54625 -5.491 4.00e-08 ***
## Presentacion350 ML NR PET -11.23816 14.96535 -0.751 0.452687
## Presentacion355 Ml NR Pet -8.59468 3.16446 -2.716 0.006609 **
## Presentacion400 ml NR -0.15327 0.83693 -0.183 0.854695
## Presentacion413 ml NR VId -4.90060 2.02357 -2.422 0.015447 *
## Presentacion473 ml NR NA NA NA NA
## Presentacion5 Lts. NR 4.53177 1.89526 2.391 0.016799 *
## Presentacion500 ml NR PET -11.01752 0.96783 -11.384 < 2e-16 ***
## Presentacion500 ml NR Vidrio -2.67909 0.86458 -3.099 0.001944 **
## Presentacion500 ml Ret 19.56016 0.78045 25.063 < 2e-16 ***
## Presentacion6.5 Oz. Ret -34.39658 3.06160 -11.235 < 2e-16 ***
## Presentacion600 ml NR -0.34316 0.65026 -0.528 0.597686
## Presentacion710 ml NR -25.36696 3.07300 -8.255 < 2e-16 ***
## Presentacion8 Oz. NR -22.67582 1.07548 -21.084 < 2e-16 ***
## Presentacion946 ml NR Tetra -2.23861 4.06185 -0.551 0.581546
## PresentacionBag In Box -2.50000 30.52818 -0.082 0.934733
## PresentacionLata -19.13778 0.92378 -20.717 < 2e-16 ***
## PresentacionLata 16 Oz. -31.10298 1.13424 -27.422 < 2e-16 ***
## PresentacionLata 222 ml -3.94107 9.70367 -0.406 0.684639
## PresentacionLata 235 ml -16.63938 0.87871 -18.936 < 2e-16 ***
## PresentacionLata 237 ml -16.60567 11.97553 -1.387 0.165556
## PresentacionLata 335 ml -8.80217 3.33364 -2.640 0.008282 **
## PresentacionLata 340 ml -12.39084 3.21238 -3.857 0.000115 ***
## PresentacionLata 450 ml -15.69556 2.59987 -6.037 1.57e-09 ***
## PresentacionLata 453 ml -7.43202 3.08707 -2.407 0.016065 *
## PresentacionLATA 680 ML -2.64537 4.35917 -0.607 0.543950
## PresentacionLata 8 OZ. 0.14937 2.95475 0.051 0.959683
## PresentacionSobres 907 grs NA NA NA NA
## TamañoIndividual 5.47629 0.76418 7.166 7.75e-13 ***
## Retornable_NRRetornable NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.44 on 129217 degrees of freedom
## (100876 observations deleted due to missingness)
## Multiple R-squared: 0.3137, Adjusted R-squared: 0.3132
## F-statistic: 615.3 on 96 and 129217 DF, p-value: < 2.2e-16
regresion_ajustada <- lm(Enero ~ Segmento.Det + Presentacion + Tamaño, data=bd2)
summary(regresion_ajustada)
##
## Call:
## lm(formula = Enero ~ Segmento.Det + Presentacion + Tamaño, data = bd2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -134.74 -7.78 -0.68 2.06 946.95
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.21064 0.96559 -0.218 0.827313
## Segmento.DetAgua Purificada 7.18021 1.08684 6.607 3.95e-11 ***
## Segmento.DetAgua Saborizada -2.02992 1.12703 -1.801 0.071686 .
## Segmento.DetBebidas de Fruta 1.27376 0.95872 1.329 0.183981
## Segmento.DetBebidas de Soya 1.29113 4.19002 0.308 0.757973
## Segmento.DetBebidas Energeticas 17.47959 1.38683 12.604 < 2e-16 ***
## Segmento.DetCafe Listo Para Bebe -2.44721 3.72479 -0.657 0.511178
## Segmento.DetColas Light 8.82176 0.97005 9.094 < 2e-16 ***
## Segmento.DetColas Regular 32.51914 0.93706 34.703 < 2e-16 ***
## Segmento.DetIsotónicos Light -2.92410 2.08496 -1.402 0.160778
## Segmento.DetIsotónicos Regular -1.04661 1.06060 -0.987 0.323740
## Segmento.DetJugos y Néctares 2.42054 1.17378 2.062 0.039193 *
## Segmento.DetLeche UHT Especializ -1.24770 2.37196 -0.526 0.598876
## Segmento.DetLeche UHT Regular -1.42453 2.34389 -0.608 0.543347
## Segmento.DetLeche UHT Saborizada 1.35088 4.69472 0.288 0.773543
## Segmento.DetPolvos -0.09966 23.70067 -0.004 0.996645
## Segmento.DetSabores Light -0.87972 1.19067 -0.739 0.460003
## Segmento.DetSabores Regular -3.07199 0.89398 -3.436 0.000590 ***
## Segmento.DetTé Light -2.39726 4.44472 -0.539 0.589646
## Segmento.DetTé Regular -0.32936 1.13608 -0.290 0.771888
## Presentacion1 Ltro. Tetra 3.59452 1.93023 1.862 0.062573 .
## Presentacion1.250 Lts NR 4.11751 1.03245 3.988 6.66e-05 ***
## Presentacion1.5 Lts. NR 3.34411 0.51038 6.552 5.69e-11 ***
## Presentacion1.5 Lts. Ret 24.38983 0.82433 29.587 < 2e-16 ***
## Presentacion1.750 Lts NR -9.88768 1.44795 -6.829 8.60e-12 ***
## Presentacion100 ml NR Tetra -5.43968 3.41538 -1.593 0.111229
## Presentacion12 Oz. NR Pet -18.04815 0.92578 -19.495 < 2e-16 ***
## Presentacion12 Oz. NR Vidrio 4.63110 9.34927 0.495 0.620358
## Presentacion12 Oz. Ret -5.19985 1.07433 -4.840 1.30e-06 ***
## Presentacion125 ml NR Tetra -5.51123 2.33014 -2.365 0.018022 *
## Presentacion2 Lts. NR 6.74477 0.49471 13.634 < 2e-16 ***
## Presentacion2 Lts. Ret -9.64617 1.05264 -9.164 < 2e-16 ***
## Presentacion2.5 Lts. NR 19.04685 0.62620 30.416 < 2e-16 ***
## Presentacion2.5 Lts. Ret Pet 106.42655 0.83039 128.165 < 2e-16 ***
## Presentacion200 ml Tetra -4.65251 4.66424 -0.997 0.318531
## Presentacion235 ml NR Vid -19.77293 1.02879 -19.220 < 2e-16 ***
## Presentacion237 ml NR Pet -6.43656 2.26354 -2.844 0.004462 **
## Presentacion237 ml NR Vid -12.23275 4.66440 -2.623 0.008728 **
## Presentacion250 ml Tetra -4.81959 1.23504 -3.902 9.53e-05 ***
## Presentacion250 ml. NR PET -1.80762 0.80054 -2.258 0.023947 *
## Presentacion250 ML. NR VID -5.34271 1.47284 -3.627 0.000286 ***
## Presentacion3 Lts. NR 9.42048 4.30710 2.187 0.028730 *
## Presentacion300 ML. NR PET -5.05021 1.18999 -4.244 2.20e-05 ***
## Presentacion350 ML NR PET -11.57988 14.98353 -0.773 0.439618
## Presentacion355 Ml NR Pet -8.93640 3.16767 -2.821 0.004786 **
## Presentacion400 ml NR -1.25967 0.80624 -1.562 0.118195
## Presentacion413 ml NR VId -1.55894 1.17801 -1.323 0.185717
## Presentacion473 ml NR -4.64380 2.97450 -1.561 0.118479
## Presentacion5 Lts. NR 3.52407 1.88555 1.869 0.061627 .
## Presentacion500 ml NR PET -12.04383 0.93811 -12.838 < 2e-16 ***
## Presentacion500 ml NR Vidrio -3.09337 0.85170 -3.632 0.000281 ***
## Presentacion500 ml Ret 18.77284 0.76605 24.506 < 2e-16 ***
## Presentacion6.5 Oz. Ret -34.38652 3.06221 -11.229 < 2e-16 ***
## Presentacion600 ml NR -0.59223 0.64348 -0.920 0.357383
## Presentacion710 ml NR -25.35690 3.07363 -8.250 < 2e-16 ***
## Presentacion8 Oz. NR -22.21869 1.06702 -20.823 < 2e-16 ***
## Presentacion946 ml NR Tetra 1.26389 3.67915 0.344 0.731201
## PresentacionBag In Box -0.15245 19.67812 -0.008 0.993819
## PresentacionLata -19.12501 0.91368 -20.932 < 2e-16 ***
## PresentacionLata 16 Oz. -28.40721 1.06632 -26.640 < 2e-16 ***
## PresentacionLata 222 ml -3.93698 9.71413 -0.405 0.685270
## PresentacionLata 235 ml -17.17855 0.85820 -20.017 < 2e-16 ***
## PresentacionLata 237 ml -21.82926 11.90449 -1.834 0.066701 .
## PresentacionLata 335 ml -5.27661 2.85351 -1.849 0.064436 .
## PresentacionLata 340 ml -21.72660 2.64477 -8.215 < 2e-16 ***
## PresentacionLata 450 ml -20.91914 2.17439 -9.621 < 2e-16 ***
## PresentacionLata 453 ml -4.73762 2.77744 -1.706 0.088057 .
## PresentacionLATA 680 ML -2.64128 4.36134 -0.606 0.544774
## PresentacionLata 8 OZ. -0.88062 2.95299 -0.298 0.765541
## PresentacionSobres 907 grs NA NA NA NA
## TamañoIndividual 4.81031 0.74253 6.478 9.31e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.48 on 129244 degrees of freedom
## (100876 observations deleted due to missingness)
## Multiple R-squared: 0.3119, Adjusted R-squared: 0.3115
## F-statistic: 849 on 69 and 129244 DF, p-value: < 2.2e-16
datos_nuevos <- data.frame(Segmento.Det = "Colas Regular", Presentacion = "2.5 Lts. Ret Pet", Tamaño ="Familiar")
predict(regresion_ajustada, datos_nuevos)
## Warning in predict.lm(regresion_ajustada, datos_nuevos): prediction from a
## rank-deficient fit may be misleading
## 1
## 138.735
En conclusión podemos realizar regresiones lineales para generar modelos predictivos de variables de interés como las ventas. En este caso se recomienda tener más varibles y de mayor impacto sobre las ventas para que el modelo sea más confiable (actualmente 38%)