# file.choose()
bd <- read.csv("C:\\Users\\L03530621\\Desktop\\arca.csv")
bd$Cliente <- as.integer(bd$Cliente)
## Warning: NAs introduced by coercion
bd$Enero <- as.integer(bd$Enero)
## Warning: NAs introduced by coercion
bd$Febrero <- as.integer(bd$Febrero)
## Warning: NAs introduced by coercion
bd$Marzo <- as.integer(bd$Marzo)
## Warning: NAs introduced by coercion
bd$Abril <- as.integer(bd$Abril)
## Warning: NAs introduced by coercion
bd$Mayo <- as.integer(bd$Mayo)
## Warning: NAs introduced by coercion
bd$Junio <- as.integer(bd$Junio)
## Warning: NAs introduced by coercion
bd$Julio <- as.integer(bd$Julio)
## Warning: NAs introduced by coercion
bd$Agosto <- as.integer(bd$Agosto)
## Warning: NAs introduced by coercion
bd$Septiembre <- as.integer(bd$Septiembre)
## Warning: NAs introduced by coercion
bd$Octubre <- as.integer(bd$Octubre)
## Warning: NAs introduced by coercion
bd$Noviembre <- as.integer(bd$Noviembre)
## Warning: NAs introduced by coercion
bd$Diciembre <- as.integer(bd$Diciembre)
## Warning: NAs introduced by coercion
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
bd2 <- filter(bd, TamaÃ.o.Cte.Industria=="Micro")
summary(bd2)
## ï..ID AÃ.o Territorio Sub.Territorio
## Min. : 58078 Min. :2016 Length:117110 Length:117110
## 1st Qu.: 87355 1st Qu.:2016 Class :character Class :character
## Median :175263 Median :2017 Mode :character Mode :character
## Mean :207202 Mean :2017
## 3rd Qu.:318024 3rd Qu.:2019
## Max. :429493 Max. :2019
##
## CEDI Cliente Nombre TamaÃ.o.Cte.Industria
## Length:117110 Min. : 10 Length:117110 Length:117110
## Class :character 1st Qu.: 2561 Class :character Class :character
## Mode :character Median : 6647 Mode :character Mode :character
## Mean :19154
## 3rd Qu.:33410
## Max. :77604
##
## Segmento.Det Marca Presentacion TamaÃ.o
## Length:117110 Length:117110 Length:117110 Length:117110
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Retornable_NR Enero Febrero Marzo
## Length:117110 Min. : -7.00 Min. : -3.00 Min. : -4.00
## Class :character 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00
## Mode :character Median : 2.00 Median : 2.00 Median : 2.00
## Mean : 4.48 Mean : 4.24 Mean : 4.79
## 3rd Qu.: 4.00 3rd Qu.: 4.00 3rd Qu.: 4.00
## Max. :247.00 Max. :174.00 Max. :239.00
## NA's :76504 NA's :76009 NA's :76482
## Abril Mayo Junio Julio
## Min. :-11.00 Min. : -4.00 Min. :-11.00 Min. : -3.00
## 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00
## Median : 2.00 Median : 2.00 Median : 2.00 Median : 2.00
## Mean : 5.01 Mean : 5.06 Mean : 4.97 Mean : 4.91
## 3rd Qu.: 4.00 3rd Qu.: 4.00 3rd Qu.: 4.00 3rd Qu.: 4.00
## Max. :554.00 Max. :190.00 Max. :998.00 Max. :187.00
## NA's :76782 NA's :75426 NA's :75787 NA's :77799
## Agosto Septiembre Octubre Noviembre
## Min. :-14.00 Min. : -8.00 Min. :-38.00 Min. :-14.00
## 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00
## Median : 2.00 Median : 2.00 Median : 2.00 Median : 2.00
## Mean : 4.92 Mean : 4.67 Mean : 4.72 Mean : 4.48
## 3rd Qu.: 4.00 3rd Qu.: 4.00 3rd Qu.: 4.00 3rd Qu.: 4.00
## Max. :254.00 Max. :251.00 Max. :144.00 Max. :134.00
## NA's :76154 NA's :90913 NA's :91236 NA's :92121
## Diciembre
## Min. :-17.00
## 1st Qu.: 1.00
## Median : 2.00
## Mean : 4.79
## 3rd Qu.: 4.00
## Max. :278.00
## NA's :93290
regresion <- lm(Enero ~ Marca + Presentacion + TamaÃ.o + Retornable_NR, data=bd2)
summary(regresion)
##
## Call:
## lm(formula = Enero ~ Marca + Presentacion + TamaÃ.o + Retornable_NR,
## data = bd2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.046 -1.872 -0.500 0.887 224.954
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.33559 2.97774 0.113 0.910268
## MarcaAdeS Lácteo -0.07062 1.78264 -0.040 0.968402
## MarcaBarista Bros -3.52890 4.12251 -0.856 0.391999
## MarcaBebere -2.66525 3.89704 -0.684 0.494031
## MarcaBurn 0.78786 3.22383 0.244 0.806933
## MarcaCiel Agua Purificada -0.53292 3.02581 -0.176 0.860197
## MarcaCiel Exprim -2.62607 3.03116 -0.866 0.386299
## MarcaCiel Mineralizada -2.07322 3.05568 -0.678 0.497470
## MarcaCiel Mini 2.11819 3.23076 0.656 0.512064
## MarcaCiel Saborizada -2.26129 3.14265 -0.720 0.471806
## MarcaCoca-Cola 4.93779 2.98184 1.656 0.097739 .
## MarcaCoca-Cola Life 1.13279 3.03311 0.373 0.708796
## MarcaCoca-Cola Light 2.50896 2.98450 0.841 0.400543
## MarcaCoca-Cola Sin Azúcar 1.04258 2.99957 0.348 0.728160
## MarcaCoca-Cola Zero 1.73682 3.03169 0.573 0.566723
## MarcaDel Valle -0.43315 2.83382 -0.153 0.878518
## MarcaDel Valle Blends -3.57704 4.11617 -0.869 0.384841
## MarcaDel Valle Nutridefen -3.65214 3.47447 -1.051 0.293203
## MarcaDel Valle Reserva 1.30639 3.04937 0.428 0.668353
## MarcaDel Valle y Nada -2.09332 3.00451 -0.697 0.485979
## MarcaDelaware Punch -3.15473 3.00857 -1.049 0.294378
## MarcaFanta -3.57542 2.98549 -1.198 0.231081
## MarcaFresca -4.14873 2.98694 -1.389 0.164853
## MarcaFrutsi -2.59626 3.00616 -0.864 0.387787
## MarcaFuze Tea -2.46805 2.98722 -0.826 0.408694
## MarcaFuze Tea Light -3.26155 3.97524 -0.820 0.411955
## MarcaGlacéau -0.75418 3.64690 -0.207 0.836167
## MarcaManzana Lift -4.18233 2.98671 -1.400 0.161428
## MarcaMonster Energy 2.76414 3.12721 0.884 0.376755
## MarcaPowerade -2.29486 3.00852 -0.763 0.445596
## MarcaPowerade Zero -3.26155 3.15498 -1.034 0.301246
## MarcaPulpy -3.38954 3.15080 -1.076 0.282036
## MarcaSanta Clara Deslacto -1.09531 3.10957 -0.352 0.724662
## MarcaSanta Clara Entera -0.45895 3.09903 -0.148 0.882270
## MarcaSanta Clara Light -0.78050 3.19711 -0.244 0.807135
## MarcaSanta Clara Saboriza -0.15733 1.34543 -0.117 0.906912
## MarcaSenzao -2.99364 3.05107 -0.981 0.326512
## MarcaSidral Mundet -2.69726 2.99918 -0.899 0.368482
## MarcaSprite -3.34429 2.98520 -1.120 0.262598
## MarcaSprite Zero -3.00998 3.03857 -0.991 0.321892
## MarcaTopo Chico A.M. -1.73714 3.02939 -0.573 0.566356
## MarcaValle Frut 0.13773 2.98744 0.046 0.963229
## Presentacion1 Ltro. Tetra 2.25972 1.48952 1.517 0.129255
## Presentacion1.250 Lts NR 1.08347 0.36425 2.975 0.002936 **
## Presentacion1.5 Lts. NR 1.96573 0.22234 8.841 < 2e-16 ***
## Presentacion1.5 Lts. Ret 6.01180 0.25840 23.265 < 2e-16 ***
## Presentacion1.750 Lts NR -0.30889 0.56599 -0.546 0.585241
## Presentacion100 ml NR Tetra -2.48459 1.52049 -1.634 0.102252
## Presentacion12 Oz. NR Pet -4.51740 0.38786 -11.647 < 2e-16 ***
## Presentacion12 Oz. NR Vidrio -0.67811 6.96273 -0.097 0.922416
## Presentacion12 Oz. Ret -1.92953 0.43266 -4.460 8.23e-06 ***
## Presentacion125 ml NR Tetra -2.03776 1.55637 -1.309 0.190438
## Presentacion2 Lts. NR 4.74016 0.23534 20.142 < 2e-16 ***
## Presentacion2 Lts. Ret 2.09870 0.57036 3.680 0.000234 ***
## Presentacion2.5 Lts. NR 4.46473 0.26410 16.905 < 2e-16 ***
## Presentacion2.5 Lts. Ret Pet 16.77254 0.24543 68.339 < 2e-16 ***
## Presentacion200 ml Tetra -2.98929 3.22167 -0.928 0.353481
## Presentacion235 ml NR Vid -6.02894 0.42516 -14.180 < 2e-16 ***
## Presentacion237 ml NR Pet -4.48925 1.47123 -3.051 0.002280 **
## Presentacion237 ml NR Vid -3.54805 2.18943 -1.621 0.105125
## Presentacion250 ml Tetra -2.38707 1.00896 -2.366 0.017993 *
## Presentacion250 ml. NR PET 0.61364 0.42311 1.450 0.146982
## Presentacion250 ML. NR VID -2.11076 1.11686 -1.890 0.058778 .
## Presentacion3 Lts. NR 5.73666 2.47967 2.313 0.020701 *
## Presentacion300 ML. NR PET -4.23494 0.95019 -4.457 8.34e-06 ***
## Presentacion350 ML NR PET -2.38233 4.90786 -0.485 0.627387
## Presentacion355 Ml NR Pet -0.65506 1.49034 -0.440 0.660273
## Presentacion400 ml NR 0.49512 0.36135 1.370 0.170632
## Presentacion413 ml NR VId -1.10982 0.98418 -1.128 0.259471
## Presentacion473 ml NR NA NA NA NA
## Presentacion5 Lts. NR 6.08621 1.10281 5.519 3.43e-08 ***
## Presentacion500 ml NR PET -1.84289 0.41366 -4.455 8.41e-06 ***
## Presentacion500 ml NR Vidrio -2.95879 0.36603 -8.083 6.47e-16 ***
## Presentacion500 ml Ret 4.40968 0.33915 13.002 < 2e-16 ***
## Presentacion6.5 Oz. Ret -6.69590 1.22086 -5.485 4.17e-08 ***
## Presentacion600 ml NR 0.84630 0.30222 2.800 0.005108 **
## Presentacion710 ml NR -2.95304 3.12043 -0.946 0.343972
## Presentacion8 Oz. NR -6.21269 0.42867 -14.493 < 2e-16 ***
## Presentacion946 ml NR Tetra 0.51789 2.64182 0.196 0.844582
## PresentacionLata -5.21036 0.41438 -12.574 < 2e-16 ***
## PresentacionLata 16 Oz. -5.90542 0.58748 -10.052 < 2e-16 ***
## PresentacionLata 222 ml -0.44720 4.92249 -0.091 0.927613
## PresentacionLata 235 ml -4.05160 0.39009 -10.386 < 2e-16 ***
## PresentacionLata 237 ml -4.20311 7.05089 -0.596 0.551104
## PresentacionLata 335 ml -2.41068 1.80466 -1.336 0.181619
## PresentacionLata 340 ml -2.50006 2.22531 -1.123 0.261246
## PresentacionLata 450 ml -3.20311 1.63674 -1.957 0.050353 .
## PresentacionLata 453 ml -1.84838 1.90027 -0.973 0.330713
## PresentacionLATA 680 ML 0.05280 3.49351 0.015 0.987942
## PresentacionLata 8 OZ. 0.39744 1.58511 0.251 0.802024
## TamaÃ.oIndividual 3.07966 0.36006 8.553 < 2e-16 ***
## Retornable_NRRetornable NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.936 on 40516 degrees of freedom
## (76504 observations deleted due to missingness)
## Multiple R-squared: 0.3804, Adjusted R-squared: 0.3791
## F-statistic: 279.5 on 89 and 40516 DF, p-value: < 2.2e-16
regresion_ajustada <- lm(Enero ~ Segmento.Det + Presentacion + TamaÃ.o, data=bd2)
summary(regresion_ajustada)
##
## Call:
## lm(formula = Enero ~ Segmento.Det + Presentacion + TamaÃ.o, data = bd2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.046 -2.076 -0.595 0.924 224.954
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.02498 0.45311 -2.262 0.02370 *
## Segmento.DetAgua Purificada 1.37679 0.50259 2.739 0.00616 **
## Segmento.DetAgua Saborizada -0.30267 0.56938 -0.532 0.59502
## Segmento.DetBebidas de Fruta 0.71173 0.45116 1.578 0.11468
## Segmento.DetBebidas de Soya 1.41149 2.77094 0.509 0.61048
## Segmento.DetBebidas Energeticas 3.55121 0.82388 4.310 1.63e-05 ***
## Segmento.DetCafe Listo Para Bebe -0.61215 2.87457 -0.213 0.83136
## Segmento.DetColas Light 3.82549 0.45225 8.459 < 2e-16 ***
## Segmento.DetColas Regular 6.73066 0.43090 15.620 < 2e-16 ***
## Segmento.DetIsotónicos Light -1.32920 1.09404 -1.215 0.22439
## Segmento.DetIsotónicos Regular -0.41929 0.49576 -0.846 0.39770
## Segmento.DetJugos y Néctares 1.42451 0.63892 2.230 0.02578 *
## Segmento.DetLeche UHT Especializ 0.76234 1.43278 0.532 0.59468
## Segmento.DetLeche UHT Regular 1.32321 1.38665 0.954 0.33996
## Segmento.DetLeche UHT Saborizada 1.26633 3.06592 0.413 0.67958
## Segmento.DetSabores Light -1.06962 0.57087 -1.874 0.06098 .
## Segmento.DetSabores Regular -1.65506 0.42076 -3.933 8.39e-05 ***
## Segmento.DetTé Light -1.32920 2.65820 -0.500 0.61705
## Segmento.DetTé Regular -0.53880 0.50531 -1.066 0.28631
## Presentacion1 Ltro. Tetra 1.76263 1.24398 1.417 0.15651
## Presentacion1.250 Lts NR 0.65117 0.36204 1.799 0.07208 .
## Presentacion1.5 Lts. NR 1.59668 0.21801 7.324 2.46e-13 ***
## Presentacion1.5 Lts. Ret 5.57937 0.25481 21.896 < 2e-16 ***
## Presentacion1.750 Lts NR -0.74119 0.56514 -1.312 0.18970
## Presentacion100 ml NR Tetra -2.19047 1.41045 -1.553 0.12042
## Presentacion12 Oz. NR Pet -4.29893 0.38201 -11.253 < 2e-16 ***
## Presentacion12 Oz. NR Vidrio -0.64382 6.96658 -0.092 0.92637
## Presentacion12 Oz. Ret -1.99479 0.42936 -4.646 3.40e-06 ***
## Presentacion125 ml NR Tetra -2.22458 1.34928 -1.649 0.09921 .
## Presentacion2 Lts. NR 4.34058 0.23042 18.837 < 2e-16 ***
## Presentacion2 Lts. Ret 1.69384 0.56960 2.974 0.00294 **
## Presentacion2.5 Lts. NR 3.95996 0.25971 15.248 < 2e-16 ***
## Presentacion2.5 Lts. Ret Pet 16.34023 0.24161 67.631 < 2e-16 ***
## Presentacion200 ml Tetra -2.64152 3.07000 -0.860 0.38956
## Presentacion235 ml NR Vid -6.04720 0.42120 -14.357 < 2e-16 ***
## Presentacion237 ml NR Pet -2.89592 1.40592 -2.060 0.03942 *
## Presentacion237 ml NR Vid -4.46932 2.12572 -2.102 0.03552 *
## Presentacion250 ml Tetra -2.47330 0.60345 -4.099 4.16e-05 ***
## Presentacion250 ml. NR PET -0.53167 0.37288 -1.426 0.15392
## Presentacion250 ML. NR VID -2.19699 0.77092 -2.850 0.00438 **
## Presentacion3 Lts. NR 6.05504 2.46437 2.457 0.01401 *
## Presentacion300 ML. NR PET -1.68082 0.66244 -2.537 0.01117 *
## Presentacion350 ML NR PET -2.52062 4.91639 -0.513 0.60817
## Presentacion355 Ml NR Pet -0.79334 1.49280 -0.531 0.59511
## Presentacion400 ml NR 0.19389 0.35483 0.546 0.58478
## Presentacion413 ml NR VId -1.19742 0.58135 -2.060 0.03943 *
## Presentacion473 ml NR -1.60555 2.48130 -0.647 0.51760
## Presentacion5 Lts. NR 5.53707 1.10131 5.028 4.98e-07 ***
## Presentacion500 ml NR PET -2.02855 0.40887 -4.961 7.03e-07 ***
## Presentacion500 ml NR Vidrio -2.93297 0.36139 -8.116 4.96e-16 ***
## Presentacion500 ml Ret 4.21004 0.33390 12.609 < 2e-16 ***
## Presentacion6.5 Oz. Ret -6.71734 1.22149 -5.499 3.84e-08 ***
## Presentacion600 ml NR 0.68538 0.29883 2.294 0.02182 *
## Presentacion710 ml NR -2.97448 3.12530 -0.952 0.34123
## Presentacion8 Oz. NR -6.15745 0.42505 -14.487 < 2e-16 ***
## Presentacion946 ml NR Tetra 0.43167 2.51917 0.171 0.86395
## PresentacionLata -5.24910 0.41074 -12.780 < 2e-16 ***
## PresentacionLata 16 Oz. -5.66742 0.56110 -10.101 < 2e-16 ***
## PresentacionLata 222 ml -0.60503 4.93059 -0.123 0.90234
## PresentacionLata 235 ml -4.17525 0.38150 -10.944 < 2e-16 ***
## PresentacionLata 237 ml -5.19503 6.99125 -0.743 0.45744
## PresentacionLata 335 ml -2.49690 1.61578 -1.545 0.12228
## PresentacionLata 340 ml -5.03401 1.00660 -5.001 5.73e-07 ***
## PresentacionLata 450 ml -4.19503 1.29495 -3.240 0.00120 **
## PresentacionLata 453 ml -1.95250 1.79909 -1.085 0.27781
## PresentacionLATA 680 ML -0.10503 3.49890 -0.030 0.97605
## PresentacionLata 8 OZ. 0.01123 1.58639 0.007 0.99435
## TamaÃ.oIndividual 2.66880 0.35196 7.583 3.46e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.948 on 40538 degrees of freedom
## (76504 observations deleted due to missingness)
## Multiple R-squared: 0.3779, Adjusted R-squared: 0.3769
## F-statistic: 367.6 on 67 and 40538 DF, p-value: < 2.2e-16
plot(regresion_ajustada)
## Warning: not plotting observations with leverage one:
## 4719
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
datos_nuevos <- data.frame(Segmento.Det= "Colas Regular", Presentacion= "2.5 Lts. Ret Pet", TamaÃ.o="Familiar")
predict(regresion_ajustada,datos_nuevos)
## 1
## 22.04592
En conclusión podemos realizar regresiones lineales para generar modelos predictivos de variables de interés como las ventas. En este caso, se recomienda tener más variables y de mayor impacto sobre las ventas para que el modelo sea más confiable (actualmente 38%).