##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## ID Año Territorio Sub.Territorio
## Min. : 1 Min. :2016 Length:466509 Length:466509
## 1st Qu.:116628 1st Qu.:2017 Class :character Class :character
## Median :233255 Median :2018 Mode :character Mode :character
## Mean :233255 Mean :2018
## 3rd Qu.:349882 3rd Qu.:2019
## Max. :466509 Max. :2019
## CEDI Cliente Nombre Tamaño.Cte.Industria
## Length:466509 Length:466509 Length:466509 Length:466509
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Segmento.Det Marca Presentacion Tamaño
## Length:466509 Length:466509 Length:466509 Length:466509
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Retornable_NR Enero Febrero Marzo
## Length:466509 Length:466509 Length:466509 Length:466509
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Abril Mayo Junio Julio
## Length:466509 Length:466509 Length:466509 Length:466509
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Agosto Septiembre Octubre Noviembre
## Length:466509 Length:466509 Length:466509 Length:466509
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Diciembre
## Length:466509
## Class :character
## Mode :character
##
##
##
## Territorio n
## 1 Guadalajara 466508
## 2 Territorio 1
# count(df, Sub.Territorio, sort = TRUE)
# count(df, CEDI, sort = TRUE)
# count(df, Cliente, sort = TRUE)
# count(df, Nombre, sort = TRUE)
# count(df, Tamaño.Cte.Industria, sort = TRUE)
# count(df, Segmento.Det, sort = TRUE)
# count(df, Marca, sort = TRUE)
# count(df, Presentacion, sort = TRUE)
# count(df, Tamaño, sort = TRUE)
# count(df, Retornable_NR, sort = TRUE)
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## ID Año Territorio Sub.Territorio
## Min. : 1 Min. :2016 Length:466508 Length:466508
## 1st Qu.:116628 1st Qu.:2017 Class :character Class :character
## Median :233256 Median :2018 Mode :character Mode :character
## Mean :233255 Mean :2018
## 3rd Qu.:349882 3rd Qu.:2019
## Max. :466509 Max. :2019
##
## CEDI Cliente Nombre Tamaño.Cte.Industria
## Length:466508 Length:466508 Length:466508 Length:466508
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Segmento.Det Marca Presentacion Tamaño
## Length:466508 Length:466508 Length:466508 Length:466508
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Retornable_NR Enero Febrero Marzo
## Length:466508 Min. :-19.00 Min. :-11.00 Min. :-32.00
## Class :character 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00
## Mode :character Median : 2.00 Median : 2.00 Median : 3.00
## Mean : 9.39 Mean : 9.09 Mean : 10.54
## 3rd Qu.: 6.00 3rd Qu.: 6.00 3rd Qu.: 6.00
## Max. :999.00 Max. :986.00 Max. :986.00
## NA's :233551 NA's :231285 NA's :227506
## Abril Mayo Junio Julio
## Min. :-70.00 Min. :-106.00 Min. :-211.00 Min. :-60.00
## 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00 1st Qu.: 1.00
## Median : 3.00 Median : 3.00 Median : 3.00 Median : 2.00
## Mean : 10.62 Mean : 11.44 Mean : 10.98 Mean : 10.72
## 3rd Qu.: 6.00 3rd Qu.: 7.00 3rd Qu.: 6.00 3rd Qu.: 6.00
## Max. :993.00 Max. : 991.00 Max. : 998.00 Max. :993.00
## NA's :224185 NA's :217072 NA's :215907 NA's :223537
## Agosto Septiembre Octubre Noviembre
## Min. :-211.00 Min. :-527 Min. :-38.0 Min. :-25.0
## 1st Qu.: 1.00 1st Qu.: 1 1st Qu.: 1.0 1st Qu.: 1.0
## Median : 3.00 Median : 3 Median : 3.0 Median : 3.0
## Mean : 10.95 Mean : 12 Mean : 12.1 Mean : 11.8
## 3rd Qu.: 6.00 3rd Qu.: 7 3rd Qu.: 7.0 3rd Qu.: 6.0
## Max. : 999.00 Max. : 993 Max. :998.0 Max. :991.0
## NA's :220366 NA's :337401 NA's :338482 NA's :338545
## Diciembre
## Min. :-28
## 1st Qu.: 1
## Median : 3
## Mean : 13
## 3rd Qu.: 7
## Max. :997
## NA's :341954
## [1] 3149791
## ID Año Territorio
## 0 0 0
## Sub.Territorio CEDI Cliente
## 0 0 0
## Nombre Tamaño.Cte.Industria Segmento.Det
## 0 0 0
## Marca Presentacion Tamaño
## 0 0 0
## Retornable_NR Enero Febrero
## 0 233551 231285
## Marzo Abril Mayo
## 227506 224185 217072
## Junio Julio Agosto
## 215907 223537 220366
## Septiembre Octubre Noviembre
## 337401 338482 338545
## Diciembre
## 341954
# Replace every NA in the data frame with 0.
df[is.na(df)] <- 0

# Add the Ventas column: the row-wise sum of the twelve monthly columns,
# added left to right from Enero through Diciembre.
df$Ventas <- with(
  df,
  Enero + Febrero + Marzo + Abril + Mayo + Junio +
    Julio + Agosto + Septiembre + Octubre + Noviembre + Diciembre
)

# Horizontal box plot of the Enero values.
boxplot(df$Enero, horizontal = TRUE)
## ID Año Territorio Sub.Territorio
## Min. : 1 Min. :2016 Length:466508 Length:466508
## 1st Qu.:116628 1st Qu.:2017 Class :character Class :character
## Median :233256 Median :2018 Mode :character Mode :character
## Mean :233255 Mean :2018
## 3rd Qu.:349882 3rd Qu.:2019
## Max. :466509 Max. :2019
## CEDI Cliente Nombre Tamaño.Cte.Industria
## Length:466508 Length:466508 Length:466508 Length:466508
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Segmento.Det Marca Presentacion Tamaño
## Length:466508 Length:466508 Length:466508 Length:466508
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Retornable_NR Enero Febrero Marzo
## Length:466508 Min. :-19.00 Min. :-11.000 Min. :-32.000
## Class :character 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000
## Mode :character Median : 0.00 Median : 0.000 Median : 0.000
## Mean : 4.69 Mean : 4.581 Mean : 5.401
## 3rd Qu.: 2.00 3rd Qu.: 2.000 3rd Qu.: 3.000
## Max. :999.00 Max. :986.000 Max. :986.000
## Abril Mayo Junio Julio
## Min. :-70.000 Min. :-106.000 Min. :-211.0 Min. :-60.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.0 1st Qu.: 0.000
## Median : 0.000 Median : 0.000 Median : 0.0 Median : 0.000
## Mean : 5.519 Mean : 6.117 Mean : 5.9 Mean : 5.583
## 3rd Qu.: 3.000 3rd Qu.: 3.000 3rd Qu.: 3.0 3rd Qu.: 3.000
## Max. :993.000 Max. : 991.000 Max. : 998.0 Max. :993.000
## Agosto Septiembre Octubre Noviembre
## Min. :-211.000 Min. :-527.000 Min. :-38.000 Min. :-25.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.000 Median : 0.000 Median : 0.000 Median : 0.000
## Mean : 5.777 Mean : 3.311 Mean : 3.327 Mean : 3.248
## 3rd Qu.: 3.000 3rd Qu.: 1.000 3rd Qu.: 1.000 3rd Qu.: 1.000
## Max. : 999.000 Max. : 993.000 Max. :998.000 Max. :991.000
## Diciembre Ventas
## Min. :-28.000 Min. :-527.00
## 1st Qu.: 0.000 1st Qu.: 2.00
## Median : 0.000 Median : 7.00
## Mean : 3.477 Mean : 56.93
## 3rd Qu.: 0.000 3rd Qu.: 26.00
## Max. :997.000 Max. :9517.00
# Stacked bar chart of Ventas by Año for the full data set.
# `fill` (instead of `colour`) shades the bar interiors by customer size;
# `colour` would only tint the bar outlines. geom_col() is the ggplot2
# shorthand for geom_bar(stat = "identity").
ggplot(df, aes(x = Año, y = Ventas, fill = Tamaño.Cte.Industria)) +
  geom_col() +
  labs(
    title = "Ventas por año",
    subtitle = "Caso Arca Continental"
  )

# Keep only the rows whose customer size is "Extra Grande" or "Micro".
# This assignment must start on its own line: appended to the closing
# parenthesis of the plot call it is a parse error.
df2 <- df %>%
  filter(Tamaño.Cte.Industria %in% c("Extra Grande", "Micro"))
# Stacked bar chart of Ventas by Año for the rows in df2.
# `fill` (instead of `colour`) shades the bar interiors by customer size;
# `colour` would only tint the bar outlines.
ggplot(df2, aes(x = Año, y = Ventas, fill = Tamaño.Cte.Industria)) +
  geom_col() +
  labs(
    title = "Ventas por año",
    subtitle = "Caso Arca Continental"
  )

# Keep only the rows whose customer size is "Grande".
# Written as a separate statement so it no longer collides with the closing
# parenthesis of the plot call above.
df3 <- df %>%
  filter(Tamaño.Cte.Industria == "Grande")
# Stacked bar chart of Ventas by Año for the rows in df3.
# `fill` (instead of `colour`) shades the bar interiors by customer size;
# `colour` would only tint the bar outlines.
ggplot(df3, aes(x = Año, y = Ventas, fill = Tamaño.Cte.Industria)) +
  geom_col() +
  labs(
    title = "Ventas por año",
    subtitle = "Caso Arca Continental"
  )

# Keep only the rows whose customer size is "Micro".
# Written as a separate statement so it no longer collides with the closing
# parenthesis of the plot call above.
df4 <- df %>%
  filter(Tamaño.Cte.Industria == "Micro")
# Stacked bar chart of Ventas by Año for the rows in df4.
# `fill` (instead of `colour`) shades the bar interiors by customer size;
# `colour` would only tint the bar outlines.
ggplot(df4, aes(x = Año, y = Ventas, fill = Tamaño.Cte.Industria)) +
  geom_col() +
  labs(
    title = "Ventas por año",
    subtitle = "Caso Arca Continental"
  )

# Keep only the rows whose customer size is "Pequeño".
# Written as a separate statement so it no longer collides with the closing
# parenthesis of the plot call above.
df5 <- df %>%
  filter(Tamaño.Cte.Industria == "Pequeño")
# Stacked bar chart of Ventas by Año for the rows in df5.
# `fill` (instead of `colour`) shades the bar interiors by customer size;
# `colour` would only tint the bar outlines.
ggplot(df5, aes(x = Año, y = Ventas, fill = Tamaño.Cte.Industria)) +
  geom_col() +
  labs(
    title = "Ventas por año",
    subtitle = "Caso Arca Continental"
  )

# Keep only the rows whose brand (Marca) is "Coca-Cola".
# Written as a separate statement so it no longer collides with the closing
# parenthesis of the plot call above.
df6 <- df %>%
  filter(Marca == "Coca-Cola")
# Stacked bar chart of Ventas by Año for the rows in df6.
# `fill` (instead of `colour`) shades the bar interiors by customer size;
# `colour` would only tint the bar outlines.
ggplot(df6, aes(x = Año, y = Ventas, fill = Tamaño.Cte.Industria)) +
  geom_col() +
  labs(
    title = "Ventas por Año de Coca-Cola",
    subtitle = "Caso Arca Continental"
  )

# Keep only the rows whose brand (Marca) is "Sprite".
# Written as a separate statement so it no longer collides with the closing
# parenthesis of the plot call above.
df7 <- df %>%
  filter(Marca == "Sprite")
# Stacked bar chart of Ventas by Año for the rows in df7.
# `fill` (instead of `colour`) shades the bar interiors by customer size;
# `colour` would only tint the bar outlines.
ggplot(df7, aes(x = Año, y = Ventas, fill = Tamaño.Cte.Industria)) +
  geom_col() +
  labs(
    title = "Ventas por año de Sprite",
    subtitle = "Caso Arca Continental"
  )

# Keep only the rows belonging to three detailed segments (Segmento.Det).
# Written as a separate statement so it no longer collides with the closing
# parenthesis of the plot call above.
df9 <- df %>%
  filter(
    Segmento.Det %in% c("Agua Purificada", "Isotónicos Regular", "Colas Regular")
  )
# Side-by-side bars of Ventas by Año, one bar per segment, for df9.
# Dodged bars built from raw rows are drawn on top of one another within each
# year/segment instead of being added up, so Ventas is summed per Año and
# Segmento.Det first and each bar then shows that segment's yearly total.
ggplot(
  aggregate(Ventas ~ Año + Segmento.Det, data = df9, FUN = sum),
  aes(x = Año, y = Ventas, fill = Segmento.Det)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Ventas por Año",
    subtitle = "Caso Arca Continental"
  )

# Keep only the "Agua Purificada" and "Colas Regular" segments.
# Written as a separate statement so it no longer collides with the closing
# parenthesis of the plot call above.
df10 <- df %>%
  filter(Segmento.Det %in% c("Agua Purificada", "Colas Regular"))
# Side-by-side bars of Ventas by Año, one bar per segment, for df10.
# Dodged bars built from raw rows are drawn on top of one another within each
# year/segment instead of being added up, so Ventas is summed per Año and
# Segmento.Det first and each bar then shows that segment's yearly total.
ggplot(
  aggregate(Ventas ~ Año + Segmento.Det, data = df10, FUN = sum),
  aes(x = Año, y = Ventas, fill = Segmento.Det)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Ventas por Año",
    subtitle = "Caso Arca Continental"
  )

# Keep only the rows for brand "Coca-Cola" in the "500 ml NR Vidrio"
# presentation. Written as a separate statement so it no longer collides
# with the closing parenthesis of the plot call above.
df11 <- df %>%
  filter(Marca == "Coca-Cola", Presentacion == "500 ml NR Vidrio")
# Sum Ventas per Año over the rows in df11 (one output row per year).
df12 <- aggregate(Ventas ~ Año, data = df11, FUN = sum)

# Store the year as an integer before using it as the regressor.
df12[["Año"]] <- as.integer(df12[["Año"]])

# Fit a simple linear regression of yearly Ventas on Año and print the
# model summary.
regresion <- lm(Ventas ~ Año, data = df12)
summary(regresion)
##
## Call:
## lm(formula = Ventas ~ Año, data = df12)
##
## Residuals:
## 1 2 3 4
## -23838.5 35316.5 882.5 -12360.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -54970594 28310425 -1.942 0.192
## Año 27433 14032 1.955 0.190
##
## Residual standard error: 31380 on 2 degrees of freedom
## Multiple R-squared: 0.6565, Adjusted R-squared: 0.4847
## F-statistic: 3.822 on 1 and 2 DF, p-value: 0.1898
## 1
## 444065.5