# Global knitr chunk options: echo the code, hide warnings and messages.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)

# Read ifood_enriched.csv (comma-separated, first row holds column names)
# from the current working directory.
ifood <- read.csv(file = "ifood_enriched.csv", header = TRUE, sep = ",")

# Preview the first six rows.
head(ifood, n = 6L)
## Education MaritalSts Income Kidhome Teenhome Recency WineExp FruitExp
## 1 Graduation Single 58138 0 0 58 635 88
## 2 PhD Together 30351 1 0 19 14 0
## 3 PhD Single 82800 0 0 23 1006 22
## 4 PhD Divorced 46610 0 2 8 96 12
## 5 PhD Divorced 48948 0 0 53 437 8
## 6 2n Cycle Married 41658 0 0 24 3 18
## MeatExp FishExp SweetExp GoldExp DealsPurc WebPurc CatalogPurc StorePurc
## 1 546 172 88 88 3 8 10 4
## 2 24 3 3 2 1 3 0 2
## 3 115 59 68 45 1 7 6 12
## 4 96 33 22 43 6 4 1 6
## 5 206 160 49 42 2 7 10 5
## 6 14 15 22 50 3 3 1 3
## WebVisits AccCmp3 AccCmp4 AccCmp5 AccCmp1 AccCmp2 Complain Response Age
## 1 7 0 0 0 0 0 0 1 63
## 2 9 0 0 0 0 0 0 1 46
## 3 3 0 0 1 1 0 0 1 74
## 4 6 0 0 0 0 0 0 1 68
## 5 6 1 0 0 0 0 0 1 77
## 6 9 0 0 0 0 0 0 1 24
## CustDays TotAccCmp TotalExp TotalPurchases PurchaseFrequency
## 1 3040 0 1617 25 0.24671053
## 2 2765 0 46 6 0.06509946
## 3 2959 2 1315 26 0.26360257
## 4 2985 0 302 17 0.17085427
## 5 2890 1 902 24 0.24913495
## 6 2974 0 122 10 0.10087424
## PreferredProductCategory PreferredChannel CustomerSegment PropensityScore
## 1 WineExp CatalogPurc 2 0.27157555
## 2 MeatExp WebPurc 3 0.08417035
## 3 WineExp StorePurc 2 0.79954889
## 4 WineExp DealsPurc 3 0.24208661
## 5 WineExp CatalogPurc 2 0.41634615
## 6 GoldExp DealsPurc 3 0.06745382
## EngagementIndex
## 1 42.78772
## 2 31.58658
## 3 50.58756
## 4 37.83652
## 5 40.01324
## 6 32.99664
# Compact structure overview: dimensions, column types and leading values.
str(object = ifood)
## 'data.frame': 2031 obs. of 35 variables:
## $ Education : chr "Graduation" "PhD" "PhD" "PhD" ...
## $ MaritalSts : chr "Single" "Together" "Single" "Divorced" ...
## $ Income : int 58138 30351 82800 46610 48948 41658 82582 82384 70287 75777 ...
## $ Kidhome : int 0 1 0 0 0 0 0 0 0 0 ...
## $ Teenhome : int 0 0 0 2 0 0 0 0 0 0 ...
## $ Recency : int 58 19 23 8 53 24 54 55 30 12 ...
## $ WineExp : int 635 14 1006 96 437 3 510 984 295 712 ...
## $ FruitExp : int 88 0 22 12 8 18 120 51 35 26 ...
## $ MeatExp : int 546 24 115 96 206 14 550 432 482 538 ...
## $ FishExp : int 172 3 59 33 160 15 156 180 121 69 ...
## $ SweetExp : int 88 3 68 22 49 22 40 120 120 13 ...
## $ GoldExp : int 88 2 45 43 42 50 241 190 40 80 ...
## $ DealsPurc : int 3 1 1 6 2 3 1 1 1 1 ...
## $ WebPurc : int 8 3 7 4 7 3 4 3 5 3 ...
## $ CatalogPurc : int 10 0 6 1 10 1 9 10 5 6 ...
## $ StorePurc : int 4 2 12 6 5 3 7 13 10 11 ...
## $ WebVisits : int 7 9 3 6 6 9 1 1 3 1 ...
## $ AccCmp3 : int 0 0 0 0 1 0 1 0 0 0 ...
## $ AccCmp4 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ AccCmp5 : int 0 0 1 0 0 0 0 1 0 1 ...
## $ AccCmp1 : int 0 0 1 0 0 0 1 0 0 0 ...
## $ AccCmp2 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Complain : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Response : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Age : int 63 46 74 68 77 24 43 65 54 38 ...
## $ CustDays : int 3040 2765 2959 2985 2890 2974 2399 2964 2830 2737 ...
## $ TotAccCmp : int 0 0 2 0 1 0 2 1 0 2 ...
## $ TotalExp : int 1617 46 1315 302 902 122 1617 1957 1093 1438 ...
## $ TotalPurchases : int 25 6 26 17 24 10 21 27 21 21 ...
## $ PurchaseFrequency : num 0.2467 0.0651 0.2636 0.1709 0.2491 ...
## $ PreferredProductCategory: chr "WineExp" "MeatExp" "WineExp" "WineExp" ...
## $ PreferredChannel : chr "CatalogPurc" "WebPurc" "StorePurc" "DealsPurc" ...
## $ CustomerSegment : int 2 3 2 3 2 3 2 2 2 2 ...
## $ PropensityScore : num 0.2716 0.0842 0.7995 0.2421 0.4163 ...
## $ EngagementIndex : num 42.8 31.6 50.6 37.8 40 ...
# Summary of every variable in the data frame.
summary(object = ifood)
## Education MaritalSts Income Kidhome
## Length:2031 Length:2031 Min. : 12571 Min. :0.0000
## Class :character Class :character 1st Qu.: 35828 1st Qu.:0.0000
## Mode :character Mode :character Median : 51563 Median :0.0000
## Mean : 52844 Mean :0.4446
## 3rd Qu.: 68656 3rd Qu.:1.0000
## Max. :666666 Max. :2.0000
## Teenhome Recency WineExp FruitExp
## Min. :0.0000 Min. : 0.00 Min. : 0.0 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.:24.00 1st Qu.: 23.0 1st Qu.: 2.00
## Median :0.0000 Median :49.00 Median : 174.0 Median : 8.00
## Mean :0.5086 Mean :49.14 Mean : 303.8 Mean : 26.36
## 3rd Qu.:1.0000 3rd Qu.:74.00 3rd Qu.: 504.5 3rd Qu.: 33.00
## Max. :2.0000 Max. :99.00 Max. :1493.0 Max. :199.00
## MeatExp FishExp SweetExp GoldExp
## Min. : 0 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 16 1st Qu.: 3.00 1st Qu.: 1.00 1st Qu.: 9.00
## Median : 68 Median : 12.00 Median : 8.00 Median : 24.00
## Mean : 168 Mean : 37.38 Mean : 27.28 Mean : 43.69
## 3rd Qu.: 230 3rd Qu.: 50.00 3rd Qu.: 34.00 3rd Qu.: 56.00
## Max. :1725 Max. :259.00 Max. :263.00 Max. :362.00
## DealsPurc WebPurc CatalogPurc StorePurc
## Min. : 0.000 Min. : 0.0 Min. : 0.000 Min. : 0.000
## 1st Qu.: 1.000 1st Qu.: 2.0 1st Qu.: 0.000 1st Qu.: 3.000
## Median : 2.000 Median : 4.0 Median : 2.000 Median : 5.000
## Mean : 2.328 Mean : 4.1 Mean : 2.661 Mean : 5.776
## 3rd Qu.: 3.000 3rd Qu.: 6.0 3rd Qu.: 4.000 3rd Qu.: 8.000
## Max. :15.000 Max. :27.0 Max. :28.000 Max. :13.000
## WebVisits AccCmp3 AccCmp4 AccCmp5
## Min. : 0.000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.: 3.000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median : 6.000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean : 5.311 Mean :0.07287 Mean :0.07632 Mean :0.07189
## 3rd Qu.: 7.000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :20.000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## AccCmp1 AccCmp2 Complain Response
## Min. :0.00000 Min. :0.0000 Min. :0.000000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.000000 Median :0.0000
## Mean :0.06549 Mean :0.0128 Mean :0.009847 Mean :0.1531
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.000000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.0000 Max. :1.000000 Max. :1.0000
## Age CustDays TotAccCmp TotalExp
## Min. :24.0 Min. :2377 Min. :0.0000 Min. : 5.0
## 1st Qu.:43.0 1st Qu.:2556 1st Qu.:0.0000 1st Qu.: 69.0
## Median :50.0 Median :2728 Median :0.0000 Median : 396.0
## Mean :51.2 Mean :2729 Mean :0.2994 Mean : 606.6
## 3rd Qu.:61.0 3rd Qu.:2904 3rd Qu.:0.0000 3rd Qu.:1044.5
## Max. :80.0 Max. :3076 Max. :4.0000 Max. :2525.0
## TotalPurchases PurchaseFrequency PreferredProductCategory PreferredChannel
## Min. : 0.00 Min. :0.00000 Length:2031 Length:2031
## 1st Qu.: 8.00 1st Qu.:0.08604 Class :character Class :character
## Median :15.00 Median :0.16535 Mode :character Mode :character
## Mean :14.87 Mean :0.16299
## 3rd Qu.:21.00 3rd Qu.:0.22948
## Max. :44.00 Max. :0.44453
## CustomerSegment PropensityScore EngagementIndex
## Min. :1.000 Min. :0.00000 Min. : 2.892
## 1st Qu.:1.000 1st Qu.:0.03488 1st Qu.:20.859
## Median :2.000 Median :0.07725 Median :27.401
## Mean :2.009 Mean :0.15313 Mean :28.453
## 3rd Qu.:3.000 3rd Qu.:0.18046 3rd Qu.:35.209
## Max. :3.000 Max. :0.99312 Max. :62.573
# Names of all numeric (integer or double) columns.
# vapply() always returns a logical vector, even for a zero-column data
# frame, so the subsetting below cannot fail on an unexpected return type.
numericas <- names(ifood)[vapply(ifood, is.numeric, logical(1))]

# For every numeric column: histogram, horizontal boxplot, the first 20
# observations and the six-number summary.
for (var in numericas) {
  cat("Variable -> ", var, "\n\n")

  # Histogram. The column is passed as `ifood[[var]]` on purpose: hist()
  # builds its default x-axis label from the deparsed argument.
  hist(
    ifood[[var]],
    main = paste("Histograma de", var),
    col = "skyblue",
    border = "black"
  )

  # Horizontal boxplot.
  boxplot(
    ifood[[var]],
    main = paste("Boxplot de", var),
    col = "orange",
    horizontal = TRUE
  )

  # Show only the first 20 values, followed by the statistical summary.
  print(head(ifood[[var]], 20))
  # The stray `20` previously passed to summary() was silently ignored for
  # numeric vectors; it has been removed.
  print(summary(ifood[[var]]))

  cat("\n\n")
}
## Variable -> Income
## [1] 58138 30351 82800 46610 48948 41658 82582 82384 70287 75777 25721 88194
## [13] 75251 75825 56046 50388 87195 68126 86037 32557
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12571 35828 51563 52844 68656 666666
##
##
## Variable -> Kidhome
## [1] 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.4446 1.0000 2.0000
##
##
## Variable -> Teenhome
## [1] 0 0 0 2 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.5086 1.0000 2.0000
##
##
## Variable -> Recency
## [1] 58 19 23 8 53 24 54 55 30 12 75 19 34 40 9 3 35 40 95 13
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 24.00 49.00 49.14 74.00 99.00
##
##
## Variable -> WineExp
## [1] 635 14 1006 96 437 3 510 984 295 712 1 688 721 1032 577
## [16] 292 217 1332 490 34
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 23.0 174.0 303.8 504.5 1493.0
##
##
## Variable -> FruitExp
## [1] 88 0 22 12 8 18 120 51 35 26 3 14 111 105 0 6 76 17 44
## [20] 3
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 2.00 8.00 26.36 33.00 199.00
##
##
## Variable -> MeatExp
## [1] 546 24 115 96 206 14 550 432 482 538 6 309 925 779 64 37 690 311 125
## [20] 29
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 16 68 168 230 1725
##
##
## Variable -> FishExp
## [1] 172 3 59 33 160 15 156 180 121 69 3 201 97 137 0 0 50 23 29
## [20] 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 3.00 12.00 37.38 50.00 259.00
##
##
## Variable -> SweetExp
## [1] 88 3 68 22 49 22 40 120 120 13 6 24 18 105 0 3 26 51 20
## [20] 4
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 1.00 8.00 27.28 34.00 263.00
##
##
## Variable -> GoldExp
## [1] 88 2 45 43 42 50 241 190 40 80 15 38 18 51 51 34 38 86 22
## [20] 10
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 9.00 24.00 43.69 56.00 362.00
##
##
## Variable -> DealsPurc
## [1] 3 1 1 6 2 3 1 1 1 1 1 1 1 0 2 4 1 1 1 3
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.000 2.000 2.328 3.000 15.000
##
##
## Variable -> WebPurc
## [1] 8 3 7 4 7 3 4 3 5 3 1 11 7 5 10 6 3 7 6 2
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 2.0 4.0 4.1 6.0 27.0
##
##
## Variable -> CatalogPurc
## [1] 10 0 6 1 10 1 9 10 5 6 1 10 6 8 1 1 11 4 7 1
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 2.000 2.661 4.000 28.000
##
##
## Variable -> StorePurc
## [1] 4 2 12 6 5 3 7 13 10 11 2 10 5 9 8 6 5 5 11 3
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 3.000 5.000 5.776 8.000 13.000
##
##
## Variable -> WebVisits
## [1] 7 9 3 6 6 9 1 1 3 1 7 5 5 4 8 7 1 9 3 5
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 3.000 6.000 5.311 7.000 20.000
##
##
## Variable -> AccCmp3
## [1] 0 0 0 0 1 0 1 0 0 0 0 1 0 1 1 0 0 0 0 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.07287 0.00000 1.00000
##
##
## Variable -> AccCmp4
## [1] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 1 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.07632 0.00000 1.00000
##
##
## Variable -> AccCmp5
## [1] 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.07189 0.00000 1.00000
##
##
## Variable -> AccCmp1
## [1] 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.06549 0.00000 1.00000
##
##
## Variable -> AccCmp2
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.0128 0.0000 1.0000
##
##
## Variable -> Complain
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000000 0.000000 0.000000 0.009847 0.000000 1.000000
##
##
## Variable -> Response
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.1531 0.0000 1.0000
##
##
## Variable -> Age
## [1] 63 46 74 68 77 24 43 65 54 38 49 41 48 44 55 63 61 29 29 53
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 24.0 43.0 50.0 51.2 61.0 80.0
##
##
## Variable -> CustDays
## [1] 3040 2765 2959 2985 2890 2974 2399 2964 2830 2737 2781 2479 3048 3002 2920
## [16] 2409 2429 2973 2920 2502
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2377 2556 2728 2729 2904 3076
##
##
## Variable -> TotAccCmp
## [1] 0 0 2 0 1 0 2 1 0 2 0 2 0 1 1 2 0 1 2 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2994 0.0000 4.0000
##
##
## Variable -> TotalExp
## [1] 1617 46 1315 302 902 122 1617 1957 1093 1438 34 1274 1890 2209 692
## [16] 372 1097 1820 730 80
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.0 69.0 396.0 606.6 1044.5 2525.0
##
##
## Variable -> TotalPurchases
## [1] 25 6 26 17 24 10 21 27 21 21 5 32 19 22 21 17 20 17 25 9
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 8.00 15.00 14.87 21.00 44.00
##
##
## Variable -> PurchaseFrequency
## [1] 0.24671053 0.06509946 0.26360257 0.17085427 0.24913495 0.10087424
## [7] 0.26260942 0.27327935 0.22261484 0.23017903 0.05393743 0.38725292
## [13] 0.18700787 0.21985343 0.21575342 0.21170610 0.24701523 0.17154390
## [19] 0.25684932 0.10791367
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.08604 0.16535 0.16299 0.22948 0.44453
##
##
## Variable -> CustomerSegment
## [1] 2 3 2 3 2 3 2 2 2 2 1 2 2 2 2 3 2 2 2 3
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 2.009 3.000 3.000
##
##
## Variable -> PropensityScore
## [1] 0.27157555 0.08417035 0.79954889 0.24208661 0.41634615 0.06745382
## [7] 0.70595587 0.25296114 0.19975531 0.89460261 0.02217032 0.80715988
## [13] 0.16345862 0.74285217 0.34268975 0.84286435 0.04230962 0.68776932
## [19] 0.20150298 0.25573209
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.03488 0.07725 0.15313 0.18046 0.99312
##
##
## Variable -> EngagementIndex
## [1] 42.78772 31.58658 50.58756 37.83652 40.01324 32.99664 42.44428 44.32927
## [9] 38.47561 49.51131 17.72385 55.79816 45.07131 50.08289 47.87513 47.06775
## [17] 35.04264 49.72897 30.95390 30.43164
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.892 20.859 27.401 28.453 35.209 62.573
# Names of all categorical (factor or character) columns.
# `||` is the scalar operator: each column yields exactly one TRUE/FALSE,
# and vapply() enforces that contract.
categoricas <- names(ifood)[
  vapply(ifood, function(x) is.factor(x) || is.character(x), logical(1))
]

# For every categorical column: frequency table, bar chart and pie chart.
for (var in categoricas) {
  cat("###", var, "\n\n")

  # Tabulate once and reuse the result for the printout and both plots.
  freq_tab <- table(ifood[[var]])

  # One colour per tabulated level, so the palette always matches the bars.
  # Counting unique() values instead would include NA (which table() drops
  # by default) and miss unused factor levels (which table() keeps).
  paleta <- rainbow(length(freq_tab))

  # Frequency table.
  print(freq_tab)

  # Bar chart.
  barplot(freq_tab, main = paste("Distribución de", var), col = paleta)

  # Pie chart.
  pie(freq_tab, main = paste("Distribución de", var), col = paleta)
}
## ### Education
##
##
## 2n Cycle Basic Graduation Master PhD
## 186 49 1022 336 438
## ### MaritalSts
##
##
## Divorced Married Single Together Widow
## 213 787 445 516 70
## ### PreferredProductCategory
##
##
## FishExp FruitExp GoldExp MeatExp SweetExp WineExp
## 53 12 153 404 15 1394
## ### PreferredChannel
##
##
## CatalogPurc DealsPurc StorePurc WebPurc
## 138 250 1221 422