## [1] 73785 13
## [1] "StockistCode" "RetailerID" "RetailerCode" "NatureOfBus"
## [5] "OutletType" "OutletClass" "Month" "Category"
## [9] "Brand" "Pieces" "Cases" "GrossAMT"
## [13] "Sales"
## Classes 'data.table' and 'data.frame': 73785 obs. of 13 variables:
## $ StockistCode: int 22013913 22013306 22013306 22013193 22013193 22013115 22013115 22013115 22013115 22013115 ...
## $ RetailerID : int 305 296 296 443 329 674 687 41 772 347 ...
## $ RetailerCode: Factor w/ 10194 levels "0","03D","1",..: 6673 4129 4129 5981 3030 5792 5012 4741 7155 4454 ...
## $ NatureOfBus : Factor w/ 1 level "RTL": 1 1 1 1 1 1 1 1 1 1 ...
## $ OutletType : Factor w/ 6 levels "CH","CONF","ED",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ OutletClass : Factor w/ 4 levels "A","B","C","D": 2 3 3 2 4 3 3 1 2 3 ...
## $ Month : Factor w/ 5 levels "May","January",..: 5 5 3 4 1 4 1 5 1 5 ...
## $ Category : Factor w/ 4 levels "Activ 100% Juice",..: 1 1 4 4 4 4 4 4 4 4 ...
## $ Brand : Factor w/ 23 levels "Activ 1 Ltr",..: 2 2 13 13 13 13 13 13 13 13 ...
## $ Pieces : int 6 12 54 6 12 40 72 12 12 72 ...
## $ Cases : num 0.1 0.2 4.5 0.5 1 ...
## $ GrossAMT : num 184 367 3983 449 869 ...
## $ Sales : num 201 401 4372 502 971 ...
## - attr(*, ".internal.selfref")=<externalptr>
## NatureOfBus OutletType OutletClass Month
## RTL:73785 CH : 3538 A: 2826 May : 8333
## CONF:12954 B:21219 January :13161
## ED :13043 C:27457 February:16070
## GM :19650 D:22283 March :18528
## GROC:18222 April :17693
## PP : 6378
##
## Category Brand
## Activ 100% Juice :12876 Real 1 Ltr :26270
## Culinary : 8463 Real 200ml :21870
## Real Drinks : 3402 Activ 200ml :10801
## Real Juices & Nectars:49044 Real Drinks Koolerz : 3297
## Hommade Tomato Puree: 3166
## Hommade Lemoneez : 1921
## (Other) : 6460
## Pieces Cases GrossAMT
## Min. : 1.00 Min. : 0.0416 Min. : 87.2
## 1st Qu.: 9.00 1st Qu.: 0.5000 1st Qu.: 367.0
## Median : 30.00 Median : 1.0000 Median : 593.9
## Mean : 75.18 Mean : 3.3730 Mean : 2313.9
## 3rd Qu.: 60.00 3rd Qu.: 2.9999 3rd Qu.: 1882.5
## Max. :31963.00 Max. :1065.4334 Max. :831896.5
##
## Sales
## Min. : 102.0
## 1st Qu.: 401.5
## Median : 654.8
## Mean : 2356.6
## 3rd Qu.: 2019.9
## Max. :841531.8
##
## [1] 68105
# Pieces values strictly between 200 and 500 (both bounds excluded).
H <- with(dabur.dt, Pieces[which(Pieces > 200 & Pieces < 500)])
length(H)
## [1] 4015
# Pieces values strictly between 500 and 1000 (both bounds excluded).
G <- with(dabur.dt, Pieces[which(Pieces > 500 & Pieces < 1000)])
length(G)
## [1] 1014
## [1] 531
## [1] 68105 13
## NatureOfBus OutletType OutletClass Month
## RTL:68105 CH : 3429 A: 2337 May : 7848
## CONF:11974 B:19228 January :12810
## ED :11838 C:25608 February:14812
## GM :18171 D:20932 March :16510
## GROC:16790 April :16125
## PP : 5903
##
## Category Brand
## Activ 100% Juice :12736 Real 1 Ltr :25113
## Culinary : 8399 Real 200ml :17776
## Real Drinks : 3184 Activ 200ml :10662
## Real Juices & Nectars:43786 Hommade Tomato Puree: 3127
## Real Drinks Koolerz : 3086
## Hommade Lemoneez : 1918
## (Other) : 6423
## Pieces Cases GrossAMT Sales
## Min. : 1.00 Min. : 0.0416 Min. : 87.24 Min. : 102.0
## 1st Qu.: 7.00 1st Qu.: 0.4998 1st Qu.: 365.67 1st Qu.: 401.5
## Median : 27.00 Median : 1.0000 Median : 526.57 Median : 574.6
## Mean : 35.81 Mean : 1.7998 Mean : 1405.68 Mean : 1484.1
## 3rd Qu.: 40.00 3rd Qu.: 2.0000 3rd Qu.: 1347.14 3rd Qu.: 1495.1
## Max. :199.00 Max. :42.0000 Max. :36859.87 Max. :22494.6
##
## OutletType
## CH CONF ED GM GROC PP
## 5.03 17.58 17.38 26.68 24.65 8.67
# Bar chart of the values in tab3 (presumably the outlet-type percentages
# printed above -- tab3 is built outside this excerpt).
bp <- barplot(
  height = tab3,
  main = "Percentage of Different Types of Outlets",
  xlab = "Outlet Type",
  ylab = "Percent (%)",
  col = "lightblue",
  ylim = c(0, 30),
  beside = TRUE
)
# barplot() returned the bar midpoints; print each value above y = 0.
text(x = bp, y = 0, labels = round(tab3, 2), pos = 3, cex = 1)
## OutletClass
## A B C D
## 3.43 28.23 37.60 30.73
# Bar chart of the values in tab3 (presumably the outlet-class percentages
# printed above -- tab3 is built outside this excerpt).
bp <- barplot(
  height = tab3,
  main = "Percentage of Different Types of Outlet Classes",
  xlab = "Outlet Class",
  ylab = "Percent (%)",
  col = "lightblue",
  ylim = c(0, 40),
  beside = TRUE
)
# barplot() returned the bar midpoints; print each value above y = 0.
text(x = bp, y = 0, labels = round(tab3, 2), pos = 3, cex = 1)
## Month
## May January February March April
## 11.52 18.81 21.75 24.24 23.68
# Bar chart of the values in tab3 (presumably the per-month percentages
# printed above -- tab3 is built outside this excerpt).
bp <- barplot(
  height = tab3,
  main = "Percent Orders in Different Months",
  xlab = "Month",
  ylab = "Percent (%)",
  col = "lightblue",
  ylim = c(0, 30),
  beside = TRUE
)
# barplot() returned the bar midpoints; print each value above y = 0.
text(x = bp, y = 0, labels = round(tab3, 2), pos = 3, cex = 1)
## Category
## Activ 100% Juice Culinary Real Drinks
## 18.70 12.33 4.68
## Real Juices & Nectars
## 64.29
# Bar chart of the values in tab3 (presumably the per-category percentages
# printed above -- tab3 is built outside this excerpt).
bp <- barplot(
  tab3,
  xlab = "Category",
  ylab = "Percent (%)",
  main = "Percent Orders of Different Categories",
  col = "lightblue",
  beside = TRUE,
  ylim = c(0, 65)
)
# barplot() returned the bar midpoints; print each value above y = 0.
text(bp, 0, round(tab3, 2), cex = 1, pos = 3)

# Cross-tabulate outlet type (rows) by outlet class (columns).
# NOTE(review): OutletType/OutletClass are bare names not defined in the
# visible code -- presumably attached data columns; confirm.
tab1 <- table(OutletType, OutletClass)
# Column-wise shares of tab1, expressed as percentages with two decimals.
tab2 <- prop.table(tab1, margin = 2)
tab3 <- round(100 * tab2, digits = 2)
# Append a "Sum" row (column totals) and print the result.
tab4 <- addmargins(tab3, margin = 1)
tab4
## OutletClass
## OutletType A B C D
## CH 2.48 2.84 4.46 8.04
## CONF 15.28 18.10 15.17 20.31
## ED 21.48 19.54 16.90 15.52
## GM 18.78 30.00 32.01 17.99
## GROC 35.64 20.25 25.42 26.54
## PP 6.33 9.27 6.03 11.59
## Sum 99.99 100.00 99.99 99.99
# Grouped bar chart of tab3: one cluster per column, one bar per row, with
# the row names of tab3 used as legend entries.
bp <- barplot(
  tab3,
  xlab = "OutletClass",
  ylab = "Percent (%)",
  main = "Distribution of Outlet Types Across Outlet Class",
  col = c("lightblue", "peru", "Red", "lightgreen", "pink", "orange"),
  beside = TRUE,
  ylim = c(0, 40),
  # Spelled out in full; the bare `legend` only worked via partial matching.
  legend.text = rownames(tab3)
)

# Cross-tabulate outlet class (rows) by outlet type (columns).
# NOTE(review): OutletClass/OutletType are bare names not defined in the
# visible code -- presumably attached data columns; confirm.
tab1 <- table(OutletClass, OutletType)
# Column-wise shares of tab1, expressed as percentages with two decimals.
tab2 <- prop.table(tab1, margin = 2)
tab3 <- round(100 * tab2, digits = 2)
# Append a "Sum" row (column totals) and print the result.
tab4 <- addmargins(tab3, margin = 1)
tab4
## OutletType
## OutletClass CH CONF ED GM GROC PP
## A 1.69 2.98 4.24 2.42 4.96 2.51
## B 15.92 29.06 31.75 31.74 23.19 30.20
## C 33.30 32.45 36.57 45.12 38.77 26.17
## D 49.08 35.51 27.45 20.73 33.09 41.11
## Sum 99.99 100.00 100.01 100.01 100.01 99.99
# Grouped bar chart of tab3: one cluster per column, one bar per row, with
# the row names of tab3 used as legend entries.
bp <- barplot(
  tab3,
  xlab = "Outlet Types",
  ylab = "Percent (%)",
  main = "Distribution of Outlet Class Across Outlet Types",
  col = c("lightblue", "peru", "Red", "lightgreen"),
  beside = TRUE,
  ylim = c(0, 50),
  # Spelled out in full; the bare `legend` only worked via partial matching.
  legend.text = rownames(tab3)
)

# Cross-tabulate product category (rows) by outlet type (columns).
# NOTE(review): Category/OutletType are bare names not defined in the
# visible code -- presumably attached data columns; confirm.
tab1 <- table(Category, OutletType)
# Column-wise shares of tab1, expressed as percentages with two decimals.
tab2 <- prop.table(tab1, margin = 2)
tab3 <- round(100 * tab2, digits = 2)
# Append a "Sum" row (column totals) and print the result.
tab4 <- addmargins(tab3, margin = 1)
tab4
## OutletType
## Category CH CONF ED GM GROC PP
## Activ 100% Juice 15.81 18.81 18.53 18.42 19.36 19.48
## Culinary 2.71 9.62 7.93 12.23 17.89 16.75
## Real Drinks 6.33 4.87 5.05 5.23 3.72 3.57
## Real Juices & Nectars 75.15 66.70 68.48 64.12 59.03 60.19
## Sum 100.00 100.00 99.99 100.00 100.00 99.99
# Grouped bar chart of tab3: one cluster per column, one bar per row, with
# the row names of tab3 used as legend entries.
bp <- barplot(
  tab3,
  xlab = "Outlet Types",
  ylab = "Percent (%)",
  main = "Distribution of Categories Across Outlet Types",
  col = c("lightblue", "peru", "Red", "lightgreen"),
  beside = TRUE,
  ylim = c(0, 75),
  # Spelled out in full; the bare `legend` only worked via partial matching.
  legend.text = rownames(tab3)
)

# Cross-tabulate product category (rows) by outlet class (columns).
# NOTE(review): Category/OutletClass are bare names not defined in the
# visible code -- presumably attached data columns; confirm.
tab1 <- table(Category, OutletClass)
# Column-wise shares of tab1, expressed as percentages with two decimals.
tab2 <- prop.table(tab1, margin = 2)
tab3 <- round(100 * tab2, digits = 2)
# Append a "Sum" row (column totals) and print the result.
tab4 <- addmargins(tab3, margin = 1)
tab4
## OutletClass
## Category A B C D
## Activ 100% Juice 23.36 20.21 17.78 17.92
## Culinary 19.13 16.21 10.96 9.70
## Real Drinks 4.66 3.83 4.72 5.40
## Real Juices & Nectars 52.85 59.75 66.54 66.99
## Sum 100.00 100.00 100.00 100.01
# Grouped bar chart of tab3: one cluster per column, one bar per row, with
# the row names of tab3 used as legend entries.
bp <- barplot(
  tab3,
  # The clusters are the columns of the Category x OutletClass table, so the
  # axis is labelled with the class, not the outlet type.
  xlab = "Outlet Class",
  ylab = "Percent (%)",
  main = "Distribution of Categories Across Outlet Class",
  col = c("lightblue", "peru", "Red", "lightgreen"),
  beside = TRUE,
  ylim = c(0, 75),
  # Spelled out in full; the bare `legend` only worked via partial matching.
  legend.text = rownames(tab3)
)
library(data.table)
# Per OutletType: row count plus mean and standard deviation of GrossAMT,
# each rounded to one decimal.
tab1 <- daburSub.dt[
  ,
  list(
    N = .N,
    meanGrossAMT = round(mean(GrossAMT), digits = 1),
    sdGrossAMT = round(sd(GrossAMT), digits = 1)
  ),
  by = (OutletType)
]
tab1
## OutletType N meanGrossAMT sdGrossAMT
## 1: CH 3429 1074.6 1724.0
## 2: CONF 11974 1457.5 2197.0
## 3: ED 11838 1496.8 2297.8
## 4: GM 18171 1327.0 2174.3
## 5: GROC 16790 1395.5 2228.5
## 6: PP 5903 1581.4 2195.8
library(ggpubr)
# Box plot of GrossAMT for each OutletType, coloured per group ("jco"
# palette), drawn with ylim = c(0, 4000).
p <- ggboxplot(
  daburSub.dt,
  x = "OutletType",
  y = "GrossAMT",
  color = "OutletType",
  palette = "jco",
  ylim = c(0, 4000)
)
# Add a dashed horizontal reference line at the overall mean of GrossAMT.
p + geom_hline(yintercept = mean(daburSub.dt$GrossAMT), linetype = 2)
library(data.table)
# Per OutletClass: row count plus mean and standard deviation of GrossAMT,
# each rounded to one decimal.
tab1 <- daburSub.dt[
  ,
  list(
    N = .N,
    meanGrossAMT = round(mean(GrossAMT), digits = 1),
    sdGrossAMT = round(sd(GrossAMT), digits = 1)
  ),
  by = (OutletClass)
]
tab1
## OutletClass N meanGrossAMT sdGrossAMT
## 1: B 19228 1686.4 2430.0
## 2: C 25608 1328.0 2087.4
## 3: D 20932 1181.2 1971.2
## 4: A 2337 1958.4 2862.3
library(ggpubr)
# Box plot of GrossAMT for each OutletClass, coloured per group ("jco"
# palette), drawn with ylim = c(0, 5000).
p <- ggboxplot(
  daburSub.dt,
  x = "OutletClass",
  y = "GrossAMT",
  color = "OutletClass",
  palette = "jco",
  ylim = c(0, 5000)
)
# Add a dashed horizontal reference line at the overall mean of GrossAMT.
p + geom_hline(yintercept = mean(daburSub.dt$GrossAMT), linetype = 2)
library(data.table)
# Per OutletType: row count plus mean and standard deviation of Sales,
# each rounded to one decimal.
tab1 <- daburSub.dt[
  ,
  list(
    N = .N,
    meanSales = round(mean(Sales), digits = 1),
    sdSales = round(sd(Sales), digits = 1)
  ),
  by = (OutletType)
]
tab1
## OutletType N meanSales sdSales
## 1: CH 3429 1141.1 1801.5
## 2: CONF 11974 1539.8 2303.2
## 3: ED 11838 1569.6 2371.2
## 4: GM 18171 1397.5 2247.1
## 5: GROC 16790 1476.4 2300.6
## 6: PP 5903 1687.1 2344.3
library(ggpubr)
# Box plot of Sales for each OutletType, coloured per group ("jco"
# palette), drawn with ylim = c(0, 5000).
p <- ggboxplot(
  daburSub.dt,
  x = "OutletType",
  y = "Sales",
  color = "OutletType",
  palette = "jco",
  ylim = c(0, 5000)
)
# Add a dashed horizontal reference line at the overall mean of Sales.
p + geom_hline(yintercept = mean(daburSub.dt$Sales), linetype = 2)
library(data.table)
# Per OutletClass: row count plus mean and standard deviation of Sales,
# each rounded to one decimal.
tab1 <- daburSub.dt[
  ,
  list(
    N = .N,
    meanSales = round(mean(Sales), digits = 1),
    sdSales = round(sd(Sales), digits = 1)
  ),
  by = (OutletClass)
]
tab1
## OutletClass N meanSales sdSales
## 1: B 19228 1787.1 2553.9
## 2: C 25608 1400.9 2159.4
## 3: D 20932 1242.6 2036.9
## 4: A 2337 2065.4 2891.8
library(ggpubr)
# Box plot of Sales for each OutletClass, coloured per group ("jco"
# palette), drawn with ylim = c(0, 5000).
p <- ggboxplot(
  daburSub.dt,
  x = "OutletClass",
  y = "Sales",
  color = "OutletClass",
  palette = "jco",
  ylim = c(0, 5000)
)
# Add a dashed horizontal reference line at the overall mean of Sales.
p + geom_hline(yintercept = mean(daburSub.dt$Sales), linetype = 2)
library(data.table)
# Per OutletClass: row count plus mean and standard deviation of both Pieces
# and Cases, each rounded to one decimal.
tab1 <- daburSub.dt[
  ,
  list(
    N = .N,
    meanPieces = round(mean(Pieces), digits = 1),
    sdPieces = round(sd(Pieces), digits = 1),
    meanCases = round(mean(Cases), digits = 1),
    sdCases = round(sd(Cases), digits = 1)
  ),
  by = (OutletClass)
]
tab1
## OutletClass N meanPieces sdPieces meanCases sdCases
## 1: B 19228 41.1 42.7 2.1 2.8
## 2: C 25608 34.2 37.0 1.7 2.4
## 3: D 20932 31.8 35.5 1.6 2.3
## 4: A 2337 45.2 45.8 2.3 3.2
library(data.table)
# Per OutletType: row count plus mean and standard deviation of both Pieces
# and Cases, each rounded to one decimal.
tab1 <- daburSub.dt[
  ,
  list(
    N = .N,
    meanPieces = round(mean(Pieces), digits = 1),
    sdPieces = round(sd(Pieces), digits = 1),
    meanCases = round(mean(Cases), digits = 1),
    sdCases = round(sd(Cases), digits = 1)
  ),
  by = (OutletType)
]
tab1
## OutletType N meanPieces sdPieces meanCases sdCases
## 1: CH 3429 26.3 29.0 1.4 1.9
## 2: CONF 11974 37.6 39.7 1.9 2.5
## 3: ED 11838 37.8 39.2 1.9 2.6
## 4: GM 18171 34.4 37.9 1.7 2.5
## 5: GROC 16790 35.1 39.5 1.8 2.6
## 6: PP 5903 39.9 40.8 2.0 2.5
library(data.table)
# Per Category: row count plus mean and standard deviation of GrossAMT,
# each rounded to one decimal.
tab1 <- daburSub.dt[
  ,
  list(
    N = .N,
    meanGrossAMT = round(mean(GrossAMT), digits = 1),
    sdGrossAMT = round(sd(GrossAMT), digits = 1)
  ),
  by = (Category)
]
tab1
## Category N meanGrossAMT sdGrossAMT
## 1: Activ 100% Juice 12736 621.1 888.8
## 2: Real Juices & Nectars 43786 1875.0 2563.8
## 3: Culinary 8399 543.0 680.9
## 4: Real Drinks 3184 366.0 159.9
library(ggpubr)
# Box plot of GrossAMT for each Category, coloured per group ("jco"
# palette), drawn with ylim = c(0, 3200).
p <- ggboxplot(
  daburSub.dt,
  x = "Category",
  y = "GrossAMT",
  color = "Category",
  palette = "jco",
  ylim = c(0, 3200)
)
# Add a dashed horizontal reference line at the overall mean of GrossAMT.
p + geom_hline(yintercept = mean(daburSub.dt$GrossAMT), linetype = 2)
library(data.table)
# Per Category: row count plus mean and standard deviation of Sales,
# each rounded to one decimal.
tab1 <- daburSub.dt[
  ,
  list(
    N = .N,
    meanSales = round(mean(Sales), digits = 1),
    sdSales = round(sd(Sales), digits = 1)
  ),
  by = (Category)
]
tab1
## Category N meanSales sdSales
## 1: Activ 100% Juice 12736 665.5 936.9
## 2: Real Juices & Nectars 43786 1969.0 2659.4
## 3: Culinary 8399 611.8 786.2
## 4: Real Drinks 3184 391.6 170.1
library(ggpubr)
# Box plot of Sales for each Category, coloured per group ("jco"
# palette), drawn with ylim = c(0, 3200).
p <- ggboxplot(
  daburSub.dt,
  x = "Category",
  y = "Sales",
  color = "Category",
  palette = "jco",
  ylim = c(0, 3200)
)
# Add a dashed horizontal reference line at the overall mean of Sales.
p + geom_hline(yintercept = mean(daburSub.dt$Sales), linetype = 2)
library(data.table)
# Per Month: row count plus mean and standard deviation of GrossAMT,
# each rounded to one decimal.
tab1 <- daburSub.dt[
  ,
  list(
    N = .N,
    meanGrossAMT = round(mean(GrossAMT), digits = 1),
    sdGrossAMT = round(sd(GrossAMT), digits = 1)
  ),
  by = (Month)
]
tab1
## Month N meanGrossAMT sdGrossAMT
## 1: April 16125 1491.5 2330.3
## 2: February 14812 1367.7 2122.1
## 3: March 16510 1563.6 2441.5
## 4: May 7848 1292.5 1930.8
## 5: January 12810 1207.4 1893.8
library(ggpubr)
# Box plot of GrossAMT for each Month, coloured per group ("jco"
# palette), drawn with ylim = c(0, 3200).
p <- ggboxplot(
  daburSub.dt,
  x = "Month",
  y = "GrossAMT",
  color = "Month",
  palette = "jco",
  ylim = c(0, 3200)
)
# Add a dashed horizontal reference line at the overall mean of GrossAMT.
p + geom_hline(yintercept = mean(daburSub.dt$GrossAMT), linetype = 2)
library(data.table)
# Per Month: row count plus mean and standard deviation of Sales,
# each rounded to one decimal.
tab1 <- daburSub.dt[
  ,
  list(
    N = .N,
    meanSales = round(mean(Sales), digits = 1),
    sdSales = round(sd(Sales), digits = 1)
  ),
  by = (Month)
]
tab1
## Month N meanSales sdSales
## 1: April 16125 1579.9 2448.8
## 2: February 14812 1444.7 2228.2
## 3: March 16510 1655.2 2565.6
## 4: May 7848 1362.0 2012.4
## 5: January 12810 1263.5 1834.0
library(ggpubr)
# Box plot of Sales for each Month, coloured per group ("jco"
# palette), drawn with ylim = c(0, 3200).
p <- ggboxplot(
  daburSub.dt,
  x = "Month",
  y = "Sales",
  color = "Month",
  palette = "jco",
  ylim = c(0, 3200)
)
# Add a dashed horizontal reference line at the overall mean of Sales.
p + geom_hline(yintercept = mean(daburSub.dt$Sales), linetype = 2)

# Keep only the four numeric measures for the correlation analysis.
dabur.dt2 <- daburSub.dt[, c("Pieces", "Cases", "GrossAMT", "Sales")]
library(Hmisc)
# rcorr() is given a matrix; the printed result shows the pairwise
# correlations, the number of observations and the P-values.
rcorr(as.matrix(dabur.dt2))
## Pieces Cases GrossAMT Sales
## Pieces 1.00 0.82 0.70 0.69
## Cases 0.82 1.00 0.97 0.95
## GrossAMT 0.70 0.97 1.00 0.99
## Sales 0.69 0.95 0.99 1.00
##
## n= 68105
##
##
## P
## Pieces Cases GrossAMT Sales
## Pieces 0 0 0
## Cases 0 0 0
## GrossAMT 0 0 0
## Sales 0 0 0
## corrplot 0.84 loaded