1. Reading local data in R
# reading data and storing into `store.df` dataframe
store.df <- read.csv(paste("StoreData.csv"))
# number of rows and columns
dim(store.df)
## [1] 2080 10
# column names
names(store.df)
## [1] "storeNum" "Year" "Week" "p1sales" "p2sales" "p1price"
## [7] "p2price" "p1prom" "p2prom" "country"
2. Understanding data in R
# output data types of different columns
str(store.df)
## 'data.frame': 2080 obs. of 10 variables:
## $ storeNum: int 101 101 101 101 101 101 101 101 101 101 ...
## $ Year : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Week : int 1 2 3 4 5 6 7 8 9 10 ...
## $ p1sales : int 127 137 156 117 138 115 116 106 116 145 ...
## $ p2sales : int 106 105 97 106 100 127 90 126 94 91 ...
## $ p1price : num 2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
## $ p2price : num 2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
## $ p1prom : int 0 0 1 0 0 0 0 0 0 0 ...
## $ p2prom : int 0 0 0 0 1 0 0 0 0 0 ...
## $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...
# converting into factor variables
store.df$Year.f <- factor(store.df$Year, labels = c("2001","2002"))
store.df$p1prom.f <- factor(store.df$p1prom, labels = c("No prom","Prom"))
store.df$p2prom.f <- factor(store.df$p2prom, labels = c("No prom","Prom"))
3. Summarising data in R
# summary statistics
summary(store.df)
## storeNum Year Week p1sales
## Min. :101.0 Min. :1.0 Min. : 1.00 Min. : 73
## 1st Qu.:105.8 1st Qu.:1.0 1st Qu.:13.75 1st Qu.:113
## Median :110.5 Median :1.5 Median :26.50 Median :129
## Mean :110.5 Mean :1.5 Mean :26.50 Mean :133
## 3rd Qu.:115.2 3rd Qu.:2.0 3rd Qu.:39.25 3rd Qu.:150
## Max. :120.0 Max. :2.0 Max. :52.00 Max. :263
##
## p2sales p1price p2price p1prom
## Min. : 51.0 Min. :2.190 Min. :2.29 Min. :0.0
## 1st Qu.: 84.0 1st Qu.:2.290 1st Qu.:2.49 1st Qu.:0.0
## Median : 96.0 Median :2.490 Median :2.59 Median :0.0
## Mean :100.2 Mean :2.544 Mean :2.70 Mean :0.1
## 3rd Qu.:113.0 3rd Qu.:2.790 3rd Qu.:2.99 3rd Qu.:0.0
## Max. :225.0 Max. :2.990 Max. :3.19 Max. :1.0
##
## p2prom country Year.f p1prom.f p2prom.f
## Min. :0.0000 AU:104 2001:1040 No prom:1872 No prom:1792
## 1st Qu.:0.0000 BR:208 2002:1040 Prom : 208 Prom : 288
## Median :0.0000 CN:208
## Mean :0.1385 DE:520
## 3rd Qu.:0.0000 GB:312
## Max. :1.0000 JP:416
## US:312
# using describe function
library(psych)
describe(store.df)
## vars n mean sd median trimmed mad min max range
## storeNum 1 2080 110.50 5.77 110.50 110.50 7.41 101.00 120.00 19.0
## Year 2 2080 1.50 0.50 1.50 1.50 0.74 1.00 2.00 1.0
## Week 3 2080 26.50 15.01 26.50 26.50 19.27 1.00 52.00 51.0
## p1sales 4 2080 133.05 28.37 129.00 131.08 26.69 73.00 263.00 190.0
## p2sales 5 2080 100.16 24.42 96.00 98.05 22.24 51.00 225.00 174.0
## p1price 6 2080 2.54 0.29 2.49 2.53 0.44 2.19 2.99 0.8
## p2price 7 2080 2.70 0.33 2.59 2.69 0.44 2.29 3.19 0.9
## p1prom 8 2080 0.10 0.30 0.00 0.00 0.00 0.00 1.00 1.0
## p2prom 9 2080 0.14 0.35 0.00 0.05 0.00 0.00 1.00 1.0
## country* 10 2080 4.55 1.72 4.50 4.62 2.22 1.00 7.00 6.0
## Year.f* 11 2080 1.50 0.50 1.50 1.50 0.74 1.00 2.00 1.0
## p1prom.f* 12 2080 1.10 0.30 1.00 1.00 0.00 1.00 2.00 1.0
## p2prom.f* 13 2080 1.14 0.35 1.00 1.05 0.00 1.00 2.00 1.0
## skew kurtosis se
## storeNum 0.00 -1.21 0.13
## Year 0.00 -2.00 0.01
## Week 0.00 -1.20 0.33
## p1sales 0.74 0.66 0.62
## p2sales 0.99 1.51 0.54
## p1price 0.28 -1.44 0.01
## p2price 0.32 -1.40 0.01
## p1prom 2.66 5.10 0.01
## p2prom 2.09 2.38 0.01
## country* -0.29 -0.81 0.04
## Year.f* 0.00 -2.00 0.01
## p1prom.f* 2.66 5.10 0.01
## p2prom.f* 2.09 2.38 0.01
4. Analysis by country
# Frequency at country
table(store.df$country)
##
## AU BR CN DE GB JP US
## 104 208 208 520 312 416 312
# using describe function
prop.table(table(store.df$country))*100
##
## AU BR CN DE GB JP US
## 5 10 10 25 15 20 15
5. Summarising Pepsi sales
# summary statistics of Pepsi
attach(store.df)
summary(p2sales)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 51.0 84.0 96.0 100.2 113.0 225.0
#Average sales of Pepsi on promotional and non promotional days
aggregate(p2sales ~ p2prom, store.df, mean)
## p2prom p2sales
## 1 0 94.67578
## 2 1 134.26042
6. Analysing both products based on Promotion sales
## Analysing at store level
# Creating tables for promotion analysis for each store
## mytable <- table(store.df$p1prom.f, store.df$p2prom.f, store.df$storeNum)
mytable <- xtabs(~ p1prom+p2prom+storeNum, data=store.df)
mytable
## , , storeNum = 101
##
## p2prom
## p1prom 0 1
## 0 85 11
## 1 7 1
##
## , , storeNum = 102
##
## p2prom
## p1prom 0 1
## 0 88 9
## 1 7 0
##
## , , storeNum = 103
##
## p2prom
## p1prom 0 1
## 0 72 20
## 1 8 4
##
## , , storeNum = 104
##
## p2prom
## p1prom 0 1
## 0 91 10
## 1 2 1
##
## , , storeNum = 105
##
## p2prom
## p1prom 0 1
## 0 78 14
## 1 11 1
##
## , , storeNum = 106
##
## p2prom
## p1prom 0 1
## 0 75 20
## 1 7 2
##
## , , storeNum = 107
##
## p2prom
## p1prom 0 1
## 0 83 12
## 1 8 1
##
## , , storeNum = 108
##
## p2prom
## p1prom 0 1
## 0 71 16
## 1 15 2
##
## , , storeNum = 109
##
## p2prom
## p1prom 0 1
## 0 84 12
## 1 8 0
##
## , , storeNum = 110
##
## p2prom
## p1prom 0 1
## 0 84 13
## 1 6 1
##
## , , storeNum = 111
##
## p2prom
## p1prom 0 1
## 0 85 12
## 1 5 2
##
## , , storeNum = 112
##
## p2prom
## p1prom 0 1
## 0 79 15
## 1 9 1
##
## , , storeNum = 113
##
## p2prom
## p1prom 0 1
## 0 74 15
## 1 11 4
##
## , , storeNum = 114
##
## p2prom
## p1prom 0 1
## 0 78 18
## 1 8 0
##
## , , storeNum = 115
##
## p2prom
## p1prom 0 1
## 0 83 9
## 1 8 4
##
## , , storeNum = 116
##
## p2prom
## p1prom 0 1
## 0 81 13
## 1 10 0
##
## , , storeNum = 117
##
## p2prom
## p1prom 0 1
## 0 81 12
## 1 10 1
##
## , , storeNum = 118
##
## p2prom
## p1prom 0 1
## 0 77 9
## 1 15 3
##
## , , storeNum = 119
##
## p2prom
## p1prom 0 1
## 0 81 9
## 1 11 3
##
## , , storeNum = 120
##
## p2prom
## p1prom 0 1
## 0 86 7
## 1 10 1
## converting to percentages
prop.table(mytable)*100
## , , storeNum = 101
##
## p2prom
## p1prom 0 1
## 0 4.08653846 0.52884615
## 1 0.33653846 0.04807692
##
## , , storeNum = 102
##
## p2prom
## p1prom 0 1
## 0 4.23076923 0.43269231
## 1 0.33653846 0.00000000
##
## , , storeNum = 103
##
## p2prom
## p1prom 0 1
## 0 3.46153846 0.96153846
## 1 0.38461538 0.19230769
##
## , , storeNum = 104
##
## p2prom
## p1prom 0 1
## 0 4.37500000 0.48076923
## 1 0.09615385 0.04807692
##
## , , storeNum = 105
##
## p2prom
## p1prom 0 1
## 0 3.75000000 0.67307692
## 1 0.52884615 0.04807692
##
## , , storeNum = 106
##
## p2prom
## p1prom 0 1
## 0 3.60576923 0.96153846
## 1 0.33653846 0.09615385
##
## , , storeNum = 107
##
## p2prom
## p1prom 0 1
## 0 3.99038462 0.57692308
## 1 0.38461538 0.04807692
##
## , , storeNum = 108
##
## p2prom
## p1prom 0 1
## 0 3.41346154 0.76923077
## 1 0.72115385 0.09615385
##
## , , storeNum = 109
##
## p2prom
## p1prom 0 1
## 0 4.03846154 0.57692308
## 1 0.38461538 0.00000000
##
## , , storeNum = 110
##
## p2prom
## p1prom 0 1
## 0 4.03846154 0.62500000
## 1 0.28846154 0.04807692
##
## , , storeNum = 111
##
## p2prom
## p1prom 0 1
## 0 4.08653846 0.57692308
## 1 0.24038462 0.09615385
##
## , , storeNum = 112
##
## p2prom
## p1prom 0 1
## 0 3.79807692 0.72115385
## 1 0.43269231 0.04807692
##
## , , storeNum = 113
##
## p2prom
## p1prom 0 1
## 0 3.55769231 0.72115385
## 1 0.52884615 0.19230769
##
## , , storeNum = 114
##
## p2prom
## p1prom 0 1
## 0 3.75000000 0.86538462
## 1 0.38461538 0.00000000
##
## , , storeNum = 115
##
## p2prom
## p1prom 0 1
## 0 3.99038462 0.43269231
## 1 0.38461538 0.19230769
##
## , , storeNum = 116
##
## p2prom
## p1prom 0 1
## 0 3.89423077 0.62500000
## 1 0.48076923 0.00000000
##
## , , storeNum = 117
##
## p2prom
## p1prom 0 1
## 0 3.89423077 0.57692308
## 1 0.48076923 0.04807692
##
## , , storeNum = 118
##
## p2prom
## p1prom 0 1
## 0 3.70192308 0.43269231
## 1 0.72115385 0.14423077
##
## , , storeNum = 119
##
## p2prom
## p1prom 0 1
## 0 3.89423077 0.43269231
## 1 0.52884615 0.14423077
##
## , , storeNum = 120
##
## p2prom
## p1prom 0 1
## 0 4.13461538 0.33653846
## 1 0.48076923 0.04807692
#Aggregating based on promotions
## prom_table <- aggregate(Week ~ storeNum+p1prom+p2prom, data = store.df, length)
#Average price and sales of both products based on promotion of products
aggregate(cbind(p1sales,p2sales,p1price,p2price) ~ p1prom.f+p2prom.f, data = store.df, mean)
## p1prom.f p2prom.f p1sales p2sales p1price p2price
## 1 No prom No prom 129.2630 94.87067 2.543342 2.700891
## 2 Prom No prom 169.3523 92.88636 2.524659 2.703068
## 3 No prom Prom 127.8242 134.35547 2.561484 2.691953
## 4 Prom Prom 166.3438 133.50000 2.568125 2.671250
## Analysing at country level
#Average price and sales of both products based on promotion of products at country level
aggregate(cbind(p1sales,p2sales,p1price,p2price) ~ country+p1prom.f+p2prom.f, data = store.df, mean)
## country p1prom.f p2prom.f p1sales p2sales p1price p2price
## 1 AU No prom No prom 133.9481 90.70130 2.519870 2.764026
## 2 BR No prom No prom 130.9477 94.86275 2.517451 2.707647
## 3 CN No prom No prom 125.5928 97.34731 2.584611 2.668443
## 4 DE No prom No prom 129.8166 94.26884 2.539246 2.698040
## 5 GB No prom No prom 130.0356 94.43874 2.541779 2.712134
## 6 JP No prom No prom 127.2879 95.38390 2.542322 2.678545
## 7 US No prom No prom 130.1469 95.24490 2.548367 2.721429
## 8 AU Prom No prom 177.8000 85.66667 2.376667 2.716667
## 9 BR Prom No prom 163.7000 96.95000 2.590000 2.665000
## 10 CN Prom No prom 172.4762 92.00000 2.509048 2.785238
## 11 DE Prom No prom 161.9535 96.27907 2.559767 2.655116
## 12 GB Prom No prom 153.3684 96.10526 2.647895 2.663684
## 13 JP Prom No prom 182.0000 88.00000 2.456667 2.756667
## 14 US Prom No prom 173.3182 93.54545 2.517273 2.690000
## 15 AU No prom Prom 119.6667 140.55556 2.678889 2.612222
## 16 BR No prom Prom 124.2667 138.80000 2.590000 2.646667
## 17 CN No prom Prom 128.3750 138.62500 2.658750 2.721250
## 18 DE No prom Prom 126.4722 133.73611 2.551111 2.708056
## 19 GB No prom Prom 126.9189 138.56757 2.538649 2.638649
## 20 JP No prom Prom 131.6731 130.21154 2.524615 2.728462
## 21 US No prom Prom 130.3750 130.52500 2.562500 2.705000
## 22 AU Prom Prom 162.0000 133.33333 2.590000 2.723333
## 23 BR Prom Prom 159.8000 149.00000 2.610000 2.590000
## 24 CN Prom Prom 182.7500 126.00000 2.365000 2.690000
## 25 DE Prom Prom 162.7143 139.28571 2.675714 2.704286
## 26 GB Prom Prom 159.0000 139.33333 2.790000 2.756667
## 27 JP Prom Prom 173.6000 119.20000 2.470000 2.670000
## 28 US Prom Prom 164.6000 126.80000 2.490000 2.610000