# Load the store sales data and inspect its structure.
# stringsAsFactors = TRUE is spelled out because read.csv() stopped converting
# strings to factors by default in R 4.0.0; the output recorded below shows
# `country` as a factor, so the conversion must be requested explicitly.
store.df <- read.csv("StoreData.csv", stringsAsFactors = TRUE)
str(store.df)
## 'data.frame': 2080 obs. of 10 variables:
## $ storeNum: int 101 101 101 101 101 101 101 101 101 101 ...
## $ Year : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Week : int 1 2 3 4 5 6 7 8 9 10 ...
## $ p1sales : int 127 137 156 117 138 115 116 106 116 145 ...
## $ p2sales : int 106 105 97 106 100 127 90 126 94 91 ...
## $ p1price : num 2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
## $ p2price : num 2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
## $ p1prom : int 0 0 1 0 0 0 0 0 0 0 ...
## $ p2prom : int 0 0 0 0 1 0 0 0 0 0 ...
## $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...
# Recode Year and the two promotion indicators as factors.
# The intermediate vectors are kept as globals, exactly as before.
Year <- store.df[["Year"]]
Year.f <- factor(Year, labels = c("1", "2"))
p1prom.f <- factor(store.df[["p1prom"]], labels = c("No", "Yes"))
p2prom.f <- factor(store.df[["p2prom"]], labels = c("No", "Yes"))
# Replace the three original columns in place; column order is unchanged.
store.df <- transform(
  store.df,
  Year = Year.f,
  p1prom = p1prom.f,
  p2prom = p2prom.f
)
# Check that Year, p1prom and p2prom are stored as factors in store.df.
is.factor(store.df$Year)
## [1] TRUE
is.factor(store.df$p1prom)
## [1] TRUE
is.factor(store.df$p2prom)
## [1] TRUE
# Print the structure of the data frame.
str(store.df)
## 'data.frame': 2080 obs. of 10 variables:
## $ storeNum: int 101 101 101 101 101 101 101 101 101 101 ...
## $ Year : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ Week : int 1 2 3 4 5 6 7 8 9 10 ...
## $ p1sales : int 127 137 156 117 138 115 116 106 116 145 ...
## $ p2sales : int 106 105 97 106 100 127 90 126 94 91 ...
## $ p1price : num 2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
## $ p2price : num 2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
## $ p1prom : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
## $ p2prom : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 1 ...
## $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...
# Per-column summary of store.df: in the output below, numeric columns show
# min/quartiles/mean/max and factor columns show a count per level.
summary(store.df)
## storeNum Year Week p1sales p2sales
## Min. :101.0 1:1040 Min. : 1.00 Min. : 73 Min. : 51.0
## 1st Qu.:105.8 2:1040 1st Qu.:13.75 1st Qu.:113 1st Qu.: 84.0
## Median :110.5 Median :26.50 Median :129 Median : 96.0
## Mean :110.5 Mean :26.50 Mean :133 Mean :100.2
## 3rd Qu.:115.2 3rd Qu.:39.25 3rd Qu.:150 3rd Qu.:113.0
## Max. :120.0 Max. :52.00 Max. :263 Max. :225.0
##
## p1price p2price p1prom p2prom country
## Min. :2.190 Min. :2.29 No :1872 No :1792 AU:104
## 1st Qu.:2.290 1st Qu.:2.49 Yes: 208 Yes: 288 BR:208
## Median :2.490 Median :2.59 CN:208
## Mean :2.544 Mean :2.70 DE:520
## 3rd Qu.:2.790 3rd Qu.:2.99 GB:312
## Max. :2.990 Max. :3.19 JP:416
## US:312
# Load psych; describe() below is presumably psych::describe (it is not a
# base R function).
library("psych")
# Descriptive statistics (n, mean, sd, median, skew, kurtosis, se, ...) for
# every column. In the output below, the factor columns (Year, p1prom,
# p2prom, country) are the ones marked with `*`.
describe(store.df)
## vars n mean sd median trimmed mad min max range
## storeNum 1 2080 110.50 5.77 110.50 110.50 7.41 101.00 120.00 19.0
## Year* 2 2080 1.50 0.50 1.50 1.50 0.74 1.00 2.00 1.0
## Week 3 2080 26.50 15.01 26.50 26.50 19.27 1.00 52.00 51.0
## p1sales 4 2080 133.05 28.37 129.00 131.08 26.69 73.00 263.00 190.0
## p2sales 5 2080 100.16 24.42 96.00 98.05 22.24 51.00 225.00 174.0
## p1price 6 2080 2.54 0.29 2.49 2.53 0.44 2.19 2.99 0.8
## p2price 7 2080 2.70 0.33 2.59 2.69 0.44 2.29 3.19 0.9
## p1prom* 8 2080 1.10 0.30 1.00 1.00 0.00 1.00 2.00 1.0
## p2prom* 9 2080 1.14 0.35 1.00 1.05 0.00 1.00 2.00 1.0
## country* 10 2080 4.55 1.72 4.50 4.62 2.22 1.00 7.00 6.0
## skew kurtosis se
## storeNum 0.00 -1.21 0.13
## Year* 0.00 -2.00 0.01
## Week 0.00 -1.20 0.33
## p1sales 0.74 0.66 0.62
## p2sales 0.99 1.51 0.54
## p1price 0.28 -1.44 0.01
## p2price 0.32 -1.40 0.01
## p1prom* 2.66 5.10 0.01
## p2prom* 2.09 2.38 0.01
## country* -0.29 -0.81 0.04
# Frequency table: number of observations per country.
country <- table(country = store.df$country)
country
## country
## AU BR CN DE GB JP US
## 104 208 208 520 312 416 312
# The same table expressed as a percentage of all observations.
100 * prop.table(country)
## country
## AU BR CN DE GB JP US
## 5 10 10 25 15 20 15
# Two-way contingency table: product 1 promotion status (rows) against
# product 2 promotion status (columns).
promotion <- xtabs(~ p1prom + p2prom, data = store.df)
promotion
## p2prom
## p1prom No Yes
## No 1616 256
## Yes 176 32
# Each cell as a percentage of all observations.
100 * prop.table(promotion)
## p2prom
## p1prom No Yes
## No 77.692308 12.307692
## Yes 8.461538 1.538462
# Mean price of product 1 for each combination of promotion states.
# The columns are resolved through `data = store.df` rather than repeating
# `store.df$` inside the formula, so the result columns carry the bare
# variable names.
aggregate(p1price ~ p1prom + p2prom, data = store.df, FUN = mean)
## p1prom p2prom p1price
## 1 No No 2.543342
## 2 Yes No 2.524659
## 3 No Yes 2.561484
## 4 Yes Yes 2.568125
# Mean price of product 2 for each combination of promotion states.
# The columns are resolved through `data = store.df` rather than repeating
# `store.df$` inside the formula, so the result columns carry the bare
# variable names.
aggregate(p2price ~ p1prom + p2prom, data = store.df, FUN = mean)
## p1prom p2prom p2price
## 1 No No 2.700891
## 2 Yes No 2.703068
## 3 No Yes 2.691953
## 4 Yes Yes 2.671250
# Mean sales of product 1 for each combination of promotion states.
# The columns are resolved through `data = store.df` rather than repeating
# `store.df$` inside the formula, so the result columns carry the bare
# variable names.
aggregate(p1sales ~ p1prom + p2prom, data = store.df, FUN = mean)
## p1prom p2prom p1sales
## 1 No No 129.2630
## 2 Yes No 169.3523
## 3 No Yes 127.8242
## 4 Yes Yes 166.3438
# Mean sales of product 2 for each combination of promotion states.
# The columns are resolved through `data = store.df` rather than repeating
# `store.df$` inside the formula, so the result columns carry the bare
# variable names.
aggregate(p2sales ~ p1prom + p2prom, data = store.df, FUN = mean)
## p1prom p2prom p2sales
## 1 No No 94.87067
## 2 Yes No 92.88636
## 3 No Yes 134.35547
## 4 Yes Yes 133.50000
The tables above show that, although average prices changed only marginally during promotions (by a few cents at most), sales of each product rose sharply while that product was being promoted.
When both promotions ran at the same time, each product’s sales were slightly lower than when only that product was being promoted (166.3 vs. 169.4 for P1; 133.5 vs. 134.4 for P2).
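Also, compared with weeks in which neither product was promoted, Coke’s (P1) sales increased by approximately 31% during its own promotion, while Pepsi’s (P2) sales increased by approximately 42% during its own promotion. This difference may be due to the type and quality of each promotion, so it should be studied further and the findings used to design future promotions.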