Question1

store.df=read.csv("StoreData.csv")
dim(store.df)
## [1] 2080   10
colnames(store.df)
##  [1] "storeNum" "Year"     "Week"     "p1sales"  "p2sales"  "p1price" 
##  [7] "p2price"  "p1prom"   "p2prom"   "country"

Question2

str(store.df)
## 'data.frame':    2080 obs. of  10 variables:
##  $ storeNum: int  101 101 101 101 101 101 101 101 101 101 ...
##  $ Year    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Week    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ p1sales : int  127 137 156 117 138 115 116 106 116 145 ...
##  $ p2sales : int  106 105 97 106 100 127 90 126 94 91 ...
##  $ p1price : num  2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
##  $ p2price : num  2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
##  $ p1prom  : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ p2prom  : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...
store.df$Year=as.factor(store.df$Year)
store.df$p1prom=as.factor(store.df$p1prom)
store.df$p2prom=as.factor(store.df$p2prom)
store.df$country=as.factor(store.df$country)
str(store.df)
## 'data.frame':    2080 obs. of  10 variables:
##  $ storeNum: int  101 101 101 101 101 101 101 101 101 101 ...
##  $ Year    : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Week    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ p1sales : int  127 137 156 117 138 115 116 106 116 145 ...
##  $ p2sales : int  106 105 97 106 100 127 90 126 94 91 ...
##  $ p1price : num  2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
##  $ p2price : num  2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
##  $ p1prom  : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
##  $ p2prom  : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
##  $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...

Question3

summary(store.df)
##     storeNum     Year          Week          p1sales       p2sales     
##  Min.   :101.0   1:1040   Min.   : 1.00   Min.   : 73   Min.   : 51.0  
##  1st Qu.:105.8   2:1040   1st Qu.:13.75   1st Qu.:113   1st Qu.: 84.0  
##  Median :110.5            Median :26.50   Median :129   Median : 96.0  
##  Mean   :110.5            Mean   :26.50   Mean   :133   Mean   :100.2  
##  3rd Qu.:115.2            3rd Qu.:39.25   3rd Qu.:150   3rd Qu.:113.0  
##  Max.   :120.0            Max.   :52.00   Max.   :263   Max.   :225.0  
##                                                                        
##     p1price         p2price     p1prom   p2prom   country 
##  Min.   :2.190   Min.   :2.29   0:1872   0:1792   AU:104  
##  1st Qu.:2.290   1st Qu.:2.49   1: 208   1: 288   BR:208  
##  Median :2.490   Median :2.59                     CN:208  
##  Mean   :2.544   Mean   :2.70                     DE:520  
##  3rd Qu.:2.790   3rd Qu.:2.99                     GB:312  
##  Max.   :2.990   Max.   :3.19                     JP:416  
##                                                   US:312
psych::describe(store.df)
##          vars    n   mean    sd median trimmed   mad    min    max range
## storeNum    1 2080 110.50  5.77 110.50  110.50  7.41 101.00 120.00  19.0
## Year*       2 2080   1.50  0.50   1.50    1.50  0.74   1.00   2.00   1.0
## Week        3 2080  26.50 15.01  26.50   26.50 19.27   1.00  52.00  51.0
## p1sales     4 2080 133.05 28.37 129.00  131.08 26.69  73.00 263.00 190.0
## p2sales     5 2080 100.16 24.42  96.00   98.05 22.24  51.00 225.00 174.0
## p1price     6 2080   2.54  0.29   2.49    2.53  0.44   2.19   2.99   0.8
## p2price     7 2080   2.70  0.33   2.59    2.69  0.44   2.29   3.19   0.9
## p1prom*     8 2080   1.10  0.30   1.00    1.00  0.00   1.00   2.00   1.0
## p2prom*     9 2080   1.14  0.35   1.00    1.05  0.00   1.00   2.00   1.0
## country*   10 2080   4.55  1.72   4.50    4.62  2.22   1.00   7.00   6.0
##           skew kurtosis   se
## storeNum  0.00    -1.21 0.13
## Year*     0.00    -2.00 0.01
## Week      0.00    -1.20 0.33
## p1sales   0.74     0.66 0.62
## p2sales   0.99     1.51 0.54
## p1price   0.28    -1.44 0.01
## p2price   0.32    -1.40 0.01
## p1prom*   2.66     5.10 0.01
## p2prom*   2.09     2.38 0.01
## country* -0.29    -0.81 0.04

Question4

cntrfreq<-with(store.df,table(country))
cntrfreq
## country
##  AU  BR  CN  DE  GB  JP  US 
## 104 208 208 520 312 416 312
prop.table(cntrfreq)*100
## country
## AU BR CN DE GB JP US 
##  5 10 10 25 15 20 15

Question5

mt<-xtabs(~p1prom+p2prom,data=store.df)
mt
##       p2prom
## p1prom    0    1
##      0 1616  256
##      1  176   32
prop.table(mt)*100
##       p2prom
## p1prom         0         1
##      0 77.692308 12.307692
##      1  8.461538  1.538462
aggregate(store.df$p1price~store.df$p1prom+store.df$p2prom,FUN=mean)
##   store.df$p1prom store.df$p2prom store.df$p1price
## 1               0               0         2.543342
## 2               1               0         2.524659
## 3               0               1         2.561484
## 4               1               1         2.568125
aggregate(store.df$p2price~store.df$p1prom+store.df$p2prom,FUN=mean)
##   store.df$p1prom store.df$p2prom store.df$p2price
## 1               0               0         2.700891
## 2               1               0         2.703068
## 3               0               1         2.691953
## 4               1               1         2.671250

For other insights

aggregate(store.df$p1price~store.df$p1prom,FUN=mean)
##   store.df$p1prom store.df$p1price
## 1               0         2.545823
## 2               1         2.531346
aggregate(store.df$p2price~store.df$p2prom,FUN=mean)
##   store.df$p2prom store.df$p2price
## 1               0         2.701105
## 2               1         2.689653
aggregate(store.df$p1sales~store.df$p1prom,FUN=mean)
##   store.df$p1prom store.df$p1sales
## 1               0         129.0662
## 2               1         168.8894
aggregate(store.df$p2sales~store.df$p2prom,FUN=mean)
##   store.df$p2prom store.df$p2sales
## 1               0         94.67578
## 2               1        134.26042
aggregate(store.df$p1sales~store.df$p1prom+store.df$p2prom,FUN=mean)
##   store.df$p1prom store.df$p2prom store.df$p1sales
## 1               0               0         129.2630
## 2               1               0         169.3523
## 3               0               1         127.8242
## 4               1               1         166.3438
aggregate(store.df$p2sales~store.df$p1prom+store.df$p2prom,FUN=mean)
##   store.df$p1prom store.df$p2prom store.df$p2sales
## 1               0               0         94.87067
## 2               1               0         92.88636
## 3               0               1        134.35547
## 4               1               1        133.50000

Qualitative insights

  1. Coke’s mean (133) as well median (129) sales were higher than that of Pepsi’s (100.2 and 96 respectively)
  2. This could probably be a result of its lower average price as well, 2.544 compared to Pepsi’s 2.7
  3. To describe the dataset, higher number of observations have been recorded in Germany (DE) - 25% of the observations followed by Japan (20%) and the US & the UK (10% each)
  4. As for the promotions, Coke was on promotion for 10% of the recorded weeks Pepsi for approximately 14% of the recorded weeks, giving higher
  5. During promotions, prices of both Coke and Pepsi dropped by ~ $0.011
  6. Comparing the sales during promotions vs that when there were none, both Coke’s and Pepsi’s sales increased by $40, however the % increase being much higher in the case of Pepsi
  7. Also, Coke suffered a drop of ~ $3 in sales when it had a promotion simultaneously with Pepsi, compared to when it had a promotion alone
  8. The effect was less pronounced on Pepsi, with it suffering just a drop of ~ $1 in sales when it had a promotion simultaneously with Coke, compared to when it had a promotion alone