1. Reading the CSV file

# Load the store sales data from the working directory.
# stringsAsFactors = TRUE is spelled out because the default became FALSE in
# R 4.0; without it `country` loads as character, whereas the str()/summary()
# output recorded below shows it as a 7-level factor.
store.df <- read.csv("StoreData.csv", stringsAsFactors = TRUE)

2. Column Data Types

# Inspect the column types of the freshly loaded data frame.
utils::str(store.df)
## 'data.frame':    2080 obs. of  10 variables:
##  $ storeNum: int  101 101 101 101 101 101 101 101 101 101 ...
##  $ Year    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Week    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ p1sales : int  127 137 156 117 138 115 116 106 116 145 ...
##  $ p2sales : int  106 105 97 106 100 127 90 126 94 91 ...
##  $ p1price : num  2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
##  $ p2price : num  2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
##  $ p1prom  : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ p2prom  : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...
# Recode Year and the two promotion flags from integer codes to factors.
# `levels` is given explicitly so each label is bound to a specific code
# (Year 1/2; promotion 0 = "No", 1 = "Yes") instead of to the sort order of
# whichever distinct values happen to be present in the column.
Year <- store.df$Year
Year.f <- factor(Year, levels = c(1, 2), labels = c("1", "2"))
p1prom.f <- factor(store.df$p1prom, levels = c(0, 1), labels = c("No", "Yes"))
p2prom.f <- factor(store.df$p2prom, levels = c(0, 1), labels = c("No", "Yes"))

# Write the recoded columns back over the original integer columns.
store.df$Year <- Year.f
store.df$p1prom <- p1prom.f
store.df$p2prom <- p2prom.f
# Confirm that each recoded column is now a factor.
is.factor(store.df[["Year"]])
## [1] TRUE
is.factor(store.df[["p1prom"]])
## [1] TRUE
is.factor(store.df[["p2prom"]])
## [1] TRUE
# Show the structure again so the new column types are visible.
utils::str(store.df)
## 'data.frame':    2080 obs. of  10 variables:
##  $ storeNum: int  101 101 101 101 101 101 101 101 101 101 ...
##  $ Year    : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Week    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ p1sales : int  127 137 156 117 138 115 116 106 116 145 ...
##  $ p2sales : int  106 105 97 106 100 127 90 126 94 91 ...
##  $ p1price : num  2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
##  $ p2price : num  2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
##  $ p1prom  : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
##  $ p2prom  : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 1 ...
##  $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...

3. Summary Statistics

# Column-wise summary: quantiles and mean for numeric columns, level counts
# for factor columns.
summary(object = store.df)
##     storeNum     Year          Week          p1sales       p2sales     
##  Min.   :101.0   1:1040   Min.   : 1.00   Min.   : 73   Min.   : 51.0  
##  1st Qu.:105.8   2:1040   1st Qu.:13.75   1st Qu.:113   1st Qu.: 84.0  
##  Median :110.5            Median :26.50   Median :129   Median : 96.0  
##  Mean   :110.5            Mean   :26.50   Mean   :133   Mean   :100.2  
##  3rd Qu.:115.2            3rd Qu.:39.25   3rd Qu.:150   3rd Qu.:113.0  
##  Max.   :120.0            Max.   :52.00   Max.   :263   Max.   :225.0  
##                                                                        
##     p1price         p2price     p1prom     p2prom     country 
##  Min.   :2.190   Min.   :2.29   No :1872   No :1792   AU:104  
##  1st Qu.:2.290   1st Qu.:2.49   Yes: 208   Yes: 288   BR:208  
##  Median :2.490   Median :2.59                         CN:208  
##  Mean   :2.544   Mean   :2.70                         DE:520  
##  3rd Qu.:2.790   3rd Qu.:2.99                         GB:312  
##  Max.   :2.990   Max.   :3.19                         JP:416  
##                                                       US:312
# Extended descriptive statistics (sd, trimmed mean, mad, skew, kurtosis, se)
# from the psych package.
library(psych)
psych::describe(store.df)
##          vars    n   mean    sd median trimmed   mad    min    max range
## storeNum    1 2080 110.50  5.77 110.50  110.50  7.41 101.00 120.00  19.0
## Year*       2 2080   1.50  0.50   1.50    1.50  0.74   1.00   2.00   1.0
## Week        3 2080  26.50 15.01  26.50   26.50 19.27   1.00  52.00  51.0
## p1sales     4 2080 133.05 28.37 129.00  131.08 26.69  73.00 263.00 190.0
## p2sales     5 2080 100.16 24.42  96.00   98.05 22.24  51.00 225.00 174.0
## p1price     6 2080   2.54  0.29   2.49    2.53  0.44   2.19   2.99   0.8
## p2price     7 2080   2.70  0.33   2.59    2.69  0.44   2.29   3.19   0.9
## p1prom*     8 2080   1.10  0.30   1.00    1.00  0.00   1.00   2.00   1.0
## p2prom*     9 2080   1.14  0.35   1.00    1.05  0.00   1.00   2.00   1.0
## country*   10 2080   4.55  1.72   4.50    4.62  2.22   1.00   7.00   6.0
##           skew kurtosis   se
## storeNum  0.00    -1.21 0.13
## Year*     0.00    -2.00 0.01
## Week      0.00    -1.20 0.33
## p1sales   0.74     0.66 0.62
## p2sales   0.99     1.51 0.54
## p1price   0.28    -1.44 0.01
## p2price   0.32    -1.40 0.01
## p1prom*   2.66     5.10 0.01
## p2prom*   2.09     2.38 0.01
## country* -0.29    -0.81 0.04

4. Country-wise Breakup

# Count the rows belonging to each country.
country <- table(country = store.df$country)
print(country)
## country
##  AU  BR  CN  DE  GB  JP  US 
## 104 208 208 520 312 416 312
# Express the same counts as a percentage of all rows.
country / sum(country) * 100
## country
## AU BR CN DE GB JP US 
##  5 10 10 25 15 20 15

5a. Promotion Stats

# Cross-tabulate the promotion status of product 1 against product 2.
promotion <- xtabs(~ p1prom + p2prom, data = store.df)
print(promotion)
##       p2prom
## p1prom   No  Yes
##    No  1616  256
##    Yes  176   32
# Convert the cell counts to percentages of all rows.
100 * prop.table(promotion)
##       p2prom
## p1prom        No       Yes
##    No  77.692308 12.307692
##    Yes  8.461538  1.538462

5b. Price during Promotion

# Mean price of each product for every combination of promotion status.
# Passing `data = store.df` lets the formula use bare column names, so the
# result columns are labelled p1prom/p2prom/p1price rather than
# store.df$p1prom etc.; the grouping and the means are unchanged.
aggregate(p1price ~ p1prom + p2prom, data = store.df, FUN = mean)
##   p1prom p2prom  p1price
## 1     No     No 2.543342
## 2    Yes     No 2.524659
## 3     No    Yes 2.561484
## 4    Yes    Yes 2.568125
aggregate(p2price ~ p1prom + p2prom, data = store.df, FUN = mean)
##   p1prom p2prom  p2price
## 1     No     No 2.700891
## 2    Yes     No 2.703068
## 3     No    Yes 2.691953
## 4    Yes    Yes 2.671250

6. Effect of Promotions

# Mean weekly sales of each product for every combination of promotion status.
# Passing `data = store.df` lets the formula use bare column names, so the
# result columns are labelled p1prom/p2prom/p1sales rather than
# store.df$p1prom etc.; the grouping and the means are unchanged.
aggregate(p1sales ~ p1prom + p2prom, data = store.df, FUN = mean)
##   p1prom p2prom  p1sales
## 1     No     No 129.2630
## 2    Yes     No 169.3523
## 3     No    Yes 127.8242
## 4    Yes    Yes 166.3438
aggregate(p2sales ~ p1prom + p2prom, data = store.df, FUN = mean)
##   p1prom p2prom   p2sales
## 1     No     No  94.87067
## 2    Yes     No  92.88636
## 3     No    Yes 134.35547
## 4    Yes    Yes 133.50000

The tables above show that sales of each product rose sharply while it was on promotion, even though its average price was only marginally lower during those weeks (see 5b).
When both promotions were running at the same time, each product’s sales were slightly lower than when only that product’s promotion was running.
We also see that during promotions, Coke’s (P1) sales increased by approximately 31%, while Pepsi’s (P2) sales increased by approximately 41%. This gap may be due to the type and quality of each promotion, so the differences should be studied and used to inform future promotions.