Changing the working directory

# Make the project folder the working directory for the rest of the session.
# NOTE(review): this absolute path is machine-specific; setwd() raises an error
# if the folder does not exist on the machine running the script.
setwd("C:/Users/Makka/Desktop/term 5/dam")

Reading the CSV file and displaying the table dimensions

# Load the store sales data into store.df.
# stringsAsFactors = TRUE is spelled out because R >= 4.0.0 no longer converts
# character columns to factors by default, and the output recorded later in
# this file shows `country` as a factor with seven levels.
store.df <- read.csv(
  "C:/Users/Makka/Desktop/term 5/dam/StoreData.csv",
  sep = ",",
  stringsAsFactors = TRUE
)
# View() opens a data viewer window, so only call it when a user is present;
# this keeps non-interactive runs (Rscript, knitting) from failing here.
if (interactive()) {
  View(store.df)
}
# Dimensions of the loaded table: rows, then columns.
nrow(store.df)
## [1] 2080
ncol(store.df)
## [1] 10

Attaching the data frame

# Put the columns of store.df on the search path; later chunks refer to
# columns such as p1sales and country by their bare names.
attach(store.df)

# Number of records per store.
table(storeNum = store.df$storeNum)
## storeNum
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 
## 104 104 104 104 104 104 104 104 104 104 104 104 104 104 104 104 104 104 
## 119 120 
## 104 104

# Distinct values taken by the year, promotion-flag and country columns.
unique(store.df$Year)
## [1] 1 2
unique(store.df$p1prom)
## [1] 0 1
unique(store.df$p2prom)
## [1] 0 1
unique(store.df$country)
## [1] US DE GB BR JP AU CN
## Levels: AU BR CN DE GB JP US

Data Types and conversion to factors

# Column types of the data exactly as read from the CSV.
str(store.df)
## 'data.frame':    2080 obs. of  10 variables:
##  $ storeNum: int  101 101 101 101 101 101 101 101 101 101 ...
##  $ Year    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Week    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ p1sales : int  127 137 156 117 138 115 116 106 116 145 ...
##  $ p2sales : int  106 105 97 106 100 127 90 126 94 91 ...
##  $ p1price : num  2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
##  $ p2price : num  2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
##  $ p1prom  : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ p2prom  : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...

# Build `store`: a copy of store.df (same columns, same order) in which the
# categorical columns are stored as factors.
# The conversion is applied to the real column values. Assigning short vectors
# such as c(1, 2) and passing them to data.frame() would recycle them across
# all rows and silently replace the observed Year / promotion values.
store <- store.df
factor_cols <- c("storeNum", "Year", "p1prom", "p2prom")
store[factor_cols] <- lapply(store[factor_cols], factor)

# Inspect the converted copy (store.df itself is left unchanged).
str(store)
## (output not recorded: re-run to capture it; storeNum, Year, p1prom and
##  p2prom should now be listed as Factor columns)

Summary statistics

# Coke (p1sales): labelled summary statistics followed by a horizontal boxplot.
print("Coke Sales Summary")
## [1] "Coke Sales Summary"
summary(store.df$p1sales)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      73     113     129     133     150     263
boxplot(
  store.df$p1sales,
  horizontal = TRUE,
  main = "Sale of Coke",
  xlab = "Sales",
  ylab = "Coke"
)

# Pepsi (p2sales): labelled summary statistics followed by a horizontal boxplot.
print("Pepsi Sales Summary")
## [1] "Pepsi Sales Summary"
summary(store.df$p2sales)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    51.0    84.0    96.0   100.2   113.0   225.0
boxplot(
  store.df$p2sales,
  horizontal = TRUE,
  main = "Sale of Pepsi",
  xlab = "Sales",
  ylab = "Pepsi"
)
# Per-column summary of the raw data frame: quartiles and mean for the numeric
# columns, level counts for the country factor.
summary(store.df)
##     storeNum          Year          Week          p1sales   
##  Min.   :101.0   Min.   :1.0   Min.   : 1.00   Min.   : 73  
##  1st Qu.:105.8   1st Qu.:1.0   1st Qu.:13.75   1st Qu.:113  
##  Median :110.5   Median :1.5   Median :26.50   Median :129  
##  Mean   :110.5   Mean   :1.5   Mean   :26.50   Mean   :133  
##  3rd Qu.:115.2   3rd Qu.:2.0   3rd Qu.:39.25   3rd Qu.:150  
##  Max.   :120.0   Max.   :2.0   Max.   :52.00   Max.   :263  
##                                                             
##     p2sales         p1price         p2price         p1prom   
##  Min.   : 51.0   Min.   :2.190   Min.   :2.29   Min.   :0.0  
##  1st Qu.: 84.0   1st Qu.:2.290   1st Qu.:2.49   1st Qu.:0.0  
##  Median : 96.0   Median :2.490   Median :2.59   Median :0.0  
##  Mean   :100.2   Mean   :2.544   Mean   :2.70   Mean   :0.1  
##  3rd Qu.:113.0   3rd Qu.:2.790   3rd Qu.:2.99   3rd Qu.:0.0  
##  Max.   :225.0   Max.   :2.990   Max.   :3.19   Max.   :1.0  
##                                                              
##      p2prom       country 
##  Min.   :0.0000   AU:104  
##  1st Qu.:0.0000   BR:208  
##  Median :0.0000   CN:208  
##  Mean   :0.1385   DE:520  
##  3rd Qu.:0.0000   GB:312  
##  Max.   :1.0000   JP:416  
##                   US:312
# Extended descriptive statistics (sd, trimmed mean, mad, skew, kurtosis, se)
# from the psych package; called through `::` so the package is not attached.
psych::describe(store.df)

##          vars    n   mean    sd median trimmed   mad    min    max range
## storeNum    1 2080 110.50  5.77 110.50  110.50  7.41 101.00 120.00  19.0
## Year        2 2080   1.50  0.50   1.50    1.50  0.74   1.00   2.00   1.0
## Week        3 2080  26.50 15.01  26.50   26.50 19.27   1.00  52.00  51.0
## p1sales     4 2080 133.05 28.37 129.00  131.08 26.69  73.00 263.00 190.0
## p2sales     5 2080 100.16 24.42  96.00   98.05 22.24  51.00 225.00 174.0
## p1price     6 2080   2.54  0.29   2.49    2.53  0.44   2.19   2.99   0.8
## p2price     7 2080   2.70  0.33   2.59    2.69  0.44   2.29   3.19   0.9
## p1prom      8 2080   0.10  0.30   0.00    0.00  0.00   0.00   1.00   1.0
## p2prom      9 2080   0.14  0.35   0.00    0.05  0.00   0.00   1.00   1.0
## country*   10 2080   4.55  1.72   4.50    4.62  2.22   1.00   7.00   6.0
##           skew kurtosis   se
## storeNum  0.00    -1.21 0.13
## Year      0.00    -2.00 0.01
## Week      0.00    -1.20 0.33
## p1sales   0.74     0.66 0.62
## p2sales   0.99     1.51 0.54
## p1price   0.28    -1.44 0.01
## p2price   0.32    -1.40 0.01
## p1prom    2.66     5.10 0.01
## p2prom    2.09     2.38 0.01
## country* -0.29    -0.81 0.04

Data break up by country

# Number of records per country, computed once and reused for the percentages.
# The table is stored under a descriptive name rather than `t`, which would
# shadow base R's transpose function t().
country_counts <- table(country)
country_counts
## country
##  AU  BR  CN  DE  GB  JP  US 
## 104 208 208 520 312 416 312
# Share of records per country, in percent.
prop.table(country_counts) * 100
## country
## AU BR CN DE GB JP US 
##  5 10 10 25 15 20 15

Question 5

Contingency table for Promotions

# Cross-tabulate the two promotion flags: rows are p1prom (Coke), columns are
# p2prom (Pepsi); each cell counts the records with that combination.
promTable <- xtabs(
  ~ p1prom + p2prom,
  data = store.df
)
promTable
##       p2prom
## p1prom    0    1
##      0 1616  256
##      1  176   32

Percentages:

# Express every cell of the promotion contingency table as a percentage of
# all records.
100 * prop.table(promTable)
##       p2prom
## p1prom         0         1
##      0 77.692308 12.307692
##      1  8.461538  1.538462

Price Comparison with neither product promoted

# Records where neither product is on promotion.
noProm <- subset(store.df, p1prom == 0 & p2prom == 0)

# Average shelf price of each product within this subset.
with(noProm, mean(p1price))
## [1] 2.543342
with(noProm, mean(p2price))
## [1] 2.700891

# Total sales of each product within this subset.
with(noProm, sum(p1sales))
## [1] 208889
with(noProm, sum(p2sales))
## [1] 153311

# Sales totals per country, Coke (p1sales) first, then Pepsi (p2sales).
xtabs(p1sales ~ country, data = noProm)
## country
##    AU    BR    CN    DE    GB    JP    US 
## 10314 20035 20974 51667 32899 41114 31886
xtabs(p2sales ~ country, data = noProm)
## country
##    AU    BR    CN    DE    GB    JP    US 
##  6984 14514 16257 37519 23893 30809 23335

Price Comparison with both products promoted

# Records where both products are on promotion at the same time.
bothProm <- subset(store.df, p1prom == 1 & p2prom == 1)

# Average shelf price of each product within this subset.
with(bothProm, mean(p1price))
## [1] 2.568125
with(bothProm, mean(p2price))
## [1] 2.67125

# Total sales of each product within this subset.
with(bothProm, sum(p1sales))
## [1] 5323
with(bothProm, sum(p2sales))
## [1] 4272

# Sales totals per country, Coke (p1sales) first, then Pepsi (p2sales).
xtabs(p1sales ~ country, data = bothProm)
## country
##   AU   BR   CN   DE   GB   JP   US 
##  486  799  731 1139  477  868  823
xtabs(p2sales ~ country, data = bothProm)
## country
##  AU  BR  CN  DE  GB  JP  US 
## 400 745 504 975 418 596 634

Price Comparison with Coke promoted

# Records where only Coke (product 1) is on promotion.
p1Promo <- subset(store.df, p1prom == 1 & p2prom == 0)

# Average shelf price of each product within this subset.
with(p1Promo, mean(p1price))
## [1] 2.524659
with(p1Promo, mean(p2price))
## [1] 2.703068

# Total sales of each product within this subset.
with(p1Promo, sum(p1sales))
## [1] 29806
with(p1Promo, sum(p2sales))
## [1] 16348

# Sales totals per country, Coke (p1sales) first, then Pepsi (p2sales).
xtabs(p1sales ~ country, data = p1Promo)
## country
##   AU   BR   CN   DE   GB   JP   US 
## 2667 3274 3622 6964 2914 6552 3813
xtabs(p2sales ~ country, data = p1Promo)
## country
##   AU   BR   CN   DE   GB   JP   US 
## 1285 1939 1932 4140 1826 3168 2058

Price Comparison with Pepsi promoted

# Records where only Pepsi (product 2) is on promotion.
p2Promo <- subset(store.df, p1prom == 0 & p2prom == 1)

# Average shelf price of each product within this subset.
with(p2Promo, mean(p1price))
## [1] 2.561484
with(p2Promo, mean(p2price))
## [1] 2.691953

# Total sales of each product within this subset.
with(p2Promo, sum(p1sales))
## [1] 32723
with(p2Promo, sum(p2sales))
## [1] 34395

# Sales totals per country, Coke (p1sales) first, then Pepsi (p2sales).
xtabs(p1sales ~ country, data = p2Promo)
## country
##   AU   BR   CN   DE   GB   JP   US 
## 1077 3728 2054 9106 4696 6847 5215
xtabs(p2sales ~ country, data = p2Promo)
## country
##   AU   BR   CN   DE   GB   JP   US 
## 1265 4164 2218 9629 5127 6771 5221

Question 6

Following observations can be made:

  1. Pepsi is priced higher than Coke, with or without promotions.
  2. Consumers tend to buy more Coke than Pepsi; Pepsi outsells Coke only when Pepsi is promoted and Coke is not.
  3. Overall, every country prefers Coke over Pepsi, as the sales figures show.
  4. Japan remains marginally loyal to Coke even when only Pepsi is being promoted; every other country switches to Pepsi in that case.
  5. Lower price → higher sales.