Changing the working directory
# Point the R session at the project folder.
# NOTE(review): this is an absolute, machine-specific path; the read.csv() call
# that follows also spells out the full path, so this call may be redundant --
# confirm before removing.
setwd("C:/Users/Makka/Desktop/term 5/dam")
Reading csv and displaying table stats
# Load the store sales data into `store.df`.
# sep = "," is already the read.csv() default; it is kept explicit so the
# expected delimiter is visible.
store.df <- read.csv("C:/Users/Makka/Desktop/term 5/dam/StoreData.csv", sep = ",")
# View() opens a spreadsheet-style viewer, which only makes sense when a person
# is at the console; skip it when the script is rendered or run in batch mode.
if (interactive()) {
  View(store.df)
}
# Row and column counts as a quick sanity check on the import.
nrow(store.df)
## [1] 2080
ncol(store.df)
## [1] 10
Attaching the data frame
# Put the columns of store.df on the search path so they can be referenced by
# bare name (storeNum, Year, ...) in the calls below.
# NOTE(review): attach() exposes a copy of the data frame; a later assignment
# such as `Year = ...` creates a separate global variable that masks the
# attached column instead of modifying store.df.
attach(store.df)
# Number of rows recorded per store.
table(storeNum)
## storeNum
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
## 104 104 104 104 104 104 104 104 104 104 104 104 104 104 104 104 104 104
## 119 120
## 104 104
# Distinct values taken by the year, promotion-flag and country columns.
unique(Year)
## [1] 1 2
unique(p1prom)
## [1] 0 1
unique(p2prom)
## [1] 0 1
unique(country)
## [1] US DE GB BR JP AU CN
## Levels: AU BR CN DE GB JP US
Data Types and conversion to factors
# Inspect the column types as imported: storeNum, Year, p1prom and p2prom
# arrive as integers even though they are categorical.
str(store.df)
## 'data.frame': 2080 obs. of 10 variables:
## $ storeNum: int 101 101 101 101 101 101 101 101 101 101 ...
## $ Year : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Week : int 1 2 3 4 5 6 7 8 9 10 ...
## $ p1sales : int 127 137 156 117 138 115 116 106 116 145 ...
## $ p2sales : int 106 105 97 106 100 127 90 126 94 91 ...
## $ p1price : num 2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
## $ p2price : num 2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
## $ p1prom : int 0 0 1 0 0 0 0 0 0 0 ...
## $ p2prom : int 0 0 0 0 1 0 0 0 0 0 ...
## $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...

# Convert the categorical columns to factors, always starting from the full
# column in store.df.
# The earlier version first overwrote Year, p1prom and p2prom with the
# two-element vectors c(1, 2) / c(0, 1); data.frame() then silently recycled
# those across all 2080 rows, replacing the real values with an alternating
# pattern. Reading from store.df explicitly also avoids depending on which
# object a bare name resolves to after attach().
storeNum <- factor(store.df$storeNum)
Year <- factor(store.df$Year)
p1prom <- factor(store.df$p1prom)
p2prom <- factor(store.df$p2prom)

# `store` is a copy of store.df with the four columns swapped for their factor
# versions; column names and order are unchanged.
store <- store.df
store$storeNum <- storeNum
store$Year <- Year
store$p1prom <- p1prom
store$p2prom <- p2prom

# Check the converted frame (store.df itself is left untouched).
str(store)
# Expected output after the fix -- re-run to refresh the exact formatting:
## 'data.frame': 2080 obs. of 10 variables:
## $ storeNum: Factor w/ 20 levels "101","102","103",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Year : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ Week : int 1 2 3 4 5 6 7 8 9 10 ...
## $ p1sales : int 127 137 156 117 138 115 116 106 116 145 ...
## $ p2sales : int 106 105 97 106 100 127 90 126 94 91 ...
## $ p1price : num 2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
## $ p2price : num 2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
## $ p1prom : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
## $ p2prom : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
## $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...
Summary statistics
# Print a labelled summary of one product's sales and draw a horizontal
# boxplot of the same values.
#   sales: numeric vector of sales figures
#   brand: product name used in the printed heading and the plot labels
# Returns NULL invisibly; called for its printed output and plot.
describe_sales <- function(sales, brand) {
  print(paste(brand, "Sales Summary"))
  # summary() does not auto-print inside a function, so print it explicitly.
  print(summary(sales))
  boxplot(
    sales,
    xlab = "Sales",
    ylab = brand,
    main = paste("Sale of", brand),
    horizontal = TRUE
  )
  invisible(NULL)
}

describe_sales(p1sales, "Coke")
## [1] "Coke Sales Summary"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 73 113 129 133 150 263

describe_sales(p2sales, "Pepsi")
## [1] "Pepsi Sales Summary"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 51.0 84.0 96.0 100.2 113.0 225.0
# Column-wise summary of the raw import. storeNum, Year, p1prom and p2prom are
# still integer columns in store.df, so they are summarised as numbers; since
# the promotion flags only take the values 0 and 1, their means are the share
# of rows with the flag set. country is a factor and is shown as level counts.
summary(store.df)
## storeNum Year Week p1sales
## Min. :101.0 Min. :1.0 Min. : 1.00 Min. : 73
## 1st Qu.:105.8 1st Qu.:1.0 1st Qu.:13.75 1st Qu.:113
## Median :110.5 Median :1.5 Median :26.50 Median :129
## Mean :110.5 Mean :1.5 Mean :26.50 Mean :133
## 3rd Qu.:115.2 3rd Qu.:2.0 3rd Qu.:39.25 3rd Qu.:150
## Max. :120.0 Max. :2.0 Max. :52.00 Max. :263
##
## p2sales p1price p2price p1prom
## Min. : 51.0 Min. :2.190 Min. :2.29 Min. :0.0
## 1st Qu.: 84.0 1st Qu.:2.290 1st Qu.:2.49 1st Qu.:0.0
## Median : 96.0 Median :2.490 Median :2.59 Median :0.0
## Mean :100.2 Mean :2.544 Mean :2.70 Mean :0.1
## 3rd Qu.:113.0 3rd Qu.:2.790 3rd Qu.:2.99 3rd Qu.:0.0
## Max. :225.0 Max. :2.990 Max. :3.19 Max. :1.0
##
## p2prom country
## Min. :0.0000 AU:104
## 1st Qu.:0.0000 BR:208
## Median :0.0000 CN:208
## Mean :0.1385 DE:520
## 3rd Qu.:0.0000 GB:312
## Max. :1.0000 JP:416
## US:312
# Extended descriptives (sd, trimmed mean, mad, skew, kurtosis, se) from the
# psych package, called with an explicit namespace so no library() is needed.
# NOTE(review): the trailing * on country* appears to be psych's marker for a
# non-numeric column that was converted to numbers -- confirm in the psych docs;
# its mean/sd are not meaningful.
psych::describe(store.df)

## vars n mean sd median trimmed mad min max range
## storeNum 1 2080 110.50 5.77 110.50 110.50 7.41 101.00 120.00 19.0
## Year 2 2080 1.50 0.50 1.50 1.50 0.74 1.00 2.00 1.0
## Week 3 2080 26.50 15.01 26.50 26.50 19.27 1.00 52.00 51.0
## p1sales 4 2080 133.05 28.37 129.00 131.08 26.69 73.00 263.00 190.0
## p2sales 5 2080 100.16 24.42 96.00 98.05 22.24 51.00 225.00 174.0
## p1price 6 2080 2.54 0.29 2.49 2.53 0.44 2.19 2.99 0.8
## p2price 7 2080 2.70 0.33 2.59 2.69 0.44 2.29 3.19 0.9
## p1prom 8 2080 0.10 0.30 0.00 0.00 0.00 0.00 1.00 1.0
## p2prom 9 2080 0.14 0.35 0.00 0.05 0.00 0.00 1.00 1.0
## country* 10 2080 4.55 1.72 4.50 4.62 2.22 1.00 7.00 6.0
## skew kurtosis se
## storeNum 0.00 -1.21 0.13
## Year 0.00 -2.00 0.01
## Week 0.00 -1.20 0.33
## p1sales 0.74 0.66 0.62
## p2sales 0.99 1.51 0.54
## p1price 0.28 -1.44 0.01
## p2price 0.32 -1.40 0.01
## p1prom 2.66 5.10 0.01
## p2prom 2.09 2.38 0.01
## country* -0.29 -0.81 0.04
Data breakdown by country
# Count the rows per country once, keep the counts in `t`, and show them.
t <- table(country)
t
## country
## AU BR CN DE GB JP US
## 104 208 208 520 312 416 312
# Each country's share of all rows, as a percentage of the grand total.
(t / sum(t)) * 100
## country
## AU BR CN DE GB JP US
## 5 10 10 25 15 20 15
Question 5
Percentages:
# Express each cell of the p1prom x p2prom cross-tabulation as a percentage of
# all rows (prop.table() with no margin divides by the grand total).
# NOTE(review): promTable is not defined in this part of the file; presumably
# it is a table of p1prom against p2prom -- confirm it is created before this
# line runs.
prop.table(promTable)*100
## p2prom
## p1prom 0 1
## 0 77.692308 12.307692
## 1 8.461538 1.538462
Question 6
The following observations can be made:
- Pepsi is priced higher than Coke, with or without promotions.
- Consumers tend to buy Coke more than Pepsi; however, sales of Pepsi are higher only when Pepsi is promoted and Coke is not.
- All countries prefer Coke over Pepsi, as shown by the sales figures.
- Japan remains marginally loyal to Coke even when Pepsi is being promoted; other countries switch their favourite drink.
- Lower price leads to higher sales.