# Read the store sales data from StoreData.csv (resolved against the working
# directory). stringsAsFactors = TRUE keeps `country` a factor, matching the
# str() output recorded below, also on R >= 4.0 where read.csv() no longer
# converts strings to factors by default.
store.df <- read.csv("StoreData.csv", stringsAsFactors = TRUE)
# Number of rows (first element of the data frame's dimensions).
dim(store.df)[1]
## [1] 2080
# Number of columns (the length of a data frame is its column count).
length(store.df)
## [1] 10
# Column names.
names(store.df)
## [1] "storeNum" "Year" "Week" "p1sales" "p2sales" "p1price"
## [7] "p2price" "p1prom" "p2prom" "country"
# Compact overview: type and leading values of every column.
str(store.df)
## 'data.frame': 2080 obs. of 10 variables:
## $ storeNum: int 101 101 101 101 101 101 101 101 101 101 ...
## $ Year : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Week : int 1 2 3 4 5 6 7 8 9 10 ...
## $ p1sales : int 127 137 156 117 138 115 116 106 116 145 ...
## $ p2sales : int 106 105 97 106 100 127 90 126 94 91 ...
## $ p1price : num 2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
## $ p2price : num 2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
## $ p1prom : int 0 0 1 0 0 0 0 0 0 0 ...
## $ p2prom : int 0 0 0 0 1 0 0 0 0 0 ...
## $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...
# Standalone factor copies of the categorical columns. The labels are digit
# strings assigned to the levels in sorted order; store.df itself is left
# untouched.
# NOTE(review): Year holds the values 1 and 2, so label "0" stands for year 1
# and "1" for year 2 -- confirm this relabelling is intended.
Year.f <- factor(store.df[["Year"]], labels = c("0", "1"))
p1prom.f <- factor(store.df[["p1prom"]], labels = c("0", "1"))
p2prom.f <- factor(store.df[["p2prom"]], labels = c("0", "1"))
country.f <- factor(
  store.df[["country"]],
  labels = c("0", "1", "2", "3", "4", "5", "6")
)
# Confirm that each conversion produced a factor.
is.factor(country.f)
## [1] TRUE
is.factor(Year.f)
## [1] TRUE
is.factor(p1prom.f)
## [1] TRUE
is.factor(p2prom.f)
## [1] TRUE
# Per-column summary of the data frame. The factor copies above are not part
# of store.df, so Year, p1prom and p2prom are still summarised as numbers.
summary(store.df)
## storeNum Year Week p1sales
## Min. :101.0 Min. :1.0 Min. : 1.00 Min. : 73
## 1st Qu.:105.8 1st Qu.:1.0 1st Qu.:13.75 1st Qu.:113
## Median :110.5 Median :1.5 Median :26.50 Median :129
## Mean :110.5 Mean :1.5 Mean :26.50 Mean :133
## 3rd Qu.:115.2 3rd Qu.:2.0 3rd Qu.:39.25 3rd Qu.:150
## Max. :120.0 Max. :2.0 Max. :52.00 Max. :263
##
## p2sales p1price p2price p1prom
## Min. : 51.0 Min. :2.190 Min. :2.29 Min. :0.0
## 1st Qu.: 84.0 1st Qu.:2.290 1st Qu.:2.49 1st Qu.:0.0
## Median : 96.0 Median :2.490 Median :2.59 Median :0.0
## Mean :100.2 Mean :2.544 Mean :2.70 Mean :0.1
## 3rd Qu.:113.0 3rd Qu.:2.790 3rd Qu.:2.99 3rd Qu.:0.0
## Max. :225.0 Max. :2.990 Max. :3.19 Max. :1.0
##
## p2prom country
## Min. :0.0000 AU:104
## 1st Qu.:0.0000 BR:208
## Median :0.0000 CN:208
## Mean :0.1385 DE:520
## 3rd Qu.:0.0000 GB:312
## Max. :1.0000 JP:416
## US:312
# Load psych from the default library search path. The earlier call pinned
# lib.loc to "~/R/win-library/3.5", a per-user Windows directory for one R
# version, which makes the script fail on any other machine or R release.
library(psych)
# Extended descriptive statistics for every column (sd, trimmed mean, mad,
# skew, kurtosis, se); the factor column is marked with "*" in the output.
describe(store.df)
## vars n mean sd median trimmed mad min max range
## storeNum 1 2080 110.50 5.77 110.50 110.50 7.41 101.00 120.00 19.0
## Year 2 2080 1.50 0.50 1.50 1.50 0.74 1.00 2.00 1.0
## Week 3 2080 26.50 15.01 26.50 26.50 19.27 1.00 52.00 51.0
## p1sales 4 2080 133.05 28.37 129.00 131.08 26.69 73.00 263.00 190.0
## p2sales 5 2080 100.16 24.42 96.00 98.05 22.24 51.00 225.00 174.0
## p1price 6 2080 2.54 0.29 2.49 2.53 0.44 2.19 2.99 0.8
## p2price 7 2080 2.70 0.33 2.59 2.69 0.44 2.29 3.19 0.9
## p1prom 8 2080 0.10 0.30 0.00 0.00 0.00 0.00 1.00 1.0
## p2prom 9 2080 0.14 0.35 0.00 0.05 0.00 0.00 1.00 1.0
## country* 10 2080 4.55 1.72 4.50 4.62 2.22 1.00 7.00 6.0
## skew kurtosis se
## storeNum 0.00 -1.21 0.13
## Year 0.00 -2.00 0.01
## Week 0.00 -1.20 0.33
## p1sales 0.74 0.66 0.62
## p2sales 0.99 1.51 0.54
## p1price 0.28 -1.44 0.01
## p2price 0.32 -1.40 0.01
## p1prom 2.66 5.10 0.01
## p2prom 2.09 2.38 0.01
## country* -0.29 -0.81 0.04
# Number of observations (store-weeks) recorded for each country.
mytable3 <- table(country = store.df$country)
mytable3
## country
## AU BR CN DE GB JP US
## 104 208 208 520 312 416 312
# The same counts expressed as a percentage of all observations.
mytable3 / sum(mytable3) * 100
## country
## AU BR CN DE GB JP US
## 5 10 10 25 15 20 15
# Store-weeks in which both products were on promotion.
both_promo <- subset(store.df, p1prom == 1 & p2prom == 1)
# Number of such weeks per store. The formula reads storeNum directly from
# both_promo, so the former `data = store.df` argument was never consulted and
# has been dropped; the term is kept verbatim so the printed header is
# unchanged.
# NOTE(review): stores without any such week are absent from the table rather
# than listed with a count of 0.
Count_weeks_both_promo <- xtabs(~ both_promo$storeNum)
Count_weeks_both_promo
## both_promo$storeNum
## 101 103 104 105 106 107 108 110 111 112 113 115 117 118 119 120
## 1 4 1 1 2 1 2 1 2 1 4 4 1 3 3 1
# Number of weeks (out of 104) in which each store promoted both products at the same time.
# Store-weeks in which only product 1 (Coke) was on promotion.
coke_promo <- subset(store.df, p1prom == 1 & p2prom == 0)
# Number of such weeks per store. The formula reads storeNum directly from
# coke_promo, so the former `data = store.df` argument was never consulted and
# has been dropped; the term is kept verbatim so the printed header is
# unchanged.
Count_weeks_coke_promo <- xtabs(~ coke_promo$storeNum)
Count_weeks_coke_promo
## coke_promo$storeNum
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
## 7 7 8 2 11 7 8 15 8 6 5 9 11 8 8 10 10 15
## 119 120
## 11 10
# Number of weeks (out of 104) in which each store promoted only Coke (product 1).
# Store-weeks in which only product 2 (Pepsi) was on promotion.
pepsi_promo <- subset(store.df, p1prom == 0 & p2prom == 1)
# Number of such weeks per store. The formula reads storeNum directly from
# pepsi_promo, so the former `data = store.df` argument was never consulted
# and has been dropped; the term is kept verbatim so the printed header is
# unchanged.
Count_weeks_pepsi_promo <- xtabs(~ pepsi_promo$storeNum)
Count_weeks_pepsi_promo
## pepsi_promo$storeNum
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
## 11 9 20 10 14 20 12 16 12 13 12 15 15 18 9 13 12 9
## 119 120
## 9 7
# Number of weeks (out of 104) in which each store promoted only Pepsi (product 2).
# Store-weeks in which neither product was on promotion.
no_promo <- subset(store.df, p1prom == 0 & p2prom == 0)
# Number of such weeks per store. The formula reads storeNum directly from
# no_promo, so the former `data = store.df` argument was never consulted and
# has been dropped; the term is kept verbatim so the printed header is
# unchanged.
Count_weeks_no_promo <- xtabs(~ no_promo$storeNum)
Count_weeks_no_promo
## no_promo$storeNum
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
## 85 88 72 91 78 75 83 71 84 84 85 79 74 78 83 81 81 77
## 119 120
## 81 86
# Number of weeks (out of 104) in which each store ran no promotion at all.
# Both-product promotion weeks per store as a percentage of 104 weeks.
100 * Count_weeks_both_promo / 104
## both_promo$storeNum
## 101 103 104 105 106 107 108
## 0.9615385 3.8461538 0.9615385 0.9615385 1.9230769 0.9615385 1.9230769
## 110 111 112 113 115 117 118
## 0.9615385 1.9230769 0.9615385 3.8461538 3.8461538 0.9615385 2.8846154
## 119 120
## 2.8846154 0.9615385
# Percentage of weeks in which each store promoted both products.
# Coke-only promotion weeks per store as a percentage of 104 weeks.
100 * Count_weeks_coke_promo / 104
## coke_promo$storeNum
## 101 102 103 104 105 106 107
## 6.730769 6.730769 7.692308 1.923077 10.576923 6.730769 7.692308
## 108 109 110 111 112 113 114
## 14.423077 7.692308 5.769231 4.807692 8.653846 10.576923 7.692308
## 115 116 117 118 119 120
## 7.692308 9.615385 9.615385 14.423077 10.576923 9.615385
# Percentage of weeks in which each store promoted only Coke.
# Pepsi-only promotion weeks per store as a percentage of 104 weeks.
100 * Count_weeks_pepsi_promo / 104
## pepsi_promo$storeNum
## 101 102 103 104 105 106 107
## 10.576923 8.653846 19.230769 9.615385 13.461538 19.230769 11.538462
## 108 109 110 111 112 113 114
## 15.384615 11.538462 12.500000 11.538462 14.423077 14.423077 17.307692
## 115 116 117 118 119 120
## 8.653846 12.500000 11.538462 8.653846 8.653846 6.730769
# Percentage of weeks in which each store promoted only Pepsi.
# Promotion-free weeks per store as a percentage of 104 weeks.
100 * Count_weeks_no_promo / 104
## no_promo$storeNum
## 101 102 103 104 105 106 107 108
## 81.73077 84.61538 69.23077 87.50000 75.00000 72.11538 79.80769 68.26923
## 109 110 111 112 113 114 115 116
## 80.76923 80.76923 81.73077 75.96154 71.15385 75.00000 79.80769 77.88462
## 117 118 119 120
## 77.88462 74.03846 77.88462 82.69231
# Percentage of weeks in which each store ran no promotion.
# Mean price of each product over the weeks in which both were promoted ...
with(both_promo, mean(p1price))
## [1] 2.568125
with(both_promo, mean(p2price))
## [1] 2.67125
# ... and over the weeks in which neither was promoted.
with(no_promo, mean(p1price))
## [1] 2.543342
with(no_promo, mean(p2price))
## [1] 2.700891
# Weeks in which exactly one product was promoted: the Coke-only rows stacked
# on top of the Pepsi-only rows.
one_promo <- rbind(coke_promo, pepsi_promo)
# Mean price of each product over those single-promotion weeks.
with(one_promo, mean(p1price))
## [1] 2.546481
with(one_promo, mean(p2price))
## [1] 2.696481