Summary of Pepsi sales across all stores

# Load the store sales data from the working directory.
store.df <- read.csv("StoreData.csv")
# Distribution of product 2 sales over all rows.
summary(store.df[["p2sales"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    51.0    84.0    96.0   100.2   113.0   225.0
# Frequency of each p2prom value. The result is kept under the name `table`
# because the percentage step further down reads it; calls to table() still
# resolve to the base function.
table <- table(p2prom = store.df[["p2prom"]])
table
## p2prom
##    0    1 
## 1792  288

Percentage of observations in which the Pepsi promotion was running, across all stores

# Express the p2prom frequencies held in `table` as percentages of all rows.
100 * prop.table(table)
## p2prom
##        0        1 
## 86.15385 13.84615

Total sales of Pepsi in all stores across both years

# Grand total of product 2 sales over every row.
p2cal <- sum(store.df[["p2sales"]])
p2cal
## [1] 208326
# The same total split by the value of p2prom.
aggregate(
  store.df[["p2sales"]],
  by = list(Category = store.df[["p2prom"]]),
  FUN = sum
)
##   Category      x
## 1        0 169659
## 2        1  38667

Average sales during promotions and during no promotions

# Mean product 2 sales per row, computed from the data rather than from totals
# and counts copied out of earlier output, so the figures stay correct if the
# data file changes.
mean(store.df$p2sales[store.df$p2prom == 0]) # Average sales during no promotions
## [1] 94.67578
mean(store.df$p2sales[store.df$p2prom == 1]) # Average sales during promotions
## [1] 134.2604

Assignment Questions:

Q1

# Q1: number of rows, number of columns, and the column names.
dim(store.df)[1L]
## [1] 2080
dim(store.df)[2L]
## [1] 10
names(store.df)
##  [1] "storeNum" "Year"     "Week"     "p1sales"  "p2sales"  "p1price" 
##  [7] "p2price"  "p1prom"   "p2prom"   "country"

Q2

# Q2: show each column's class, convert the categorical columns to factors,
# then show the classes again.
sapply(X = store.df, FUN = class)
##  storeNum      Year      Week   p1sales   p2sales   p1price   p2price 
## "integer" "integer" "integer" "integer" "integer" "numeric" "numeric" 
##    p1prom    p2prom   country 
## "integer" "integer"  "factor"
# Columns to treat as categorical; `cols` is reused by the Q3 conversion.
cols <- c("Year", "p1prom", "p2prom", "country")
store.df[cols] <- lapply(cols, function(col_name) factor(store.df[[col_name]]))
sapply(X = store.df, FUN = class)
##  storeNum      Year      Week   p1sales   p2sales   p1price   p2price 
## "integer"  "factor" "integer" "integer" "integer" "numeric" "numeric" 
##    p1prom    p2prom   country 
##  "factor"  "factor"  "factor"

Q3

# Q3: turn the columns that Q2 made factors back into numeric columns.
#
# as.numeric() applied directly to a factor returns its internal level codes
# (1, 2, ...), not the numbers its labels spell out, so a 0/1 flag silently
# becomes 1/2. factor_to_numeric() recovers the labelled values instead and
# only falls back to level codes when the labels are not numbers at all.
# NOTE(review): the describe() and Q5 results recorded later in this document
# were generated from level codes (the promotion flags show min 1 / max 2, the
# Q5 counts are 0 and the means NaN) and need to be regenerated.
factor_to_numeric <- function(x) {
  if (!is.factor(x)) {
    return(as.numeric(x))
  }
  # Narrow suppressWarnings(): non-numeric labels are expected and handled below.
  level_values <- suppressWarnings(as.numeric(levels(x)))
  if (anyNA(level_values)) {
    # Labels carry no numeric meaning; keep the level codes.
    as.numeric(x)
  } else {
    # Indexing by the factor maps every element to its label's numeric value.
    level_values[x]
  }
}
store.df[cols] <- lapply(store.df[cols], factor_to_numeric)
sapply(store.df, class)
##  storeNum      Year      Week   p1sales   p2sales   p1price   p2price 
## "integer" "numeric" "integer" "integer" "integer" "numeric" "numeric" 
##    p1prom    p2prom   country 
## "numeric" "numeric" "numeric"
# Q3 (continued): descriptive statistics for each column, one describe() call
# per column in the order the columns appear in the data frame.
library(psych)
## Warning: package 'psych' was built under R version 3.5.1
describe(store.df[["storeNum"]])
##    vars    n  mean   sd median trimmed  mad min max range skew kurtosis
## X1    1 2080 110.5 5.77  110.5   110.5 7.41 101 120    19    0    -1.21
##      se
## X1 0.13
describe(store.df[["Year"]])
##    vars    n mean  sd median trimmed  mad min max range skew kurtosis   se
## X1    1 2080  1.5 0.5    1.5     1.5 0.74   1   2     1    0       -2 0.01
describe(store.df[["Week"]])
##    vars    n mean    sd median trimmed   mad min max range skew kurtosis
## X1    1 2080 26.5 15.01   26.5    26.5 19.27   1  52    51    0     -1.2
##      se
## X1 0.33
describe(store.df[["p1sales"]])
##    vars    n   mean    sd median trimmed   mad min max range skew kurtosis
## X1    1 2080 133.05 28.37    129  131.08 26.69  73 263   190 0.74     0.66
##      se
## X1 0.62
describe(store.df[["p2sales"]])
##    vars    n   mean    sd median trimmed   mad min max range skew kurtosis
## X1    1 2080 100.16 24.42     96   98.05 22.24  51 225   174 0.99     1.51
##      se
## X1 0.54
describe(store.df[["p1price"]])
##    vars    n mean   sd median trimmed  mad  min  max range skew kurtosis
## X1    1 2080 2.54 0.29   2.49    2.53 0.44 2.19 2.99   0.8 0.28    -1.44
##      se
## X1 0.01
describe(store.df[["p2price"]])
##    vars    n mean   sd median trimmed  mad  min  max range skew kurtosis
## X1    1 2080  2.7 0.33   2.59    2.69 0.44 2.29 3.19   0.9 0.32     -1.4
##      se
## X1 0.01
describe(store.df[["p1prom"]])
##    vars    n mean  sd median trimmed mad min max range skew kurtosis   se
## X1    1 2080  1.1 0.3      1       1   0   1   2     1 2.66      5.1 0.01
describe(store.df[["p2prom"]])
##    vars    n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 2080 1.14 0.35      1    1.05   0   1   2     1 2.09     2.38 0.01
describe(store.df[["country"]])
##    vars    n mean   sd median trimmed  mad min max range  skew kurtosis
## X1    1 2080 4.55 1.72    4.5    4.62 2.22   1   7     6 -0.29    -0.81
##      se
## X1 0.04

Q4

# Q4: summary of the country column, followed by the share of rows (in percent)
# for each country value. The frequency table overwrites the `table` variable.
summary(store.df[["country"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    3.75    4.50    4.55    6.00    7.00
table <- table(country = store.df[["country"]])
100 * prop.table(table)
## country
##  1  2  3  4  5  6  7 
##  5 10 10 25 15 20 15

Q5

# Q5: how the two promotion flags combine, and the prices that go with them.
# The comparisons assume p1prom and p2prom are coded 0 (off) / 1 (on).
# Each mask is written out explicitly rather than negated, so a value that is
# neither 0 nor 1 matches no combination.
p1_on <- store.df$p1prom == 1
p1_off <- store.df$p1prom == 0
p2_on <- store.df$p2prom == 1
p2_off <- store.df$p2prom == 0
# na.rm = TRUE mirrors which(), which ignores NA comparisons.
count1 <- sum(p1_on & p2_on, na.rm = TRUE) #a1
count2 <- sum(p1_on & p2_off, na.rm = TRUE) #a2
count3 <- sum(p1_off & p2_on, na.rm = TRUE) #a3
count4 <- sum(p1_off & p2_off, na.rm = TRUE) #a4
count1
## [1] 1616
count2
## [1] 0
count3
## [1] 0
count4
## [1] 0
# Percentages use the actual row count instead of a hard-coded 2080.
n_rows <- nrow(store.df)
count1 / n_rows * 100 #b
## [1] 77.69231
count2 / n_rows * 100 #b
## [1] 0
count3 / n_rows * 100 #b
## [1] 0
count4 / n_rows * 100 #b
## [1] 0
# Mean prices grouped by the logical AND of the two flags (any non-zero value
# counts as TRUE).
aggregate(store.df$p1price, by = list(Category = store.df$p2prom & store.df$p1prom), mean) #c #d
##   Category        x
## 1     TRUE 2.544375
aggregate(store.df$p2price, by = list(Category = store.df$p2prom & store.df$p1prom), mean) #c #d
##   Category        x
## 1     TRUE 2.699519
# Price columns are selected by name rather than by position (6:7).
price_cols <- c("p1price", "p2price")
avgprice1 <- store.df[p1_on & p2_off, price_cols] #e
mean(avgprice1$p1price) #e
## [1] NaN
mean(avgprice1$p2price) #e
## [1] NaN
avgprice2 <- store.df[p1_off & p2_on, price_cols] #e
mean(avgprice2$p1price) #e
## [1] NaN
mean(avgprice2$p2price) #e
## [1] NaN