Summary of pepsi sales in all stores
# Read the store data set from a CSV file in the working directory.
store.df <- read.csv("StoreData.csv")
# Min / quartiles / mean / max of the p2sales column.
summary(store.df$p2sales)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 51.0 84.0 96.0 100.2 113.0 225.0
# Count how many rows carry each value of the p2prom flag. dnn supplies the
# "p2prom" header shown above the printed counts.
table <- table(store.df$p2prom, dnn = "p2prom")
table
## p2prom
## 0 1
## 1792 288
Percentage of times the promotions were running in all stores
# Turn the counts into shares of all rows, expressed in percent.
100 * prop.table(table)
## p2prom
## 0 1
## 86.15385 13.84615
Total Pepsi sales in all stores across both years
# Grand total of p2sales over every row of the data set.
p2cal <- sum(store.df[["p2sales"]])
p2cal
## [1] 208326
# The same total split by the p2prom flag: one output row per flag value.
aggregate(x = store.df[["p2sales"]], by = list(Category = store.df[["p2prom"]]), FUN = sum)
## Category x
## 1 0 169659
## 2 1 38667
Average sales during promotions and during no promotions
# Mean p2sales per row for each value of the p2prom flag, computed straight
# from the data so the figures cannot drift from hand-copied totals and counts.
mean(store.df$p2sales[store.df$p2prom == 0]) # Average sales during no promotions
## [1] 94.67578
mean(store.df$p2sales[store.df$p2prom == 1]) # Average sales during promotions
## [1] 134.2604
Assignment Questions:
Q1
# Q1: size of the data frame, rows first and then columns.
dim(store.df)[1L]
## [1] 2080
dim(store.df)[2L]
## [1] 10
# Column names in the order they appear in the data frame.
names(store.df)
## [1] "storeNum" "Year" "Week" "p1sales" "p2sales" "p1price"
## [7] "p2price" "p1prom" "p2prom" "country"
Q2
# Q2: class of every column as it came out of read.csv().
sapply(store.df, class)
## storeNum Year Week p1sales p2sales p1price p2price
## "integer" "integer" "integer" "integer" "integer" "numeric" "numeric"
## p1prom p2prom country
## "integer" "integer" "factor"
# Columns to be treated as categorical.
cols <- c("Year", "p1prom", "p2prom", "country")
# Re-encode each of them as a factor, then list the classes again to confirm.
store.df[cols] <- lapply(store.df[cols], function(column) factor(column))
sapply(store.df, class)
## storeNum Year Week p1sales p2sales p1price p2price
## "integer" "factor" "integer" "integer" "integer" "numeric" "numeric"
## p1prom p2prom country
## "factor" "factor" "factor"
Q3
# Q3: turn the factor columns back into numbers.
# as.numeric() on a factor returns its internal level codes (1, 2, ...), not
# the labels, so 0/1 promotion flags would silently become 1/2. Columns whose
# labels are numbers are therefore converted via as.character() first.
numeric_label_cols <- c("Year", "p1prom", "p2prom")
store.df[numeric_label_cols] <- lapply(
  store.df[numeric_label_cols],
  function(column) as.numeric(as.character(column))
)
# country was read in as text (factor), so only its level codes are available
# as numbers; the country labels themselves are not kept in this column.
store.df["country"] <- lapply(store.df["country"], as.numeric)
sapply(store.df, class)
## storeNum Year Week p1sales p2sales p1price p2price
## "integer" "numeric" "integer" "integer" "integer" "numeric" "numeric"
## p1prom p2prom country
## "numeric" "numeric" "numeric"
library(psych)
## Warning: package 'psych' was built under R version 3.5.1
# Q3: descriptive statistics from describe() (psych is loaded just above),
# one call per column so each table is printed under its own call.
describe(store.df[["storeNum"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 2080 110.5 5.77 110.5 110.5 7.41 101 120 19 0 -1.21
## se
## X1 0.13
describe(store.df[["Year"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 2080 1.5 0.5 1.5 1.5 0.74 1 2 1 0 -2 0.01
describe(store.df[["Week"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 2080 26.5 15.01 26.5 26.5 19.27 1 52 51 0 -1.2
## se
## X1 0.33
describe(store.df[["p1sales"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 2080 133.05 28.37 129 131.08 26.69 73 263 190 0.74 0.66
## se
## X1 0.62
describe(store.df[["p2sales"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 2080 100.16 24.42 96 98.05 22.24 51 225 174 0.99 1.51
## se
## X1 0.54
describe(store.df[["p1price"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 2080 2.54 0.29 2.49 2.53 0.44 2.19 2.99 0.8 0.28 -1.44
## se
## X1 0.01
describe(store.df[["p2price"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 2080 2.7 0.33 2.59 2.69 0.44 2.29 3.19 0.9 0.32 -1.4
## se
## X1 0.01
# NOTE(review): the recorded output for p1prom and p2prom has min 1 / max 2,
# whereas the earlier table of p2prom shows values 0 and 1. This looks like
# factor level codes rather than the raw flags - confirm the conversion above.
describe(store.df[["p1prom"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 2080 1.1 0.3 1 1 0 1 2 1 2.66 5.1 0.01
describe(store.df[["p2prom"]])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 2080 1.14 0.35 1 1.05 0 1 2 1 2.09 2.38 0.01
# country is numeric at this point, so these are statistics of its codes.
describe(store.df[["country"]])
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 2080 4.55 1.72 4.5 4.62 2.22 1 7 6 -0.29 -0.81
## se
## X1 0.04
Q4
# Q4: numeric summary of the country column (it is numeric at this point).
summary(store.df[["country"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 3.75 4.50 4.55 6.00 7.00
# Share of rows per country value, in percent. dnn keeps the "country" header.
table <- table(store.df$country, dnn = "country")
100 * prop.table(table)
## country
## 1 2 3 4 5 6 7
## 5 10 10 25 15 20 15
Q5
# Q5: row counts, percentages and mean prices for combinations of the two
# promotion flags. The #a1..#e tags mark the sub-question each line answers.
# NOTE(review): the comparisons assume p1prom/p2prom hold 0/1. The recorded
# results (three zero counts, NaN means) and the describe() output for these
# columns (min 1, max 2) suggest they held factor level codes 1/2 when this
# was run; if so, count1 is really counting rows with no promotion at all.
# Confirm the Q3 conversion and re-run before relying on these numbers.
count1 <- sum(store.df$p1prom == 1 & store.df$p2prom == 1, na.rm = TRUE) #a1
count2 <- sum(store.df$p1prom == 1 & store.df$p2prom == 0, na.rm = TRUE) #a2
count3 <- sum(store.df$p1prom == 0 & store.df$p2prom == 1, na.rm = TRUE) #a3
count4 <- sum(store.df$p1prom == 0 & store.df$p2prom == 0, na.rm = TRUE) #a4
count1
## [1] 1616
count2
## [1] 0
count3
## [1] 0
count4
## [1] 0
# Percentages use the actual number of rows instead of a hard-coded 2080.
count1 / nrow(store.df) * 100 #b
## [1] 77.69231
count2 / nrow(store.df) * 100 #b
## [1] 0
count3 / nrow(store.df) * 100 #b
## [1] 0
count4 / nrow(store.df) * 100 #b
## [1] 0
# Mean price grouped by the logical AND of the two flags (non-zero counts as
# TRUE), so there is at most one TRUE and one FALSE group.
aggregate(store.df$p1price, by = list(Category = store.df$p2prom & store.df$p1prom), FUN = mean) #c #d
## Category x
## 1 TRUE 2.544375
aggregate(store.df$p2price, by = list(Category = store.df$p2prom & store.df$p1prom), FUN = mean) #c #d
## Category x
## 1 TRUE 2.699519
# Price columns are selected by name rather than by position (6:7).
# mean() of an empty selection is NaN, which is what the recorded output shows.
avgprice1 <- store.df[store.df$p1prom == 1 & store.df$p2prom == 0, c("p1price", "p2price")] #e
mean(avgprice1$p1price) #e
## [1] NaN
mean(avgprice1$p2price) #e
## [1] NaN
avgprice2 <- store.df[store.df$p1prom == 0 & store.df$p2prom == 1, c("p1price", "p2price")] #e
mean(avgprice2$p1price) #e
## [1] NaN
mean(avgprice2$p2price) #e
## [1] NaN