#Read StoreData.csv from DAM 2018 and store it in dataframe
setwd("C:/Users/Anshumaan/Desktop/2018 DAM")
store.df <- read.csv("StoreData.csv",header=TRUE)
************************************************************************
#Q1.a Write R code to show the total number of rows and columns present in the data frame store.df
Rdimensions <- dim(store.df)
print(paste0("Number of rows are ",Rdimensions[1]," and Number of Columns are ",Rdimensions[2]))
## [1] "Number of rows are 2080 and Number of Columns are 10"
************************************************************************
#Q1b. Write R code to list the names of the column present in the data frame store.df
print(colnames(store.df))
## [1] "storeNum" "Year" "Week" "p1sales" "p2sales" "p1price"
## [7] "p2price" "p1prom" "p2prom" "country"
************************************************************************
#Q2a. Write R code to output the data types of the different columns.
str(store.df)
## 'data.frame': 2080 obs. of 10 variables:
## $ storeNum: int 101 101 101 101 101 101 101 101 101 101 ...
## $ Year : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Week : int 1 2 3 4 5 6 7 8 9 10 ...
## $ p1sales : int 127 137 156 117 138 115 116 106 116 145 ...
## $ p2sales : int 106 105 97 106 100 127 90 126 94 91 ...
## $ p1price : num 2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
## $ p2price : num 2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
## $ p1prom : int 0 0 1 0 0 0 0 0 0 0 ...
## $ p2prom : int 0 0 0 0 1 0 0 0 0 0 ...
## $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...
************************************************************************
#Q2b. Write R code to convert the data types of the columns {Year, p1prom, p2prom, country} into factor variables
store.df$Year <- as.factor(store.df$Year)
store.df$p1prom <- as.factor(store.df$p1prom)
store.df$p2prom <- as.factor(store.df$p2prom)
store.df$country <- as.factor(store.df$country)
************************************************************************
#Q2c. Write R code to verify the conversions done in Q2b
#Answer1
str(store.df)
## 'data.frame': 2080 obs. of 10 variables:
## $ storeNum: int 101 101 101 101 101 101 101 101 101 101 ...
## $ Year : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ Week : int 1 2 3 4 5 6 7 8 9 10 ...
## $ p1sales : int 127 137 156 117 138 115 116 106 116 145 ...
## $ p2sales : int 106 105 97 106 100 127 90 126 94 91 ...
## $ p1price : num 2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
## $ p2price : num 2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
## $ p1prom : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
## $ p2prom : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
## $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...
#Answer2
is.factor(store.df$Year)
## [1] TRUE
is.factor(store.df$p1prom)
## [1] TRUE
is.factor(store.df$p2prom)
## [1] TRUE
is.factor(store.df$country)
## [1] TRUE
************************************************************************
#Q3a. Write R code to generate the summary statistics of the different variables present in the data frame store.df
summary(store.df)
## storeNum Year Week p1sales p2sales
## Min. :101.0 1:1040 Min. : 1.00 Min. : 73 Min. : 51.0
## 1st Qu.:105.8 2:1040 1st Qu.:13.75 1st Qu.:113 1st Qu.: 84.0
## Median :110.5 Median :26.50 Median :129 Median : 96.0
## Mean :110.5 Mean :26.50 Mean :133 Mean :100.2
## 3rd Qu.:115.2 3rd Qu.:39.25 3rd Qu.:150 3rd Qu.:113.0
## Max. :120.0 Max. :52.00 Max. :263 Max. :225.0
##
## p1price p2price p1prom p2prom country
## Min. :2.190 Min. :2.29 0:1872 0:1792 AU:104
## 1st Qu.:2.290 1st Qu.:2.49 1: 208 1: 288 BR:208
## Median :2.490 Median :2.59 CN:208
## Mean :2.544 Mean :2.70 DE:520
## 3rd Qu.:2.790 3rd Qu.:2.99 GB:312
## Max. :2.990 Max. :3.19 JP:416
## US:312
************************************************************************
#Q3b. Write R code to generate the following table using the describe() function from the psych package in R.
library(psych)
psych::describe(store.df)
************************************************************************
#QUESTION 4
#Q4a. Break-up store.df by country. Write R code to generate the following break-ups by country.
mytable <- with(store.df, table(country))
print(mytable)
## country
## AU BR CN DE GB JP US
## 104 208 208 520 312 416 312
#Q4b. Write R code to express Q4a numbers in terms of percentages.
prop.table(mytable)*100
## country
## AU BR CN DE GB JP US
## 5 10 10 25 15 20 15
************************************************************************
#Q5a.The dataset has 2080 rows of weekly data for 20 unique store branches for two different years. For product 1 (Coke) and product 2 (Pepsi), write R code to count the number of weeks,
#When both products were under promotion?
#When product 1 (Coke) was under promotion but product 2 (Pepsi) was not under promotion?
#When product 1 (Coke) was not under promotion but product 2 (Pepsi) was under promotion?
#When neither product was under promotion?
mytable <- xtabs(~ p1prom+p2prom, data=store.df)
print(mytable) # frequencies
## p2prom
## p1prom 0 1
## 0 1616 256
## 1 176 32
#Q5b. Express Q5a as in percentages
prop.table(mytable)*100
## p2prom
## p1prom 0 1
## 0 77.692308 12.307692
## 1 8.461538 1.538462
************************************************************************
#Q5c. Write R code to calculate the average price of product 1 (Coke) and product 2 (Pepsi) when both were under promotion.
Avg_p1 <- mean(store.df$p1price[ which( store.df$p1prom ==1 & store.df$p2prom ==1 )])
Avg_p2 <- mean(store.df$p2price[which(store.df$p1prom ==1 & store.df$p2prom==1)])
print(paste0("Average Price of product1 When both are under promotion:",Avg_p1))
## [1] "Average Price of product1 When both are under promotion:2.568125"
print(paste0("Average Price of product2 When both are under promotion:",Avg_p2))
## [1] "Average Price of product2 When both are under promotion:2.67125"
************************************************************************
#Q5d. Write R code to calculate the average price of product 1 (Coke) and product 2 (Pepsi) when neither of them was under promotion.
Avg_p1_d <- mean(store.df$p1price[ which( store.df$p1prom ==0 & store.df$p2prom ==0 )])
Avg_p2_d <- mean(store.df$p2price[which(store.df$p1prom ==0 & store.df$p2prom==0)])
print(paste0("Average Price of product1 When neither of them are under promotion:",Avg_p1_d))
## [1] "Average Price of product1 When neither of them are under promotion:2.54334158415842"
print(paste0("Average Price of product2 When neither of them are under promotion:",Avg_p2_d))
## [1] "Average Price of product2 When neither of them are under promotion:2.70089108910891"
************************************************************************
#Q5e. Write R code to calculate the average price of product 1 (Coke) and product 2 (Pepsi) when only one of them was under promotion.
#Part 1 : When Product 1 was under promotion and product 2 is not
Part1_p1 <- mean(store.df$p1price[ which( store.df$p1prom ==1 & store.df$p2prom ==0 )])
Part1_p2 <- mean(store.df$p2price[ which( store.df$p1prom ==1 & store.df$p2prom ==0 )])
print(paste0("Price of product1 When Product 1 was under promotion and product 2 is not:",Part1_p1))
## [1] "Price of product1 When Product 1 was under promotion and product 2 is not:2.52465909090909"
print(paste0("Price of product2 When Product 1 was under promotion and product 2 is not:",Part1_p2))
## [1] "Price of product2 When Product 1 was under promotion and product 2 is not:2.70306818181818"
#Part 2 : When Product 2 was under promotion and product 1 is not
Part2_p1 <- mean(store.df$p1price[ which( store.df$p1prom ==0 & store.df$p2prom ==1 )])
Part2_p2 <- mean(store.df$p2price[ which( store.df$p1prom ==0 & store.df$p2prom ==1 )])
print(paste0("Price of product1 When Product 2 was under promotion and product 1 is not:",Part2_p1))
## [1] "Price of product1 When Product 2 was under promotion and product 1 is not:2.561484375"
print(paste0("Price of product2 When Product 2 was under promotion and product 1 is not:",Part2_p2))
## [1] "Price of product2 When Product 2 was under promotion and product 1 is not:2.691953125"
************************************************************************
# Q6. Analyze the above numbers. Think like a Senior Manager. Prepare a list of qualitative insights on the "Effect of Promotions on Sales" of Coke and Pepsi, to be shared in class.
mean(store.df$p1sales[ which( store.df$p1prom ==1)])
## [1] 168.8894
mean(store.df$p1sales[ which( store.df$p1prom ==0)])
## [1] 129.0662
mean(store.df$p2sales[ which( store.df$p2prom ==1)])
## [1] 134.2604
mean(store.df$p2sales[ which( store.df$p2prom ==0)])
## [1] 94.67578
# 1) Coke has better market power because it is lesser on promotion in comparison to Pepsi
# 2) Both of them are not on promotion 80% of the times means they sell even when not on promotion
# 3) Coke sells a lower price and that is how they are trying to gain market share by playing on the price
# 4) Even both were under promo, Pepsi dropped it's price by 30cents whereas Coke increased by 2cents. Point being that Pepsi tweaks it's prices more that Coke whereas Coke is stable and EDLP types