#Read StoreData.csv from DAM 2018 and store it in dataframe
setwd("C:/Users/Anshumaan/Desktop/2018 DAM")
store.df <- read.csv("StoreData.csv",header=TRUE)

************************************************************************

#Q1.a Write R code to show the total number of rows and columns present in the data frame store.df
Rdimensions <- dim(store.df)
print(paste0("Number of rows are ",Rdimensions[1]," and Number of Columns are ",Rdimensions[2]))
## [1] "Number of rows are 2080 and Number of Columns are 10"

************************************************************************

#Q1b. Write R code to list the names of the column present in the data frame store.df
print(colnames(store.df))
##  [1] "storeNum" "Year"     "Week"     "p1sales"  "p2sales"  "p1price" 
##  [7] "p2price"  "p1prom"   "p2prom"   "country"

************************************************************************

#Q2a. Write R code to output the data types of the different columns.
str(store.df)
## 'data.frame':    2080 obs. of  10 variables:
##  $ storeNum: int  101 101 101 101 101 101 101 101 101 101 ...
##  $ Year    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Week    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ p1sales : int  127 137 156 117 138 115 116 106 116 145 ...
##  $ p2sales : int  106 105 97 106 100 127 90 126 94 91 ...
##  $ p1price : num  2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
##  $ p2price : num  2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
##  $ p1prom  : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ p2prom  : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...

************************************************************************

#Q2b. Write R code to convert the data types of the columns {Year, p1prom, p2prom, country} into factor variables
store.df$Year <- as.factor(store.df$Year)
store.df$p1prom <- as.factor(store.df$p1prom)
store.df$p2prom <- as.factor(store.df$p2prom)
store.df$country <- as.factor(store.df$country)

************************************************************************

#Q2c. Write R code to verify the conversions done in Q2b
#Answer1
str(store.df)
## 'data.frame':    2080 obs. of  10 variables:
##  $ storeNum: int  101 101 101 101 101 101 101 101 101 101 ...
##  $ Year    : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Week    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ p1sales : int  127 137 156 117 138 115 116 106 116 145 ...
##  $ p2sales : int  106 105 97 106 100 127 90 126 94 91 ...
##  $ p1price : num  2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
##  $ p2price : num  2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
##  $ p1prom  : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
##  $ p2prom  : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
##  $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...
#Answer2
is.factor(store.df$Year)
## [1] TRUE
is.factor(store.df$p1prom)
## [1] TRUE
is.factor(store.df$p2prom)
## [1] TRUE
is.factor(store.df$country)
## [1] TRUE

************************************************************************

#Q3a. Write R code to generate the summary statistics of the different variables present in the data frame  store.df
summary(store.df)
##     storeNum     Year          Week          p1sales       p2sales     
##  Min.   :101.0   1:1040   Min.   : 1.00   Min.   : 73   Min.   : 51.0  
##  1st Qu.:105.8   2:1040   1st Qu.:13.75   1st Qu.:113   1st Qu.: 84.0  
##  Median :110.5            Median :26.50   Median :129   Median : 96.0  
##  Mean   :110.5            Mean   :26.50   Mean   :133   Mean   :100.2  
##  3rd Qu.:115.2            3rd Qu.:39.25   3rd Qu.:150   3rd Qu.:113.0  
##  Max.   :120.0            Max.   :52.00   Max.   :263   Max.   :225.0  
##                                                                        
##     p1price         p2price     p1prom   p2prom   country 
##  Min.   :2.190   Min.   :2.29   0:1872   0:1792   AU:104  
##  1st Qu.:2.290   1st Qu.:2.49   1: 208   1: 288   BR:208  
##  Median :2.490   Median :2.59                     CN:208  
##  Mean   :2.544   Mean   :2.70                     DE:520  
##  3rd Qu.:2.790   3rd Qu.:2.99                     GB:312  
##  Max.   :2.990   Max.   :3.19                     JP:416  
##                                                   US:312

************************************************************************

#Q3b. Write R code to generate the following table using the describe() function from the psych package in R.
library(psych)
psych::describe(store.df)

************************************************************************

#QUESTION 4
#Q4a. Break-up store.df by country. Write R code to generate the following break-ups by country.
mytable <- with(store.df, table(country)) 
print(mytable)
## country
##  AU  BR  CN  DE  GB  JP  US 
## 104 208 208 520 312 416 312
#Q4b. Write R code to express Q4a numbers in terms of percentages.
prop.table(mytable)*100
## country
## AU BR CN DE GB JP US 
##  5 10 10 25 15 20 15

************************************************************************

#Q5a.The dataset has 2080 rows of weekly data for 20 unique store branches for two different years. For product 1 (Coke) and product 2 (Pepsi), write R code to count the number of weeks,
#When both products were under promotion?
#When product 1 (Coke) was under promotion but product 2 (Pepsi) was not under promotion?
#When product 1 (Coke) was not under promotion but product 2 (Pepsi) was under promotion?
#When neither product was under promotion?
mytable <- xtabs(~ p1prom+p2prom, data=store.df)
print(mytable) # frequencies
##       p2prom
## p1prom    0    1
##      0 1616  256
##      1  176   32
#Q5b. Express Q5a as in percentages
prop.table(mytable)*100
##       p2prom
## p1prom         0         1
##      0 77.692308 12.307692
##      1  8.461538  1.538462

************************************************************************

#Q5c. Write R code to calculate the average price of product 1 (Coke) and product 2 (Pepsi) when both were under promotion.
Avg_p1 <- mean(store.df$p1price[ which( store.df$p1prom ==1 & store.df$p2prom ==1 )])
Avg_p2 <- mean(store.df$p2price[which(store.df$p1prom ==1 & store.df$p2prom==1)])
print(paste0("Average Price of product1 When both are under promotion:",Avg_p1))
## [1] "Average Price of product1 When both are under promotion:2.568125"
print(paste0("Average Price of product2 When both are under promotion:",Avg_p2))
## [1] "Average Price of product2 When both are under promotion:2.67125"

************************************************************************

#Q5d. Write R code to calculate the average price of product 1 (Coke) and product 2 (Pepsi) when neither of them was under promotion.
Avg_p1_d <- mean(store.df$p1price[ which( store.df$p1prom ==0 & store.df$p2prom ==0 )])
Avg_p2_d <- mean(store.df$p2price[which(store.df$p1prom ==0 & store.df$p2prom==0)])
print(paste0("Average Price of product1 When neither of them are under promotion:",Avg_p1_d))
## [1] "Average Price of product1 When neither of them are under promotion:2.54334158415842"
print(paste0("Average Price of product2 When neither of them are under promotion:",Avg_p2_d))
## [1] "Average Price of product2 When neither of them are under promotion:2.70089108910891"

************************************************************************

#Q5e. Write R code to calculate the average price of product 1 (Coke) and product 2 (Pepsi) when only one of them was under promotion.
#Part 1 : When Product 1 was under promotion and product 2 is not
Part1_p1 <- mean(store.df$p1price[ which( store.df$p1prom ==1 & store.df$p2prom ==0 )])
Part1_p2 <- mean(store.df$p2price[ which( store.df$p1prom ==1 & store.df$p2prom ==0 )])
print(paste0("Price of product1 When Product 1 was under promotion and product 2 is not:",Part1_p1))
## [1] "Price of product1 When Product 1 was under promotion and product 2 is not:2.52465909090909"
print(paste0("Price of product2 When Product 1 was under promotion and product 2 is not:",Part1_p2))
## [1] "Price of product2 When Product 1 was under promotion and product 2 is not:2.70306818181818"
#Part 2 : When Product 2 was under promotion and product 1 is not
Part2_p1 <- mean(store.df$p1price[ which( store.df$p1prom ==0 & store.df$p2prom ==1 )])
Part2_p2 <- mean(store.df$p2price[ which( store.df$p1prom ==0 & store.df$p2prom ==1 )])
print(paste0("Price of product1 When Product 2 was under promotion and product 1 is not:",Part2_p1))
## [1] "Price of product1 When Product 2 was under promotion and product 1 is not:2.561484375"
print(paste0("Price of product2 When Product 2 was under promotion and product 1 is not:",Part2_p2))
## [1] "Price of product2 When Product 2 was under promotion and product 1 is not:2.691953125"

************************************************************************

# Q6. Analyze the above numbers. Think like a Senior Manager. Prepare a list of qualitative insights on the "Effect of Promotions on Sales" of Coke and Pepsi, to be shared in class.
mean(store.df$p1sales[ which( store.df$p1prom ==1)])
## [1] 168.8894
mean(store.df$p1sales[ which( store.df$p1prom ==0)])
## [1] 129.0662
mean(store.df$p2sales[ which( store.df$p2prom ==1)])
## [1] 134.2604
mean(store.df$p2sales[ which( store.df$p2prom ==0)])
## [1] 94.67578
# 1) Coke has better market power because it is lesser on promotion in comparison to Pepsi 
# 2) Both of them are not on promotion 80% of the times means they sell even when not on promotion 
# 3) Coke sells a lower price and that is how they are trying to gain market share by playing on the price 
# 4) Even both were under promo, Pepsi dropped it's price by 30cents whereas Coke increased by 2cents. Point being that Pepsi tweaks it's prices more that Coke whereas Coke is stable and EDLP types