Read the data using read.csv

#Read the data using read.csv
store.df = read.csv(paste("StoreData.csv", sep=""))

Q1a. Write R code to show the total number of rows and columns present in the data frame store.df

nrow(store.df)

## [1] 2080

ncol(store.df)

## [1] 10

Q1b. Write R code to list the names of the column present in the data frame store.df.

colnames(store.df)

##  [1] "storeNum" "Year"     "Week"     "p1sales"  "p2sales"  "p1price" 
##  [7] "p2price"  "p1prom"   "p2prom"   "country"

Q2a. Write R code to output the data types of the different columns.

str(store.df)

## 'data.frame':    2080 obs. of  10 variables:
##  $ storeNum: int  101 101 101 101 101 101 101 101 101 101 ...
##  $ Year    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Week    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ p1sales : int  127 137 156 117 138 115 116 106 116 145 ...
##  $ p2sales : int  106 105 97 106 100 127 90 126 94 91 ...
##  $ p1price : num  2.29 2.49 2.99 2.99 2.49 2.79 2.99 2.99 2.29 2.49 ...
##  $ p2price : num  2.29 2.49 2.99 3.19 2.59 2.49 3.19 2.29 2.29 2.99 ...
##  $ p1prom  : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ p2prom  : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ country : Factor w/ 7 levels "AU","BR","CN",..: 7 7 7 7 7 7 7 7 7 7 ...

Q2b. Write R code to convert the data types of the columns {Year, p1prom, p2prom, country} into factor variables.

Year.f = factor(store.df$Year, labels = c("0", "1"))
p1prom.f = factor(store.df$p1prom, labels = c("0", "1"))
p2prom.f = factor(store.df$p2prom, labels = c("0", "1"))
country.f = factor(store.df$country, labels = c("0", "1", "2", "3", "4", "5", "6"))

Q2c. Write R code to verify the conversions done in Q2b.

is.factor(country.f)

## [1] TRUE

is.factor(Year.f)

## [1] TRUE

is.factor(p1prom.f)

## [1] TRUE

is.factor(p2prom.f)

## [1] TRUE

Q3a. Write R code to generate the summary statistics of the different variables present in the data frame store.df.

summary(store.df)

##     storeNum          Year          Week          p1sales   
##  Min.   :101.0   Min.   :1.0   Min.   : 1.00   Min.   : 73  
##  1st Qu.:105.8   1st Qu.:1.0   1st Qu.:13.75   1st Qu.:113  
##  Median :110.5   Median :1.5   Median :26.50   Median :129  
##  Mean   :110.5   Mean   :1.5   Mean   :26.50   Mean   :133  
##  3rd Qu.:115.2   3rd Qu.:2.0   3rd Qu.:39.25   3rd Qu.:150  
##  Max.   :120.0   Max.   :2.0   Max.   :52.00   Max.   :263  
##                                                             
##     p2sales         p1price         p2price         p1prom   
##  Min.   : 51.0   Min.   :2.190   Min.   :2.29   Min.   :0.0  
##  1st Qu.: 84.0   1st Qu.:2.290   1st Qu.:2.49   1st Qu.:0.0  
##  Median : 96.0   Median :2.490   Median :2.59   Median :0.0  
##  Mean   :100.2   Mean   :2.544   Mean   :2.70   Mean   :0.1  
##  3rd Qu.:113.0   3rd Qu.:2.790   3rd Qu.:2.99   3rd Qu.:0.0  
##  Max.   :225.0   Max.   :2.990   Max.   :3.19   Max.   :1.0  
##                                                              
##      p2prom       country 
##  Min.   :0.0000   AU:104  
##  1st Qu.:0.0000   BR:208  
##  Median :0.0000   CN:208  
##  Mean   :0.1385   DE:520  
##  3rd Qu.:0.0000   GB:312  
##  Max.   :1.0000   JP:416  
##                   US:312

Q3b. Write R code to generate the following table using the describe() function from the psych package in R.

library("psych", lib.loc="~/R/win-library/3.5")
describe(store.df)

##          vars    n   mean    sd median trimmed   mad    min    max range
## storeNum    1 2080 110.50  5.77 110.50  110.50  7.41 101.00 120.00  19.0
## Year        2 2080   1.50  0.50   1.50    1.50  0.74   1.00   2.00   1.0
## Week        3 2080  26.50 15.01  26.50   26.50 19.27   1.00  52.00  51.0
## p1sales     4 2080 133.05 28.37 129.00  131.08 26.69  73.00 263.00 190.0
## p2sales     5 2080 100.16 24.42  96.00   98.05 22.24  51.00 225.00 174.0
## p1price     6 2080   2.54  0.29   2.49    2.53  0.44   2.19   2.99   0.8
## p2price     7 2080   2.70  0.33   2.59    2.69  0.44   2.29   3.19   0.9
## p1prom      8 2080   0.10  0.30   0.00    0.00  0.00   0.00   1.00   1.0
## p2prom      9 2080   0.14  0.35   0.00    0.05  0.00   0.00   1.00   1.0
## country*   10 2080   4.55  1.72   4.50    4.62  2.22   1.00   7.00   6.0
##           skew kurtosis   se
## storeNum  0.00    -1.21 0.13
## Year      0.00    -2.00 0.01
## Week      0.00    -1.20 0.33
## p1sales   0.74     0.66 0.62
## p2sales   0.99     1.51 0.54
## p1price   0.28    -1.44 0.01
## p2price   0.32    -1.40 0.01
## p1prom    2.66     5.10 0.01
## p2prom    2.09     2.38 0.01
## country* -0.29    -0.81 0.04

Q4a. Break-up store.df by country. Write R code to generate the following break-ups by country.

mytable3 <- with(store.df, table(country))
mytable3

## country
##  AU  BR  CN  DE  GB  JP  US 
## 104 208 208 520 312 416 312

Q4b. Write R code to express Q4a numbers in terms of percentages.

prop.table(mytable3)*100

## country
## AU BR CN DE GB JP US 
##  5 10 10 25 15 20 15

Q5a.1 Count the number of weeks, When both products were under promotion?

both_promo <- subset(store.df ,store.df$p1prom==1 & store.df$p2prom==1)
Count_weeks_both_promo <- xtabs(~ both_promo$storeNum , data=store.df)
Count_weeks_both_promo

## both_promo$storeNum
## 101 103 104 105 106 107 108 110 111 112 113 115 117 118 119 120 
##   1   4   1   1   2   1   2   1   2   1   4   4   1   3   3   1

This is the total week count (out of 104) at the store level for promotions of both products

Q5a.2 When product 1 (Coke) was under promotion but product 2 (Pepsi) was not under promotion

coke_promo <- subset(store.df ,store.df$p1prom==1 & store.df$p2prom==0)
Count_weeks_coke_promo <- xtabs(~ coke_promo$storeNum , data=store.df)
Count_weeks_coke_promo

## coke_promo$storeNum
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 
##   7   7   8   2  11   7   8  15   8   6   5   9  11   8   8  10  10  15 
## 119 120 
##  11  10

This is the total week count (out of 104) at the store level when Coke was promoted

Q5a.3 When product 1 (Coke) was not under promotion but product 2 (Pepsi) was under promotion?

pepsi_promo <- subset(store.df ,store.df$p1prom==0 & store.df$p2prom==1)
Count_weeks_pepsi_promo <- xtabs(~ pepsi_promo$storeNum , data=store.df)
Count_weeks_pepsi_promo

## pepsi_promo$storeNum
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 
##  11   9  20  10  14  20  12  16  12  13  12  15  15  18   9  13  12   9 
## 119 120 
##   9   7

This is the total week count (out of 104) at the store level when Pepsi was promoted

Q5a.4 When both products were not under promotion?

no_promo <- subset(store.df ,store.df$p1prom==0 & store.df$p2prom==0)
Count_weeks_no_promo <- xtabs(~ no_promo$storeNum , data=store.df)
Count_weeks_no_promo

## no_promo$storeNum
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 
##  85  88  72  91  78  75  83  71  84  84  85  79  74  78  83  81  81  77 
## 119 120 
##  81  86

This is the total week count (out of 104) at the store level with no promotions

Q5.b Express Q5a as in percentages.

Count_weeks_both_promo*100/104

## both_promo$storeNum
##       101       103       104       105       106       107       108 
## 0.9615385 3.8461538 0.9615385 0.9615385 1.9230769 0.9615385 1.9230769 
##       110       111       112       113       115       117       118 
## 0.9615385 1.9230769 0.9615385 3.8461538 3.8461538 0.9615385 2.8846154 
##       119       120 
## 2.8846154 0.9615385

This is the % of week the store had the promotions for both

Count_weeks_coke_promo*100/104

## coke_promo$storeNum
##       101       102       103       104       105       106       107 
##  6.730769  6.730769  7.692308  1.923077 10.576923  6.730769  7.692308 
##       108       109       110       111       112       113       114 
## 14.423077  7.692308  5.769231  4.807692  8.653846 10.576923  7.692308 
##       115       116       117       118       119       120 
##  7.692308  9.615385  9.615385 14.423077 10.576923  9.615385

This is the % of week the store had the promotions for Coke

Count_weeks_pepsi_promo*100/104

## pepsi_promo$storeNum
##       101       102       103       104       105       106       107 
## 10.576923  8.653846 19.230769  9.615385 13.461538 19.230769 11.538462 
##       108       109       110       111       112       113       114 
## 15.384615 11.538462 12.500000 11.538462 14.423077 14.423077 17.307692 
##       115       116       117       118       119       120 
##  8.653846 12.500000 11.538462  8.653846  8.653846  6.730769

This is the % of week the store had the promotions for Pepsi

Count_weeks_no_promo*100/104

## no_promo$storeNum
##      101      102      103      104      105      106      107      108 
## 81.73077 84.61538 69.23077 87.50000 75.00000 72.11538 79.80769 68.26923 
##      109      110      111      112      113      114      115      116 
## 80.76923 80.76923 81.73077 75.96154 71.15385 75.00000 79.80769 77.88462 
##      117      118      119      120 
## 77.88462 74.03846 77.88462 82.69231

This is the % of week the store had no promotion

5c Write R code to calculate the average price of product 1 (Coke) and product 2 (Pepsi) when both were under promotion

mean(both_promo$p1price)

## [1] 2.568125

mean(both_promo$p2price)

## [1] 2.67125

5d Write R code to calculate the average price of product 1 (Coke) and product 2 (Pepsi) when neither of them was under promotion.

mean(no_promo$p1price)

## [1] 2.543342

mean(no_promo$p2price)

## [1] 2.700891

Q5e. Write R code to calculate the average price of product 1 (Coke) and product 2 (Pepsi) when only one of them was under promotion.

one_promo <- rbind(coke_promo,pepsi_promo)
mean(one_promo$p1price)

## [1] 2.546481

mean(one_promo$p2price)

## [1] 2.696481

Q6. Analyze the above numbers. Think like a Senior Manager. Prepare a list of qualitative insights on the “Effect of Promotions on Sales” of Coke and Pepsi, to be shared in class.

# Revenue for Coke under different promotions
rev_coke_no_promo = no_promo$p1sales*no_promo$p1price
m_rev_coke_no_promo = mean(rev_coke_no_promo)
rev_coke_coke_promo = coke_promo$p1sales*coke_promo$p1price
m_rev_coke_coke_promo = mean(rev_coke_coke_promo)
rev_coke_pepsi_promo = pepsi_promo$p1sales*pepsi_promo$p1price
m_rev_coke_pepsi_promo = mean(rev_coke_pepsi_promo)
rev_coke_both_promo = both_promo$p1sales*both_promo$p1price
m_rev_coke_both_promo = mean(rev_coke_both_promo)
situation <- c("Pepsi Promo", "No promo", "Both Promo", "Coke Promo")
revenue <- c(m_rev_coke_pepsi_promo,m_rev_coke_no_promo,m_rev_coke_both_promo,m_rev_coke_coke_promo)
coke_revenue.df <- data.frame(situation,revenue)
barplot(coke_revenue.df$revenue,width=0.2,names.arg =coke_revenue.df$situation, main = "Revenue of coke under different promotions")

# Revenue for Pepsi under different promotions
rev_pepsi_no_promo = no_promo$p2sales*no_promo$p2price
m_rev_pepsi_no_promo = mean(rev_pepsi_no_promo)
rev_pepsi_coke_promo = coke_promo$p2sales*coke_promo$p2price
m_rev_pepsi_coke_promo = mean(rev_pepsi_coke_promo)
rev_pepsi_pepsi_promo = pepsi_promo$p2sales*pepsi_promo$p2price
m_rev_pepsi_pepsi_promo = mean(rev_pepsi_pepsi_promo)
rev_pepsi_both_promo = both_promo$p2sales*both_promo$p2price
m_rev_pepsi_both_promo = mean(rev_pepsi_both_promo)
situation <- c("Pepsi Promo", "No promo", "Both Promo", "Coke Promo")
revenue <- c(m_rev_pepsi_pepsi_promo,m_rev_pepsi_no_promo,m_rev_pepsi_both_promo,m_rev_pepsi_coke_promo)
pepsi_revenue.df <- data.frame(situation,revenue)
barplot(pepsi_revenue.df$revenue,width=0.2,names.arg =pepsi_revenue.df$situation, main = "Revenue of Pepsi under diferent promotions")

Class Participation tasks

summary(store.df$p2sales)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    51.0    84.0    96.0   100.2   113.0   225.0

#pepsi sales vis-a-vis promotions
mytable <- xtabs(~ p2sales+p2prom, data=store.df)
margin.table(mytable,2)

## p2prom
##    0    1 
## 1792  288

StoreDataAnalysis

Adarsh/pgp33209

September 21, 2018