# Clear environment of variables and functions
rm(list = ls(all = TRUE)) 

# Clear environmet of packages
if(is.null(sessionInfo()$otherPkgs) == FALSE)lapply(paste("package:", names(sessionInfo()$otherPkgs), sep=""), detach, character.only = TRUE, unload = TRUE)

1 Load Package and Data

1.1 Load Package

#load package 
library(gridExtra)
library(GGally)

## Loading required package: ggplot2

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──

## ✔ tibble  2.1.3     ✔ purrr   0.3.2
## ✔ tidyr   1.0.0     ✔ dplyr   0.8.3
## ✔ readr   1.3.1     ✔ stringr 1.4.0
## ✔ tibble  2.1.3     ✔ forcats 0.4.0

## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine() masks gridExtra::combine()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()

library(gridExtra)
library(janitor) # for tyble

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(lmPerm)  # for ANOVA
library(formattable)# For table formatting and table formatting functions
library(htmltools)

1.2 Data Loading and Cleaning

#Data Loading 
my_data <- read.csv("mtp_data.csv")

# Set Factor
my_data$promo <- as.factor(my_data$promo)
my_data$iri_key <- as.factor(my_data$iri_key)
my_data <- my_data %>% mutate(sales = price*units) 


# Create Sales Column 
my_data <- my_data %>% mutate(sales = price*units)


#create producer column
GM_brand <- c("GENERAL MILLS CHEERIOS", "GENERAL MILLS CINNAMON TST CR","GENERAL MILLS COCOA PUFFS","GENERAL MILLS KIX","GENERAL MILLS LUCKY CHARMS")
K_brand <- c("KELLOGGS COCOA KRISPIES","KELLOGGS FROOT LOOPS","KELLOGGS FROSTED FLAKES","KELLOGGS FROSTED MINI WHEATS","KELLOGGS RAISIN BRAN","KELLOGGS RICE KRISPIES","KELLOGGS SMART START","KELLOGGS SPECIAL K")
P_brand <- c("POST GRAPE NUTS","POST SHREDDED WHEAT")

my_data <- my_data %>% mutate(producer = as.factor(ifelse(brand %in% GM_brand , "GM",
                                               ifelse(brand %in% K_brand, "KELLOGGS","POST" ))))

GM_record <- my_data %>% filter(producer == "GM") 
K_record <- my_data %>% filter(producer =="KELLOGGS")
P_record <- my_data %>% filter(producer == "POST")


#view data
head(my_data)

##                 UPC iri_key week units                         brand
## 1 00-01-16000-11653  644347 1484     5 GENERAL MILLS CINNAMON TST CR
## 2 00-01-16000-11653  248741 1483     2 GENERAL MILLS CINNAMON TST CR
## 3 00-01-16000-11653  535806 1489     3 GENERAL MILLS CINNAMON TST CR
## 4 00-01-16000-11945  675634 1489     2        GENERAL MILLS CHEERIOS
## 5 00-01-16000-11945  205272 1491     8        GENERAL MILLS CHEERIOS
## 6 00-01-16000-11945  248741 1492     5        GENERAL MILLS CHEERIOS
##           flavor package volume price promo   ad sales producer
## 1 CINNAMON TOAST     BOX   0.06   0.5     0    A   2.5       GM
## 2 CINNAMON TOAST     BOX   0.06   0.5     0 NONE   1.0       GM
## 3 CINNAMON TOAST     BOX   0.06   0.5     0 NONE   1.5       GM
## 4        TOASTED     BOX   0.04   0.5     0 NONE   1.0       GM
## 5        TOASTED     BOX   0.04   0.5     0 NONE   4.0       GM
## 6        TOASTED     BOX   0.04   0.5     0 NONE   2.5       GM

2 Base EDA for all brand in Cereal Market

2.1 Uni-variable Non-Graphic

summary(my_data)

print(paste("Number of UPC:",nrow(my_data %>% group_by(UPC)%>%summarise(n()))))
print(paste("Number of Store :",nrow(my_data %>% group_by(iri_key)%>%summarise(n()))))
print(paste("Number of brand : " ,nrow(my_data %>% group_by(brand)%>%summarise(n()))))

Attributes Analysis

prodcut attributes :

UPC : unique product number (114 kinds)
producer : (3 types)
brand : (15 kinds)
flavor : (5 kinds)
package : (2 types)

Senerio :

week: transit time (52 weeks)
iri_key: store number (1420 locations)

Strategy:

promo : (0/1)
ad: none / A / B (3 types)

Outcome :

Unit: certain UPC that a store sells in unit per week
sales; unit * price

2.2 Uni-variable Graphic

Analysis of Whole Cereal Market

price_plot <- 
  ggplot(my_data,aes(x = price)) + 
  geom_histogram(binwidth = 0.2) + 
  labs(title="Price Distribution")


sale_trend <- 
  my_data %>% 
  group_by(week)%>% 
  summarise(summy = n() ) %>% 
  ggplot(aes(x = week, y = summy)) + 
  geom_line(stat = "identity")+
  labs(title = "Sales Trend")

unit_plot <- 
  ggplot(my_data,aes(x = units)) + 
  geom_bar() + 
  labs(title="Units Distribution")

producer_plot <- 
  ggplot(my_data, aes(x = producer, y = sum(sales)))+
  geom_bar(stat = "identity")+
  scale_y_continuous(labels = scales::dollar)+
  labs(title = "Accumulated Sales by producer", y = "sales")

flavor_plot <- 
  my_data %>% 
  group_by(flavor)%>% 
  summarise(countty = n()) %>% 
  ggplot(aes(x = reorder(flavor, -countty), y = countty))+
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::dollar)+
  labs(title="Accumulated Sales by flavor", y = "sales")

volumn_plot<- 
  ggplot(my_data,aes(x =  volume))+
  geom_freqpoly() +
  scale_y_continuous(labels = scales::dollar)+
  labs(title="Accumulated Sales by volume", y="sales")

price_plot

sale_trend

unit_plot

producer_plot

flavor_plot

volumn_plot

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Findings:

Price distribution is right-skewed.
The total sales record is punctuated without a significant pattern.
Per store per UPC sold generally less than 6 units.
Kelloggs sells much better than us for total sales, but we are better than POST.
Customers are looking for regular and toasted cereal.
Customers are looking for a volume between 0.5-1.5.

2.3 Multi-variable Non-Graphic

my_data %>% tabyl(brand,flavor)

##                          brand CINNAMON TOAST COCOA FRUIT REGULAR TOASTED
##         GENERAL MILLS CHEERIOS              0     0     0       4    1454
##  GENERAL MILLS CINNAMON TST CR           1834     0     0       0       0
##      GENERAL MILLS COCOA PUFFS              0  1020     0       0       0
##              GENERAL MILLS KIX              0     0     0    1196       0
##     GENERAL MILLS LUCKY CHARMS              0     0     0       3    1678
##        KELLOGGS COCOA KRISPIES              0   881     0       0       0
##           KELLOGGS FROOT LOOPS              0     0  2192       0       0
##        KELLOGGS FROSTED FLAKES              0     0     0    2295       0
##   KELLOGGS FROSTED MINI WHEATS              0     0     0    1574       0
##           KELLOGGS RAISIN BRAN              0     0     0    1266       0
##         KELLOGGS RICE KRISPIES              0     0     0       0    1450
##           KELLOGGS SMART START              0     0     0       0    1134
##             KELLOGGS SPECIAL K              0     0     0       0    1391
##                POST GRAPE NUTS              0     0     0    1289       0
##            POST SHREDDED WHEAT              0     0     0    1189       0

Assumption:

We assumed that when consumers shop for cereal, the main factor is flavor and package. But since there are not many sales records coming from the cup, so we will ignore this factor and focus mainly on flavor.
We will ignore the 4 packages of regular from GENERAL MILLS CHEERIOS for simplicity.

2.4 Multi-variable Graphic

2.4.1 Brand Analysis

grid.arrange(
  
# market share by sales
my_data 
  %>%group_by(brand,flavor,producer) 
  %>% summarise(summy_sales = sum(sales)) 
  %>% ggplot(aes(x = flavor, y = summy_sales, fill = brand))
  + facet_grid(producer~.)
  + geom_bar(stat = "identity", position = "dodge") 
  + labs(title = "Brand sale with Flavor by Producer"),


ncol =1 
)

Finding: 1. On flavor Regular, we lost a lot of sales. we only have one brand, comparing to Lelloggs who has three. This may be one of the reasons we are losing sales.

We might consider selling fruit flavor

2.4.2 Flavor Analysis

grid.arrange(
  
# market share by sales
my_data 
  %>%group_by(producer,flavor) 
  %>% summarise(summy_sales = sum(sales)) 
  %>% ggplot(aes(x = producer, y = summy_sales, fill = flavor)) 
  + geom_bar(stat = "identity", position = "dodge") 
  + labs(title = "Market Share with Flavor by Sales"),


# market share by units
my_data 
  %>%group_by(producer,flavor) 
  %>% summarise(summy_units = sum(units)) 
  %>% ggplot(aes(x = producer, y = summy_units, fill = flavor)) 
  + geom_bar(stat = "identity", position = "dodge") 
  + labs(title = "Market Share with Flavo by unit"),

ncol =1 
)

Finding : 1. We win in flavor cinnamon, cocoa and toasted. 2. we lost in regular to both competitive brands. The difference between us and POST is not obvious, so we will do a t.test to double-check

Test for the above finidng

# GM with Kelloggs
t.test(my_data$sales[my_data$producer == "GM" & my_data$flavor == "REGULAR"],my_data$sales[my_data$producer == "KELLOGGS" & my_data$flavor == "REGULAR"])

## 
##  Welch Two Sample t-test
## 
## data:  my_data$sales[my_data$producer == "GM" & my_data$flavor == "REGULAR"] and my_data$sales[my_data$producer == "KELLOGGS" & my_data$flavor == my_data$sales[my_data$producer == "GM" & my_data$flavor == "REGULAR"] and     "REGULAR"]
## t = -10.657, df = 1951.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -9.262868 -6.383443
## sample estimates:
## mean of x mean of y 
##  26.98179  34.80494

# GM with POST
t.test(my_data$sales[my_data$producer == "GM" & my_data$flavor == "REGULAR"],my_data$sales[my_data$producer == "POST" & my_data$flavor == "REGULAR"])

## 
##  Welch Two Sample t-test
## 
## data:  my_data$sales[my_data$producer == "GM" & my_data$flavor == "REGULAR"] and my_data$sales[my_data$producer == "POST" & my_data$flavor == my_data$sales[my_data$producer == "GM" & my_data$flavor == "REGULAR"] and     "REGULAR"]
## t = 5.6018, df = 2113.1, p-value = 2.398e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  2.757104 5.727343
## sample estimates:
## mean of x mean of y 
##  26.98179  22.73956

Test Result :

P_vaule with Kelloggs : < 2.2e-16

P_vaule with POST : 2.398e-08

We lost sales in regular to both competitors.

2.4.3 Package and sales

grid.arrange(

# by flavor
my_data 
  %>%group_by(flavor,package) 
  %>% summarise(sum_sales = sum(sales)) 
  %>% ggplot(aes(x = flavor, y = sum_sales, fill = package)) 
  + geom_bar(stat = "identity", position = "dodge") 
  + labs(title = "market share by package"),

# by producer
my_data 
  %>%group_by(producer,package) 
  %>% summarise(sum_units = sum(units)) 
  %>% ggplot(aes(x = producer, y = sum_units, fill = package)) 
  + geom_bar(stat = "identity", position = "dodge") 
  + labs(title = "market share by package"),


ncol =1
)

Finding :

Sales from the cup only occupied a really small portion of all flavors and all producers, so that we will ignore this factor in further analysis.

2.4.4 Price analysis

grid.arrange(

my_data  %>% 
  group_by(flavor, producer) %>%
  summarise( unit_prcie = median( price/volume)) %>%
  ggplot(aes(x = producer, y = unit_prcie, fill = flavor)) +
  geom_bar(stat = "identity",position = "dodge"),


my_data %>% 
  group_by(brand,producer,flavor) %>% 
  mutate(unit_prcie = (price/volume)) %>% 
  ggplot(aes(x = producer, y = unit_prcie,fill=flavor))+
  geom_boxplot(),


ncol = 1
)

Finding :

To generalize the price effect, we divided the price by volume to get the average price per quantity.

Our price is much higher among all flavors comparing to both competitors.

3 Finding 1 -Some Strategies We Used DID NOT Make Difference

3.1 Promotion Evaluation

3.1.1 Premise:

For the promotion analysis, we will analyze both sales and units. Sale increases will bring more revenue, and unit increases will increase market share. Both are executive goals.

grid.arrange(
  
#By sales
GM_record %>%
  group_by(brand, promo)%>%
  summarise(mean_sales = mean(sales))%>%
  ggplot(aes(x = brand, y = mean_sales, fill = promo))+
  geom_bar(stat = "identity", position = "dodge")+
  coord_flip()+
  labs (title = "Promotion Impact on Sales"),


#By unit
GM_record %>%
  group_by(brand, promo)%>%
  summarise(mean_unit = mean(units))%>%
  ggplot(aes(x = brand, y = mean_unit, fill = promo))+
  geom_bar(stat = "identity", position = "dodge")+
  coord_flip()+
  labs (title = "Promotion Impact on unit"),

ncol = 1
)

Finding :

Promotion does increase the units in all brands, for sales as well except Cheerios.

3.1.2 T.Test For Promotion Effect

3.1.2.1 GENERAL MILLS CHEERIOS

# 1 GENERAL MILLS CHEERIOS -  sale decrease / unit increase

z <- qnorm(0.975)

GM_Cherrios <- my_data%>% filter(brand == "GENERAL MILLS CHEERIOS")


# evaluate promotion with T test 
t.test(GM_Cherrios$sales[GM_Cherrios$promo == "0"],GM_Cherrios$sales[GM_Cherrios$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_Cherrios$sales[GM_Cherrios$promo == "0"] and GM_Cherrios$sales[GM_Cherrios$promo == "1"]
## t = 4.0241, df = 331.68, p-value = 7.09e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   3.948273 11.500092
## sample estimates:
## mean of x mean of y 
##  52.34634  44.62216

t.test(GM_Cherrios$units[GM_Cherrios$promo == "0"],GM_Cherrios$units[GM_Cherrios$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_Cherrios$units[GM_Cherrios$promo == "0"] and GM_Cherrios$units[GM_Cherrios$promo == "1"]
## t = -2.7163, df = 299.5, p-value = 0.006985
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.5048813 -0.4002089
## sample estimates:
## mean of x mean of y 
##  13.79516  15.24771

# Test visulization
GM_Cherrios %>%
  group_by(promo) %>% 
  summarise(m = median(sales),sd = sd(sales),n = n(), ci = z*sd/sqrt(n))%>% ggplot(aes(x = promo,y = m,fill = promo)) +geom_bar(stat = "identity",position = "dodge") + geom_errorbar(aes(ymin = m - ci, ymax = m + ci),width = 0.5)

# reuslt 
CHEERIOS_TOAST <- c("Decrease","Increase")

Test result :

Sale decrease / Unit increase

3.1.2.2 GENERAL MILLS CINNAMON

# 2 GENERAL MILLS CINNAMON TST CR - sale increase / unit increase

GM_CINNAMON <- my_data%>% filter(brand == "GENERAL MILLS CINNAMON TST CR")

# evaluate promotion with T test 
t.test(GM_CINNAMON$sales[GM_CINNAMON$promo == "0"],GM_CINNAMON$sales[GM_CINNAMON$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_CINNAMON$sales[GM_CINNAMON$promo == "0"] and GM_CINNAMON$sales[GM_CINNAMON$promo == "1"]
## t = -2.3776, df = 465.66, p-value = 0.01783
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.2480936 -0.5934813
## sample estimates:
## mean of x mean of y 
##  33.59999  37.02077

t.test(GM_CINNAMON$units[GM_CINNAMON$promo == "0"],GM_CINNAMON$units[GM_CINNAMON$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_CINNAMON$units[GM_CINNAMON$promo == "0"] and GM_CINNAMON$units[GM_CINNAMON$promo == "1"]
## t = -10.182, df = 381.12, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.226214 -3.534405
## sample estimates:
## mean of x mean of y 
##  8.033832 12.414141

# Test visulization
GM_CINNAMON %>%
  group_by(promo) %>% 
  summarise(m = median(sales),sd = sd(sales),n = n(), ci = z*sd/sqrt(n))%>% ggplot(aes(x = promo,y = m,fill = promo)) +geom_bar(stat = "identity",position = "dodge") + geom_errorbar(aes(ymin = m - ci, ymax = m + ci),width = 0.5)

# reuslt 
TST_CINNAMON <- c("Increase","Increase")

Test result :

Sale increase / Unit increase

3.1.2.3 GENERAL MILLS COCOA PUFFS

# 3  GENERAL MILLS COCOA PUFFS - sale increase / unit increase

GM_cocoa <- my_data%>% filter(brand == "GENERAL MILLS COCOA PUFFS")

# evaluate promotion with T test 
t.test(GM_cocoa$sales[GM_cocoa$promo == "0"],GM_cocoa$sales[GM_cocoa$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_cocoa$sales[GM_cocoa$promo == "0"] and GM_cocoa$sales[GM_cocoa$promo == "1"]
## t = -4.0746, df = 422.34, p-value = 5.507e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.459441 -2.953708
## sample estimates:
## mean of x mean of y 
##  24.33817  30.04474

t.test(GM_cocoa$units[GM_cocoa$promo == "0"],GM_cocoa$units[GM_cocoa$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_cocoa$units[GM_cocoa$promo == "0"] and GM_cocoa$units[GM_cocoa$promo == "1"]
## t = -8.4894, df = 345.28, p-value = 6.234e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.481580 -3.419378
## sample estimates:
## mean of x mean of y 
##  6.888166 11.338645

# Test visulization
GM_cocoa %>%
  group_by(promo) %>% 
  summarise(m = median(sales),sd = sd(sales),n = n(), ci = z*sd/sqrt(n))%>% ggplot(aes(x = promo,y = m,fill = promo)) +geom_bar(stat = "identity",position = "dodge") + geom_errorbar(aes(ymin = m - ci, ymax = m + ci),width = 0.5)

# reuslt 
PUFFS_COCOA <- c("Increase","Increase")

Test result :

Sale increase / Unit increase

3.1.2.4 GENERAL MILLS KIX

# 4  GENERAL MILLS KIX - sale no differece !! / unit increase

GM_KIX <- my_data%>% filter(brand == "GENERAL MILLS KIX")

# Evaluate promotion with T test
t.test(GM_KIX$sales[GM_KIX$promo == "0"],GM_KIX$sales[GM_KIX$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_KIX$sales[GM_KIX$promo == "0"] and GM_KIX$sales[GM_KIX$promo == "1"]
## t = -0.34451, df = 335.06, p-value = 0.7307
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.471542  2.436775
## sample estimates:
## mean of x mean of y 
##  26.89814  27.41553

t.test(GM_KIX$units[GM_KIX$promo == "0"],GM_KIX$units[GM_KIX$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_KIX$units[GM_KIX$promo == "0"] and GM_KIX$units[GM_KIX$promo == "1"]
## t = -5.7239, df = 254.31, p-value = 2.922e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.166095 -2.033207
## sample estimates:
## mean of x mean of y 
##  7.076229 10.175879

# Test visulization 
GM_KIX %>%
  group_by(promo) %>% 
  summarise(m = median(sales),sd = sd(sales),n = n(), ci = z*sd/sqrt(n))%>% ggplot(aes(x = promo,y = m,fill = promo)) +geom_bar(stat = "identity",position = "dodge") + geom_errorbar(aes(ymin = m - ci, ymax = m + ci),width = 0.5)

# reuslt 
KIX_REGULAR <- c("No Difference ","Increase")

Test result :

sale no differece !! / Unit increase

3.1.2.5 GENERAL MILLS LUCKY CHARMS

# 5 GENERAL MILLS LUCKY CHARMS - sale iNcrease / unit increase

GM_LUCKY <- my_data%>% filter(brand == "GENERAL MILLS LUCKY CHARMS")

# Evaluate promotion with T test
t.test(GM_LUCKY$sales[GM_LUCKY$promo == "0"],GM_LUCKY$sales[GM_LUCKY$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_LUCKY$sales[GM_LUCKY$promo == "0"] and GM_LUCKY$sales[GM_LUCKY$promo == "1"]
## t = -2.9748, df = 476.11, p-value = 0.003081
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.561649 -1.545826
## sample estimates:
## mean of x mean of y 
##  35.36537  39.91911

t.test(GM_LUCKY$units[GM_LUCKY$promo == "0"],GM_LUCKY$units[GM_LUCKY$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_LUCKY$units[GM_LUCKY$promo == "0"] and GM_LUCKY$units[GM_LUCKY$promo == "1"]
## t = -10.78, df = 413.78, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.582192 -3.860398
## sample estimates:
## mean of x mean of y 
##  8.234261 12.955556

# Test visulization 
GM_LUCKY %>%
  group_by(promo) %>% 
  summarise(m = median(sales),sd = sd(sales),n = n(), ci = z*sd/sqrt(n))%>% ggplot(aes(x = promo,y = m,fill = promo)) +geom_bar(stat = "identity",position = "dodge") + geom_errorbar(aes(ymin = m - ci, ymax = m + ci),width = 0.5)

# reuslt 
LUCKY_TOAST <- c("Increase ","Increase")

Test result :

Sale increase / Unit increase

3.2 Advertising Evaluation

3.2.1 Premise:

For the promotion analysis, we will analyze only the unit, since the price should not be different.

#By unit
GM_record %>%
  group_by(brand, ad)%>%
  summarise(mean_unit = mean(units))%>%
  ggplot(aes(x = brand, y = mean_unit, fill = ad))+
  geom_bar(stat = "identity", position = "dodge")+
  coord_flip()+
  labs (title = "Ad Impact on unit")

Finding:

Small and median ad both increased the unit sold. However, median ad didn’t always do much better than small ad.

3.2.2 T.test for Ad Impact

3.2.2.1 GENERAL MILLS CHEERIOS

# 1 GENERAL MILLS CHEERIOS
GM_Cherrios <- my_data%>% filter(brand == "GENERAL MILLS CHEERIOS")

# Check if there are enough of poeple in each group 
GM_Cherrios %>% group_by(ad)%>% summarise(n())

## # A tibble: 3 x 2
##   ad    `n()`
##   <fct> <int>
## 1 A        79
## 2 B        44
## 3 NONE   1335

# T test 

t.test(GM_Cherrios$units[GM_Cherrios$ad == "NONE"],GM_Cherrios$units[GM_Cherrios$ad == "A"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_Cherrios$units[GM_Cherrios$ad == "NONE"] and GM_Cherrios$units[GM_Cherrios$ad == "A"]
## t = -0.31128, df = 86.065, p-value = 0.7563
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.099007  1.530648
## sample estimates:
## mean of x mean of y 
##  13.91835  14.20253

t.test(GM_Cherrios$units[GM_Cherrios$ad == "NONE"],GM_Cherrios$units[GM_Cherrios$ad == "B"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_Cherrios$units[GM_Cherrios$ad == "NONE"] and GM_Cherrios$units[GM_Cherrios$ad == "B"]
## t = -2.6607, df = 46.833, p-value = 0.01065
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.5737406 -0.6350098
## sample estimates:
## mean of x mean of y 
##  13.91835  16.52273

t.test(GM_Cherrios$units[GM_Cherrios$ad == "B"],GM_Cherrios$units[GM_Cherrios$ad == "A"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_Cherrios$units[GM_Cherrios$ad == "B"] and GM_Cherrios$units[GM_Cherrios$ad == "A"]
## t = 1.7736, df = 105.85, p-value = 0.07901
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.273517  4.913908
## sample estimates:
## mean of x mean of y 
##  16.52273  14.20253

# test visulization
GM_Cherrios %>%
  group_by(ad) %>% 
  summarise(m = median(units),sd = sd(units),n = n(), ci = z*sd/sqrt(n))%>% ggplot(aes(x = ad,y = m, fill = ad)) +geom_bar(stat = "identity",position = "dodge") + geom_errorbar(aes(ymin = m - ci, ymax = m + ci),width = 0.5)

GM_Cherrios_ad <- c("TOAST","No Difference","Increase", "No Difference")

Test result : None - A : no difference / None - B : increase /A -B : no difference

3.2.2.2 GENERAL MILLS CINNAMON TST CR

# 2 GENERAL MILLS CINNAMON TST CR
GM_CINNAMON <- my_data%>% filter(brand == "GENERAL MILLS CINNAMON TST CR")

# Check if there are enough of poeple in each group 
GM_CINNAMON %>% group_by(ad)%>% summarise(n())

## # A tibble: 3 x 2
##   ad    `n()`
##   <fct> <int>
## 1 A       114
## 2 B        51
## 3 NONE   1669

# T test 

t.test(GM_CINNAMON$units[GM_CINNAMON$ad == "NONE"],GM_CINNAMON$units[GM_CINNAMON$ad == "A"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_CINNAMON$units[GM_CINNAMON$ad == "NONE"] and GM_CINNAMON$units[GM_CINNAMON$ad == "A"]
## t = -3.8624, df = 124.53, p-value = 0.0001795
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.002526 -1.290327
## sample estimates:
## mean of x mean of y 
##  8.502696 11.149123

t.test(GM_CINNAMON$units[GM_CINNAMON$ad == "NONE"],GM_CINNAMON$units[GM_CINNAMON$ad == "B"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_CINNAMON$units[GM_CINNAMON$ad == "NONE"] and GM_CINNAMON$units[GM_CINNAMON$ad == "B"]
## t = -2.6003, df = 52.081, p-value = 0.01209
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.8412422 -0.6239536
## sample estimates:
## mean of x mean of y 
##  8.502696 11.235294

t.test(GM_CINNAMON$units[GM_CINNAMON$ad == "B"],GM_CINNAMON$units[GM_CINNAMON$ad == "A"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_CINNAMON$units[GM_CINNAMON$ad == "B"] and GM_CINNAMON$units[GM_CINNAMON$ad == "A"]
## t = 0.069684, df = 92.85, p-value = 0.9446
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.369541  2.541884
## sample estimates:
## mean of x mean of y 
##  11.23529  11.14912

# test visulization
GM_CINNAMON%>%
  group_by(ad) %>% 
  summarise(m = median(units),sd = sd(units),n = n(), ci = z*sd/sqrt(n))%>% ggplot(aes(x = ad,y = m, fill = ad)) +geom_bar(stat = "identity",position = "dodge") + geom_errorbar(aes(ymin = m - ci, ymax = m + ci),width = 0.5)

GM_CINNAMON_ad <- c("CINNAMON","Increase","Increase", "No Difference")

Test result : None - A : increase / None - B : increase /A -B : no difference

3.2.2.3 GENERAL MILLS CHEERIOS

# 3 GENERAL MILLS COCOA PUFFS
GM_COCOA <- my_data%>% filter(brand == "GENERAL MILLS COCOA PUFFS")

# Check if there are enough of poeple in each group 
GM_COCOA %>% group_by(ad)%>% summarise(n())

## # A tibble: 3 x 2
##   ad    `n()`
##   <fct> <int>
## 1 A        76
## 2 B        54
## 3 NONE    890

# T test 

t.test(GM_COCOA$units[GM_COCOA$ad == "NONE"],GM_COCOA$units[GM_COCOA$ad == "A"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_COCOA$units[GM_COCOA$ad == "NONE"] and GM_COCOA$units[GM_COCOA$ad == "A"]
## t = -3.7672, df = 83.723, p-value = 0.0003061
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.169132 -1.597219
## sample estimates:
## mean of x mean of y 
##  7.524719 10.907895

t.test(GM_COCOA$units[GM_COCOA$ad == "NONE"],GM_COCOA$units[GM_COCOA$ad == "B"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_COCOA$units[GM_COCOA$ad == "NONE"] and GM_COCOA$units[GM_COCOA$ad == "B"]
## t = -3.614, df = 57.162, p-value = 0.0006378
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.062705 -1.739709
## sample estimates:
## mean of x mean of y 
##  7.524719 11.425926

t.test(GM_COCOA$units[GM_COCOA$ad == "B"],GM_COCOA$units[GM_COCOA$ad == "A"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_COCOA$units[GM_COCOA$ad == "B"] and GM_COCOA$units[GM_COCOA$ad == "A"]
## t = 0.37729, df = 112.76, p-value = 0.7067
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.202269  3.238331
## sample estimates:
## mean of x mean of y 
##  11.42593  10.90789

# test visulization
GM_COCOA %>%
  group_by(ad) %>% 
  summarise(m = median(units),sd = sd(units),n = n(), ci = z*sd/sqrt(n))%>% ggplot(aes(x = ad,y = m, fill = ad)) +geom_bar(stat = "identity",position = "dodge") + geom_errorbar(aes(ymin = m - ci, ymax = m + ci),width = 0.5)

GM_COCOA_ad <- c("COCOA","Increase","Increase", "No Difference")

Test result : None - A : increase / None - B : increase /A -B : no difference

3.2.2.4 GENERAL MILLS KIX

# 4 KIX
GM_KIX <- my_data%>% filter(brand == "GENERAL MILLS KIX")

# Check if there are enough of poeple in each group 
GM_KIX %>% group_by(ad)%>% summarise(n())

## # A tibble: 3 x 2
##   ad    `n()`
##   <fct> <int>
## 1 A        70
## 2 B        54
## 3 NONE   1072

# T test 

t.test(GM_KIX$units[GM_KIX$ad == "NONE"],GM_KIX$units[GM_KIX$ad == "A"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_KIX$units[GM_KIX$ad == "NONE"] and GM_KIX$units[GM_KIX$ad == "A"]
## t = -2.8929, df = 75.017, p-value = 0.004994
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.4411898 -0.8189381
## sample estimates:
## mean of x mean of y 
##  7.298507  9.928571

t.test(GM_KIX$units[GM_KIX$ad == "NONE"],GM_KIX$units[GM_KIX$ad == "B"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_KIX$units[GM_KIX$ad == "NONE"] and GM_KIX$units[GM_KIX$ad == "B"]
## t = -3.0389, df = 56.648, p-value = 0.003589
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.127052 -1.053711
## sample estimates:
## mean of x mean of y 
##  7.298507 10.388889

t.test(GM_KIX$units[GM_KIX$ad == "B"],GM_KIX$units[GM_KIX$ad == "A"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_KIX$units[GM_KIX$ad == "B"] and GM_KIX$units[GM_KIX$ad == "A"]
## t = 0.34377, df = 114.87, p-value = 0.7316
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.192052  3.112687
## sample estimates:
## mean of x mean of y 
## 10.388889  9.928571

# test visulization
GM_KIX %>%
  group_by(ad) %>% 
  summarise(m = median(units),sd = sd(units),n = n(), ci = z*sd/sqrt(n))%>% ggplot(aes(x = ad,y = m, fill = ad)) +geom_bar(stat = "identity",position = "dodge") + geom_errorbar(aes(ymin = m - ci, ymax = m + ci),width = 0.5)

GM_KIX_ad <- c("REGULAR","Increase","Increase", "No Difference")

Test result : None - A : increase / None - B : increase /A -B : no difference

3.2.2.5 GENERAL MILLS LUCKY CHARMS

# 5 GENERAL MILLS CHEERIOS
GM_LUCKY <- my_data %>% filter(brand == "GENERAL MILLS LUCKY CHARMS")

# Check if there are enough of poeple in each group 
GM_LUCKY %>% group_by(ad)%>% summarise(n())

## # A tibble: 3 x 2
##   ad    `n()`
##   <fct> <int>
## 1 A       103
## 2 B        69
## 3 NONE   1509

# T test 

t.test(GM_LUCKY$units[GM_LUCKY$ad == "NONE"],GM_LUCKY$units[GM_LUCKY$ad == "A"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_LUCKY$units[GM_LUCKY$ad == "NONE"] and GM_LUCKY$units[GM_LUCKY$ad == "A"]
## t = -5.3748, df = 112.61, p-value = 4.192e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.370615 -2.477610
## sample estimates:
## mean of x mean of y 
##  8.784626 12.708738

t.test(GM_LUCKY$units[GM_LUCKY$ad == "NONE"],GM_LUCKY$units[GM_LUCKY$ad == "B"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_LUCKY$units[GM_LUCKY$ad == "NONE"] and GM_LUCKY$units[GM_LUCKY$ad == "B"]
## t = -2.662, df = 73.003, p-value = 0.009547
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.0006751 -0.5750012
## sample estimates:
## mean of x mean of y 
##  8.784626 11.072464

t.test(GM_LUCKY$units[GM_LUCKY$ad == "B"],GM_LUCKY$units[GM_LUCKY$ad == "A"])

## 
##  Welch Two Sample t-test
## 
## data:  GM_LUCKY$units[GM_LUCKY$ad == "B"] and GM_LUCKY$units[GM_LUCKY$ad == "A"]
## t = -1.4814, df = 148.93, p-value = 0.1406
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.8189469  0.5463988
## sample estimates:
## mean of x mean of y 
##  11.07246  12.70874

# test visulization
GM_LUCKY %>%
  group_by(ad) %>% 
  summarise(m = median(units),sd = sd(units),n = n(), ci = z*sd/sqrt(n))%>% ggplot(aes(x = ad,y = m, fill = ad)) +geom_bar(stat = "identity",position = "dodge") + geom_errorbar(aes(ymin = m - ci, ymax = m + ci),width = 0.5)

GM_LUCKY_ad <- c("TOAST","Increase","Increase", "No Difference")

Test result : None - A : increase / None - B : increase /A -B : no difference

3.3 Conslusion for Strategy Evaluation

GM_brand <- c("  ", "GENERAL MILLS CHEERIOS", "GENERAL MILLS LUCKY CHARMS","GENERAL MILLS CINNAMON TST CR","GENERAL MILLS COCOA PUFFS","GENERAL MILLS KIX")

GM_promotion <- c("Sales","Units")

promo_eva <- data.frame(GM_promotion, CHEERIOS_TOAST, TST_CINNAMON,PUFFS_COCOA,KIX_REGULAR,LUCKY_TOAST)

pic_table1 <- formattable(promo_eva)
pic_table1

GM_promotion	CHEERIOS_TOAST	TST_CINNAMON	PUFFS_COCOA	KIX_REGULAR	LUCKY_TOAST
Sales	Decrease	Increase	Increase	No Difference	Increase
Units	Increase	Increase	Increase	Increase	Increase

png("pic_table1.png",width =800, height=200,bg = "white")
grid.table(pic_table1)
dev.off()

## quartz_off_screen 
##                 2

#export_formattable(pic_table1,"pic_table1.png")

GM_brand <- c("  ", "GENERAL MILLS CHEERIOS", "GENERAL MILLS LUCKY CHARMS","GENERAL MILLS CINNAMON TST CR","GENERAL MILLS COCOA PUFFS","GENERAL MILLS KIX")

GM_Advertisement <- c("Flavor","Medium Ad","Small Ad"," Medium/Small")

ad_eva <- data.frame(GM_Advertisement ,GM_Cherrios_ad,GM_CINNAMON_ad,GM_COCOA_ad,GM_KIX_ad,GM_LUCKY_ad)

pic_table2 <- formattable(ad_eva)
pic_table2

GM_Advertisement	GM_Cherrios_ad	GM_CINNAMON_ad	GM_COCOA_ad	GM_KIX_ad	GM_LUCKY_ad
Flavor	TOAST	CINNAMON	COCOA	REGULAR	TOAST
Medium Ad	No Difference	Increase	Increase	Increase	Increase
Small Ad	Increase	Increase	Increase	Increase	Increase
Medium/Small	No Difference	No Difference	No Difference	No Difference	No Difference

png("pic_table2.png",width =800, height=200,bg = "white")
grid.table(pic_table2)
dev.off()

## quartz_off_screen 
##                 2

Conclusion :

We should stop promotion CHerrios and KIX promotion if our main goal is increasing sales instead of increasing market share.
We should stop doing the median ad since it makes no difference to the small ads. But doing Median ad costs more.

4 Finding 2 - Reasons for Losing sales in REGULAR

4.1 Target Market analysis

# Kick out 7 record from GM in Regular but not from GENERAL MILLS LUCKY CHARMS

regular_sales <- my_data %>%
  filter( flavor == "REGULAR" & brand != "GENERAL MILLS LUCKY CHARMS" & brand != "GENERAL MILLS CHEERIOS"  ) %>%
  mutate(volume_price = price / volume)

grid.arrange(
  
sales_GM_P <- regular_sales %>% 
  filter(producer!= "KELLOGGS") %>%
  group_by(producer,week) %>% 
  summarise(sum_sales = sum(sales)) %>% 
  ggplot(aes(x = week, y = sum_sales, color = producer))+
  geom_line()+
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "Sales Pattern", color = " ") +
  labs(title = "GM & Post Regular flavor Sales Pattern", x = "Week", y = "Total Sales", color =" ") +  
  scale_color_manual(values = c("#339966", "#0066cc"), guide = guide_legend(reverse = TRUE)) +
  theme_classic(),

sales_GM_K <- regular_sales %>% 
  filter(producer != "POST") %>%
  group_by(producer,week) %>% 
  summarise(sum_sales = sum(sales)) %>% 
  ggplot(aes(x = week, y = sum_sales, color = producer)) +
  geom_line() +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "GM & Kelloggs Regular flavor Sales Pattern", x = "Week", y = "Total Sales", color =" ") +  
  scale_color_manual(values = c("#339966", "#ff6600"), guide = guide_legend(reverse = TRUE)) +
  theme(plot.title = element_text(size=20, face = "bold")) +
  theme_classic(),


ncol = 1

)

ggsave(filename = "sales_GM_P.png", width = 6, height = 2, plot = sales_GM_P)
ggsave(filename = "sales_GM_K.png", width = 6, height = 2, plot = sales_GM_K)

Finding :

We almost share the same sale pattern with Kelloggs, but not with the post. We can conclude that Kelloggs and we are selling to the same market with the same need. But why customers choose Kelloggs rather than us?

For the following, we list out several possible hypotheses and we tested them. We will focus mainly on the difference between us and Kelloggs since we share the same market.

4.2 Price

regular_sales <- my_data %>%
  filter( flavor == "REGULAR" & brand != "GENERAL MILLS LUCKY CHARMS" & brand != "GENERAL MILLS CHEERIOS"  ) %>%
  mutate(volume_price = price / volume)

price_volumn <- regular_sales  %>% 
  group_by(producer,brand)%>%
  mutate(unit_price = median(volume_price), n = n(), sd = sd(unit_price), ci = z*sd/sqrt(sd)) %>%
  ggplot(aes(x = producer, y = unit_price , fill =brand))+
  geom_bar(stat = "identity",position = "dodge") +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "Price per Volume in Regular Flavor", x = "Producer", y = "Price per Volume") +  
  theme(plot.title = element_text(size=24, face = "bold")) +
  theme(legend.title = element_text(size=12), legend.text=element_text(size=5)) +
  theme_classic()
  
price_volumn

ggsave(filename = "price_volumn.png", plot = price_volumn)

## Saving 7 x 5 in image

Finding : price per weight is higher than other brand

T test for finding :

# with Kelloggs

t.test(regular_sales$volume_price[regular_sales$producer == "GM"],regular_sales$volume_price[regular_sales$producer == "KELLOGGS"])

## 
##  Welch Two Sample t-test
## 
## data:  regular_sales$volume_price[regular_sales$producer == "GM"] and regular_sales$volume_price[regular_sales$producer == "KELLOGGS"]
## t = 49.93, df = 1905.2, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1.683094 1.820721
## sample estimates:
## mean of x mean of y 
##  5.027739  3.275832

# with post

t.test(regular_sales$volume_price[regular_sales$producer == "GM"],regular_sales$volume_price[regular_sales$producer == "POST"])

## 
##  Welch Two Sample t-test
## 
## data:  regular_sales$volume_price[regular_sales$producer == "GM"] and regular_sales$volume_price[regular_sales$producer == "POST"]
## t = 55.446, df = 1943.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1.898393 2.037614
## sample estimates:
## mean of x mean of y 
##  5.027739  3.059736

Test result :

Hypothesis confirmed, we are more expensive per volume than the other competitors in Regular

4.3 Promotion

grid.arrange(

# for promotion
  
my_data  %>% 
  group_by(flavor, promo,producer) %>%
  ggplot(aes(x = flavor, fill = promo)) +
  geom_bar(stat = "count",position = "dodge") +
  facet_grid(producer~.),



# for ad 

my_data  %>% 
  group_by(flavor, promo,producer) %>%
  ggplot(aes(x = flavor, fill = ad)) +
  geom_bar(stat = "count",position = "dodge") +
  facet_grid(producer~.),


ncol = 1
)

Findings:

For Promo

Kelloggs did much more promotion than us , especial in Regular. That can be the reson why there sales is better.(We will do a t.test on the affect of promotion)
POST did more promo than us

For Ad

Kelloggs did more ad than others. We did the least. That can related the the poor sale as well. (We will do a t.test on the affect of ad)

Test for the hypothesis:

# Promotion holding frequency

GM_K <- my_data %>% filter (producer %in% c("GM","KELLOGGS") )
chisq.test(table(GM_K$promo, GM_K$promo))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(GM_K$promo, GM_K$promo)
## X-squared = 19366, df = 1, p-value < 2.2e-16

GM_P <- my_data %>% filter (producer %in% c("GM","POST") )
chisq.test(table(GM_P$promo, GM_P$promo))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(GM_P$promo, GM_P$promo)
## X-squared = 9660.5, df = 1, p-value < 2.2e-16

Test result :

Both Kelloggs and Post held more promotion then us.

# Test promotion impact on sales for Kelloggs and Post

# Kelloggs for all 

t.test( K_record$sales[K_record$promo == "0"],K_record$sales[K_record$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  K_record$sales[K_record$promo == "0"] and K_record$sales[K_record$promo == "1"]
## t = -6.8113, df = 4638.1, p-value = 1.091e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.365500 -2.414138
## sample estimates:
## mean of x mean of y 
##  29.17031  32.56013

# Kelloggs in Regular
k_r <- 
  K_record %>% 
  filter(flavor == "REGULAR")

t.test( k_r$sales[k_r$promo == "0"],k_r$sales[k_r$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  k_r$sales[k_r$promo == "0"] and k_r$sales[k_r$promo == "1"]
## t = -4.2765, df = 1658.5, p-value = 2.007e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.118801 -1.899749
## sample estimates:
## mean of x mean of y 
##  34.11607  37.62535

# Post 
t.test( P_record$sales[P_record$promo == "0"],P_record$sales[P_record$promo == "1"])

## 
##  Welch Two Sample t-test
## 
## data:  P_record$sales[P_record$promo == "0"] and P_record$sales[P_record$promo == "1"]
## t = -1.4442, df = 943.51, p-value = 0.149
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.0985188  0.4714192
## sample estimates:
## mean of x mean of y 
##  22.44696  23.76051

Test result:

For Kelloggs :

p-value for all flavor :1.091e-11

p-value in Regular : 2.007e-05

Kelloggs promotion did help the sale, for the all brands and regular.

For Post :

p-value in Regular 0.149

Promotion did not increase the sales

4.4 Ad

Advertisment holding Frequecy

# ad do increase the sales and unit for all brand 
  
regular_sales %>% 
  group_by(producer, ad) %>%
  summarise( unit_sold = median(sales)) %>%
  ggplot(aes(x = producer, y =unit_sold, fill = ad)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title= "Ad affects unit sold")

Finding: After making sure that ad does increasing sale perfomance in all producer

# promo holding frequency   

 regular_sales %>%
  group_by(producer,ad) %>%
  ggplot(aes(producer, fill = ad)) +
  geom_bar(position = "fill")+
  coord_flip()+
  labs(title = "Ad Holding Frequency")

Finding: We did less than others in regular flavor. This maybe another reason that we lost sales in regular.

5 Conclusion

After analyzing, we summarized several findings as follow:

From the perspective of producer, we lost sales to Kelloggs but won post
From the perspective of flavor, we lost mainly in Regular, to both Kelloggs and Post.
The previous problem can cause by :
1. Too less brand in this flavor for customers to choose
2. Unnecessary promotion, which decrease the total revenue instead of boosted.
3. Too expensive price per volume
4. Too low advertisement holding frequency
We held too least promotion and advertisement compared to our competitors.
For all brands we have, promotion did increase unit sold, but for Cherrios the sales decreased and for KIX promotion didn’t make difference in unit sold.
For all brands we have, median and small advertisement don’t make difference in unit sold. For cost effective reason, we should stop holding median advertisement.
We should consider selling “Fruit” flavor, because that’s the only flavor we don’t have.

Midterm_Project_TA

Gloria & Vanessa

11/2/2019

1 Load Package and Data

1.1 Load Package

1.2 Data Loading and Cleaning

2 Base EDA for all brand in Cereal Market

2.1 Uni-variable Non-Graphic

2.2 Uni-variable Graphic

2.3 Multi-variable Non-Graphic

2.4 Multi-variable Graphic

2.4.1 Brand Analysis

2.4.2 Flavor Analysis

2.4.3 Package and sales

2.4.4 Price analysis

3 Finding 1 -Some Strategies We Used DID NOT Make Difference

3.1 Promotion Evaluation

3.1.1 Premise:

3.1.2 T.Test For Promotion Effect

3.1.2.1 GENERAL MILLS CHEERIOS

3.1.2.2 GENERAL MILLS CINNAMON

3.1.2.3 GENERAL MILLS COCOA PUFFS

3.1.2.4 GENERAL MILLS KIX

3.1.2.5 GENERAL MILLS LUCKY CHARMS

3.2 Advertising Evaluation

3.2.1 Premise:

3.2.2 T.test for Ad Impact

3.2.2.1 GENERAL MILLS CHEERIOS

3.2.2.2 GENERAL MILLS CINNAMON TST CR

3.2.2.3 GENERAL MILLS CHEERIOS

3.2.2.4 GENERAL MILLS KIX

3.2.2.5 GENERAL MILLS LUCKY CHARMS

3.3 Conslusion for Strategy Evaluation

4 Finding 2 - Reasons for Losing sales in REGULAR

4.1 Target Market analysis

4.2 Price

4.3 Promotion

4.4 Ad

5 Conclusion