Data Preparation

Research question

You should phrase your research question in a way that matches up with the scope of inference your dataset allows for.

  1. Calculate sales variance /effectiveness by promo by season, Which promotion is effective (buy 10 get 1 ) or (buy 20 get1 free).
  2. Evaluate by specific promotion which creates the most up lift, and does time period play a role in the offer as well.
  3. Does customers buy more during the Quarter End
  4. Does customer who baught more at the Quarter end , how did they perfrom during the followin quarter.

Cases

What are the cases, and how many are there?

I have data for 2 Year for each Quarter.What else I should Incldue here ?

Data collection

Describe the method of data collection.

Promotion and Sales data from 2017 and 2018 Would be provided by Respective team for analysis.

Type of study

What type of study is this (observational/experiment)? We do studies to gather information and draw conclusions. The type of conclusion we draw depends on the study method used: In an observational study, we measure or survey members of a sample without trying to affect them. In a controlled experiment, we assign people or things to groups and apply some treatment to one of the groups, while the other group does not receive the treatment.

This is observational study.

Data Source

If you collected the data, state self-collected. If not, provide a citation/link.

workDir <- getwd()

filePath = paste0(workDir,"/Data")
g_max <- 1048576 
ctype = c("text","text","text","text","text","numeric","numeric","text","text","text","text","text","text","text")
 Q1_17 <-read_excel(path=paste0(filePath,"/","2017_Q1.xlsx"),sheet="Data",guess_max =g_max)
 Q2_17 <-read_xlsx(path=paste0(filePath,"/","2017_Q2.xlsx"),sheet="Data",guess_max =g_max)
## New names:
## * `` -> `..1`
## * `Common Owner` -> `Common Owner..2`
## * `Common Owner` -> `Common Owner..12`
 (nms <- names(read_excel(path=paste0(filePath,"/","2017_Q3.xlsx"), sheet="DATA",n_max = 0)))
## New names:
## * `Common Owner` -> `Common Owner..1`
## * `Common Owner` -> `Common Owner..11`
##  [1] "Common Owner..1"        "Main Acct Name/CO Name"
##  [3] "Zip Code"               "Customer Door"         
##  [5] "Name"                   "Sales Rep."            
##  [7] "Sales Rep. Name"        "Sales Manager"         
##  [9] "Sales Manager Name"     "Brand"                 
## [11] "Common Owner..11"       "Alliance Code"         
## [13] "Acquisition Mode"       "DSM"                   
## [15] "RCSM"                   "Order Number"          
## [17] "Order Date"             "Order Quantity"        
## [19] "Promotion Order Doll"   "Promotion"             
## [21] "Internal Code"          "Promo Code"            
## [23] "Ship Sets"              "From Date"             
## [25] "To Date"                "Brand v2"              
## [27] "address"                "Address 2"             
## [29] "City"                   "State"                 
## [31] "Zip"                    "phone"
 Q3_17 <-read_xlsx(path=paste0(filePath,"/","2017_Q3.xlsx"),sheet="DATA",guess_max =g_max)
## New names:
## * `Common Owner` -> `Common Owner..1`
## * `Common Owner` -> `Common Owner..11`
 Q4_17 <-read_excel(path=paste0(filePath,"/","2017_Q4.xlsx"),sheet="DATA",guess_max =g_max)
## New names:
## * `` -> `..1`
## * `Common Owner` -> `Common Owner..2`
## * Zip -> Zip..4
## * `Common Owner` -> `Common Owner..12`
## * Zip -> Zip..31
 Q1_18 <-read_excel(path=paste0(filePath,"/","2018_Q1.xlsx"),sheet="Data",guess_max =g_max)
## New names:
## * `Common Owner` -> `Common Owner..1`
## * `Common Owner` -> `Common Owner..11`
 Q2_18 <-read_excel(path=paste0(filePath,"/","2018_Q2.xlsx"),sheet="Data",guess_max =g_max)
## New names:
## * `Common Owner` -> `Common Owner..2`
## * `Common Owner` -> `Common Owner..12`
 Q3_18 <-read_excel(path=paste0(filePath,"/","2018_Q3.xlsx"),sheet="Data",guess_max =g_max)
 Q4_18 <-read_excel(path=paste0(filePath,"/","2018_Q4.xlsx"),sheet="Data",guess_max =g_max)
 
# 
#  
# 
# 
# Q1_17[,c(2,3,4,5,6,7,8,9,10,14,15,16,17,18,19,20,21,22,25,26,27)]
# Q2_17[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,22,24,25,26,30,31,32)]
# Q3_17[,c(1,2,4,5,6,7,8,9,10,16,17,18,19,20,22,23,24,25,29,30,31)]
# Q4_17[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,23,24,25,26,29,30,31)]
# Q1_18[,c(1,2,4,5,6,7,8,9,10,16,17,18,19,20,22,23,24,25,29,30,31)]
# Q2_18[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,23,24,25,26,30,31,32)]
# Q3_18[,c(2,3,5,6,7,8,9,10,11,18,19,20,21,22,24,25,26,27,31,32,33)]
# Q4_18[,c(2,3,5,6,7,8,9,10,11,18,19,20,21,22,24,25,26,27,31,32,33)]
# 
# 
# 
#    
#    
#  
#   str(Q1_17)SData <- bind_rows(Q3_17,Q1_17)
#  
#   Q1_17[,c("Main" , "Customer")]
#   unique(Q1_17$exc)
#   Q3_17[7359,]
#   
# All_Coll_Name <- bind_cols(  
# "1"=as.data.frame(names(Q1_17[,c(2,3,4,5,6,7,8,9,10,14,15,16,17,18,19,20,21,22,25,26,27)])),
# "2"=as.data.frame(names(Q2_17[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,22,24,25,26,30,31,32)])),
# "3"=as.data.frame(names(Q3_17[,c(1,2,4,5,6,7,8,9,10,16,17,18,19,20,22,23,24,25,29,30,31)])),
# "4"=as.data.frame(names(Q4_17[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,23,24,25,26,29,30,31)])),
# "18"=as.data.frame(names(Q1_18[,c(1,2,4,5,6,7,8,9,10,16,17,18,19,20,22,23,24,25,29,30,31)])),
# "28"=as.data.frame(names(Q2_18[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,23,24,25,26,30,31,32)])),
# "8"=as.data.frame(names(Q3_18[,c(2,3,5,6,7,8,9,10,11,18,19,20,21,22,24,25,26,27,31,32,33)])),
# "48"=as.data.frame(names(Q4_18[,c(2,3,5,6,7,8,9,10,11,18,19,20,21,22,24,25,26,27,31,32,33)])))
# 
# names(All_Coll_Name) <- 1:8


Q1_17 <- Q1_17[,c(2,3,4,5,6,7,8,9,10,14,15,16,17,18,19,20,21,22,25,26,27)]
Q2_17 <- Q2_17[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,22,24,25,26,30,31,32)]
Q3_17 <- Q3_17[,c(1,2,4,5,6,7,8,9,10,16,17,18,19,20,22,23,24,25,29,30,31)]
Q4_17 <- Q4_17[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,23,24,25,26,29,30,31)]
Q1_18 <- Q1_18[,c(1,2,4,5,6,7,8,9,10,16,17,18,19,20,22,23,24,25,29,30,31)]
Q2_18 <- Q2_18[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,23,24,25,26,30,31,32)]
Q3_18 <- Q3_18[,c(2,3,5,6,7,8,9,10,11,18,19,20,21,22,24,25,26,27,31,32,33)]
Q4_18 <- Q4_18[,c(2,3,5,6,7,8,9,10,11,18,19,20,21,22,24,25,26,27,31,32,33)]

   Q1_17$Qt <-   "Q1_17"
   Q2_17$Qt <-   "Q2_17"
   Q3_17$Qt <-   "Q3_17"
   Q4_17$Qt <-   "Q4_17"
   Q1_18$Qt <-   "Q1_18"
   Q2_18$Qt <-   "Q2_18"
   Q3_18$Qt <-   "Q3_18"
   Q4_18$Qt <-   "Q4_18"
   
   
   names(Q2_17) <- names(Q1_17)
   names(Q3_17) <- names(Q1_17)
   names(Q4_17) <- names(Q1_17)
   names(Q1_18) <- names(Q1_17)
   names(Q2_18) <- names(Q1_17)
   names(Q3_18) <- names(Q1_17)
   names(Q4_18) <- names(Q1_17)
   
   rm(mkt_Data)
## Warning in rm(mkt_Data): object 'mkt_Data' not found
   mkt_Data <- bind_rows(
   Q1_17,
   Q2_17,
   Q3_17,
   Q4_17,
   Q1_18,
   Q2_18,
   Q3_18,
   Q4_18)

    
head(mkt_Data)

Dependent Variable

What is the response variable? Is it quantitative or qualitative?

Order Quantity is Response variable here . It’s Quantitative variable. Order Qty by Quarter for Customer , Needs to calcualte afte rcollecting data over year.

Independent Variable

You should have two independent variables, one quantitative and one qualitative. Promotion, (same as External Description), Zip code are Independent varaible . Season needs to calcualte All are qualitative varaible as we can’t add them.

Relevant summary statistics

Provide summary statistics for each the variables. Also include appropriate visualizations related to your research question (e.g. scatter plot, boxplots, etc). This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.

# PLot by $ value and Qty by Qtr

ggplot(mkt_Data, mapping = aes(x= as.character(mkt_Data$`Promotion Order Doll`), y= mkt_Data$`Order Quantity`, color= mkt_Data$`External Description`))+
  geom_point()+theme(axis.text.x = element_text(angle = 70, hjust = 1)) + facet_wrap(~mkt_Data$Qt)
## Warning: Removed 7256 rows containing missing values (geom_point).

# Customer by order Qty Per Qtr

ggplot(mkt_Data, mapping = aes(x= as.character(mkt_Data$Customer), y= mkt_Data$`Order Quantity`, color= mkt_Data$Qt))+
  geom_point()+theme(axis.text.x = element_text(angle = 70, hjust = 1))+facet_wrap(~mkt_Data$Qt)
## Warning: Removed 7256 rows containing missing values (geom_point).

# Orde  date and Order Qty 

ggplot(mkt_Data, mapping = aes(x=mkt_Data$`Order Date`, y= mkt_Data$`Order Quantity`, color= mkt_Data$Qt))+
geom_smooth() 
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7256 rows containing non-finite values (stat_smooth).

ggplot(mkt_Data, mapping = aes(x=mkt_Data$`Order Date`, y= mkt_Data$`Order Quantity`))+
geom_smooth() 
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7256 rows containing non-finite values (stat_smooth).

# Order Qty by ZIP per QTR
ggplot(mkt_Data, mapping = aes(x=mkt_Data$zip, y= mkt_Data$`Order Quantity`,color= mkt_Data$Qt))+
geom_col()
## Warning: Removed 7256 rows containing missing values (position_stack).

# by Brand 
ggplot(mkt_Data, mapping = aes(x= mkt_Data$Brand, y= mkt_Data$`Order Quantity`, color= mkt_Data$Qt))+
  geom_point()+theme(axis.text.x = element_text(angle = 70, hjust = 1))
## Warning: Removed 7256 rows containing missing values (geom_point).

# By STATE

ggplot(mkt_Data, mapping = aes(x=mkt_Data$state, y= mkt_Data$`Order Quantity`, color= mkt_Data$Qt))+
  geom_col()+theme(axis.text.x = element_text(angle = 70, hjust = 1)) + facet_wrap(~mkt_Data$Qt)
## Warning: Removed 7256 rows containing missing values (position_stack).

# Customer by Promotion 
ggplot(mkt_Data, mapping = aes(x= as.character(mkt_Data$Customer), y= mkt_Data$`Order Quantity`, color= mkt_Data$`External Description`))+facet_wrap(~mkt_Data$Qt) + 
  geom_point()+theme(axis.text.x = element_text(angle = 70, hjust = 1))
## Warning: Removed 7256 rows containing missing values (geom_point).

ggplot(mkt_Data, mapping = aes(x=as.character(mkt_Data$Customer), y= mkt_Data$`Order Quantity`, color= mkt_Data$Qt))+
  geom_col()+theme(axis.text.x = element_text(angle = 70, hjust = 1)) + facet_wrap(~mkt_Data$Qt)
## Warning: Removed 7256 rows containing missing values (position_stack).

# ggplot(mkt_Data, mapping = aes(x=as.character(mkt_Data$Customer), y= mkt_Data$`Order Quantity`, color= mkt_Data$`External Description`))+
#   geom_col()+theme(axis.text.x = element_text(angle = 70, hjust = 1))
 group_by(mkt_Data,mkt_Data$`External Description`)%>% ggplot2::ggplot(mapping = aes(x=mkt_Data$`External Description`))+geom_bar()+ theme(axis.text.x = element_text(angle = 70, hjust = 1))

group_by(mkt_Data,mkt_Data$`External Description`)%>% summarise(count=n())
  group_by(mkt_Data,mkt_Data$Promotion)%>% summarise(count=n())
 plyr:: ddply(mkt_Data, c("`External Description`","Promotion"), summarise, Count = sum(`Order Quantity`) )
 plyr:: ddply(mkt_Data, c("`External Description`","zip"), summarise, Count = sum(`Order Quantity`) )
 group_by(mkt_Data,mkt_Data$zip)%>% summarise(count=n())