You should phrase your research question in a way that matches up with the scope of inference your dataset allows for.
What are the cases, and how many are there?
I have data for 2 Year for each Quarter.What else I should Incldue here ?
Describe the method of data collection.
Promotion and Sales data from 2017 and 2018 Would be provided by Respective team for analysis.
What type of study is this (observational/experiment)? We do studies to gather information and draw conclusions. The type of conclusion we draw depends on the study method used: In an observational study, we measure or survey members of a sample without trying to affect them. In a controlled experiment, we assign people or things to groups and apply some treatment to one of the groups, while the other group does not receive the treatment.
This is observational study.
If you collected the data, state self-collected. If not, provide a citation/link.
workDir <- getwd()
filePath = paste0(workDir,"/Data")
g_max <- 1048576
ctype = c("text","text","text","text","text","numeric","numeric","text","text","text","text","text","text","text")
Q1_17 <-read_excel(path=paste0(filePath,"/","2017_Q1.xlsx"),sheet="Data",guess_max =g_max)
Q2_17 <-read_xlsx(path=paste0(filePath,"/","2017_Q2.xlsx"),sheet="Data",guess_max =g_max)
## New names:
## * `` -> `..1`
## * `Common Owner` -> `Common Owner..2`
## * `Common Owner` -> `Common Owner..12`
(nms <- names(read_excel(path=paste0(filePath,"/","2017_Q3.xlsx"), sheet="DATA",n_max = 0)))
## New names:
## * `Common Owner` -> `Common Owner..1`
## * `Common Owner` -> `Common Owner..11`
## [1] "Common Owner..1" "Main Acct Name/CO Name"
## [3] "Zip Code" "Customer Door"
## [5] "Name" "Sales Rep."
## [7] "Sales Rep. Name" "Sales Manager"
## [9] "Sales Manager Name" "Brand"
## [11] "Common Owner..11" "Alliance Code"
## [13] "Acquisition Mode" "DSM"
## [15] "RCSM" "Order Number"
## [17] "Order Date" "Order Quantity"
## [19] "Promotion Order Doll" "Promotion"
## [21] "Internal Code" "Promo Code"
## [23] "Ship Sets" "From Date"
## [25] "To Date" "Brand v2"
## [27] "address" "Address 2"
## [29] "City" "State"
## [31] "Zip" "phone"
Q3_17 <-read_xlsx(path=paste0(filePath,"/","2017_Q3.xlsx"),sheet="DATA",guess_max =g_max)
## New names:
## * `Common Owner` -> `Common Owner..1`
## * `Common Owner` -> `Common Owner..11`
Q4_17 <-read_excel(path=paste0(filePath,"/","2017_Q4.xlsx"),sheet="DATA",guess_max =g_max)
## New names:
## * `` -> `..1`
## * `Common Owner` -> `Common Owner..2`
## * Zip -> Zip..4
## * `Common Owner` -> `Common Owner..12`
## * Zip -> Zip..31
Q1_18 <-read_excel(path=paste0(filePath,"/","2018_Q1.xlsx"),sheet="Data",guess_max =g_max)
## New names:
## * `Common Owner` -> `Common Owner..1`
## * `Common Owner` -> `Common Owner..11`
Q2_18 <-read_excel(path=paste0(filePath,"/","2018_Q2.xlsx"),sheet="Data",guess_max =g_max)
## New names:
## * `Common Owner` -> `Common Owner..2`
## * `Common Owner` -> `Common Owner..12`
Q3_18 <-read_excel(path=paste0(filePath,"/","2018_Q3.xlsx"),sheet="Data",guess_max =g_max)
Q4_18 <-read_excel(path=paste0(filePath,"/","2018_Q4.xlsx"),sheet="Data",guess_max =g_max)
#
#
#
#
# Q1_17[,c(2,3,4,5,6,7,8,9,10,14,15,16,17,18,19,20,21,22,25,26,27)]
# Q2_17[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,22,24,25,26,30,31,32)]
# Q3_17[,c(1,2,4,5,6,7,8,9,10,16,17,18,19,20,22,23,24,25,29,30,31)]
# Q4_17[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,23,24,25,26,29,30,31)]
# Q1_18[,c(1,2,4,5,6,7,8,9,10,16,17,18,19,20,22,23,24,25,29,30,31)]
# Q2_18[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,23,24,25,26,30,31,32)]
# Q3_18[,c(2,3,5,6,7,8,9,10,11,18,19,20,21,22,24,25,26,27,31,32,33)]
# Q4_18[,c(2,3,5,6,7,8,9,10,11,18,19,20,21,22,24,25,26,27,31,32,33)]
#
#
#
#
#
#
# str(Q1_17)SData <- bind_rows(Q3_17,Q1_17)
#
# Q1_17[,c("Main" , "Customer")]
# unique(Q1_17$exc)
# Q3_17[7359,]
#
# All_Coll_Name <- bind_cols(
# "1"=as.data.frame(names(Q1_17[,c(2,3,4,5,6,7,8,9,10,14,15,16,17,18,19,20,21,22,25,26,27)])),
# "2"=as.data.frame(names(Q2_17[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,22,24,25,26,30,31,32)])),
# "3"=as.data.frame(names(Q3_17[,c(1,2,4,5,6,7,8,9,10,16,17,18,19,20,22,23,24,25,29,30,31)])),
# "4"=as.data.frame(names(Q4_17[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,23,24,25,26,29,30,31)])),
# "18"=as.data.frame(names(Q1_18[,c(1,2,4,5,6,7,8,9,10,16,17,18,19,20,22,23,24,25,29,30,31)])),
# "28"=as.data.frame(names(Q2_18[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,23,24,25,26,30,31,32)])),
# "8"=as.data.frame(names(Q3_18[,c(2,3,5,6,7,8,9,10,11,18,19,20,21,22,24,25,26,27,31,32,33)])),
# "48"=as.data.frame(names(Q4_18[,c(2,3,5,6,7,8,9,10,11,18,19,20,21,22,24,25,26,27,31,32,33)])))
#
# names(All_Coll_Name) <- 1:8
Q1_17 <- Q1_17[,c(2,3,4,5,6,7,8,9,10,14,15,16,17,18,19,20,21,22,25,26,27)]
Q2_17 <- Q2_17[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,22,24,25,26,30,31,32)]
Q3_17 <- Q3_17[,c(1,2,4,5,6,7,8,9,10,16,17,18,19,20,22,23,24,25,29,30,31)]
Q4_17 <- Q4_17[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,23,24,25,26,29,30,31)]
Q1_18 <- Q1_18[,c(1,2,4,5,6,7,8,9,10,16,17,18,19,20,22,23,24,25,29,30,31)]
Q2_18 <- Q2_18[,c(2,3,5,6,7,8,9,10,11,17,18,19,20,21,23,24,25,26,30,31,32)]
Q3_18 <- Q3_18[,c(2,3,5,6,7,8,9,10,11,18,19,20,21,22,24,25,26,27,31,32,33)]
Q4_18 <- Q4_18[,c(2,3,5,6,7,8,9,10,11,18,19,20,21,22,24,25,26,27,31,32,33)]
Q1_17$Qt <- "Q1_17"
Q2_17$Qt <- "Q2_17"
Q3_17$Qt <- "Q3_17"
Q4_17$Qt <- "Q4_17"
Q1_18$Qt <- "Q1_18"
Q2_18$Qt <- "Q2_18"
Q3_18$Qt <- "Q3_18"
Q4_18$Qt <- "Q4_18"
names(Q2_17) <- names(Q1_17)
names(Q3_17) <- names(Q1_17)
names(Q4_17) <- names(Q1_17)
names(Q1_18) <- names(Q1_17)
names(Q2_18) <- names(Q1_17)
names(Q3_18) <- names(Q1_17)
names(Q4_18) <- names(Q1_17)
rm(mkt_Data)
## Warning in rm(mkt_Data): object 'mkt_Data' not found
mkt_Data <- bind_rows(
Q1_17,
Q2_17,
Q3_17,
Q4_17,
Q1_18,
Q2_18,
Q3_18,
Q4_18)
head(mkt_Data)
What is the response variable? Is it quantitative or qualitative?
Order Quantity is Response variable here . It’s Quantitative variable. Order Qty by Quarter for Customer , Needs to calcualte afte rcollecting data over year.
You should have two independent variables, one quantitative and one qualitative. Promotion, (same as External Description), Zip code are Independent varaible . Season needs to calcualte All are qualitative varaible as we can’t add them.
Provide summary statistics for each the variables. Also include appropriate visualizations related to your research question (e.g. scatter plot, boxplots, etc). This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.
# PLot by $ value and Qty by Qtr
ggplot(mkt_Data, mapping = aes(x= as.character(mkt_Data$`Promotion Order Doll`), y= mkt_Data$`Order Quantity`, color= mkt_Data$`External Description`))+
geom_point()+theme(axis.text.x = element_text(angle = 70, hjust = 1)) + facet_wrap(~mkt_Data$Qt)
## Warning: Removed 7256 rows containing missing values (geom_point).
# Customer by order Qty Per Qtr
ggplot(mkt_Data, mapping = aes(x= as.character(mkt_Data$Customer), y= mkt_Data$`Order Quantity`, color= mkt_Data$Qt))+
geom_point()+theme(axis.text.x = element_text(angle = 70, hjust = 1))+facet_wrap(~mkt_Data$Qt)
## Warning: Removed 7256 rows containing missing values (geom_point).
# Orde date and Order Qty
ggplot(mkt_Data, mapping = aes(x=mkt_Data$`Order Date`, y= mkt_Data$`Order Quantity`, color= mkt_Data$Qt))+
geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7256 rows containing non-finite values (stat_smooth).
ggplot(mkt_Data, mapping = aes(x=mkt_Data$`Order Date`, y= mkt_Data$`Order Quantity`))+
geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7256 rows containing non-finite values (stat_smooth).
# Order Qty by ZIP per QTR
ggplot(mkt_Data, mapping = aes(x=mkt_Data$zip, y= mkt_Data$`Order Quantity`,color= mkt_Data$Qt))+
geom_col()
## Warning: Removed 7256 rows containing missing values (position_stack).
# by Brand
ggplot(mkt_Data, mapping = aes(x= mkt_Data$Brand, y= mkt_Data$`Order Quantity`, color= mkt_Data$Qt))+
geom_point()+theme(axis.text.x = element_text(angle = 70, hjust = 1))
## Warning: Removed 7256 rows containing missing values (geom_point).
# By STATE
ggplot(mkt_Data, mapping = aes(x=mkt_Data$state, y= mkt_Data$`Order Quantity`, color= mkt_Data$Qt))+
geom_col()+theme(axis.text.x = element_text(angle = 70, hjust = 1)) + facet_wrap(~mkt_Data$Qt)
## Warning: Removed 7256 rows containing missing values (position_stack).
# Customer by Promotion
ggplot(mkt_Data, mapping = aes(x= as.character(mkt_Data$Customer), y= mkt_Data$`Order Quantity`, color= mkt_Data$`External Description`))+facet_wrap(~mkt_Data$Qt) +
geom_point()+theme(axis.text.x = element_text(angle = 70, hjust = 1))
## Warning: Removed 7256 rows containing missing values (geom_point).
ggplot(mkt_Data, mapping = aes(x=as.character(mkt_Data$Customer), y= mkt_Data$`Order Quantity`, color= mkt_Data$Qt))+
geom_col()+theme(axis.text.x = element_text(angle = 70, hjust = 1)) + facet_wrap(~mkt_Data$Qt)
## Warning: Removed 7256 rows containing missing values (position_stack).
# ggplot(mkt_Data, mapping = aes(x=as.character(mkt_Data$Customer), y= mkt_Data$`Order Quantity`, color= mkt_Data$`External Description`))+
# geom_col()+theme(axis.text.x = element_text(angle = 70, hjust = 1))
group_by(mkt_Data,mkt_Data$`External Description`)%>% ggplot2::ggplot(mapping = aes(x=mkt_Data$`External Description`))+geom_bar()+ theme(axis.text.x = element_text(angle = 70, hjust = 1))
group_by(mkt_Data,mkt_Data$`External Description`)%>% summarise(count=n())
group_by(mkt_Data,mkt_Data$Promotion)%>% summarise(count=n())
plyr:: ddply(mkt_Data, c("`External Description`","Promotion"), summarise, Count = sum(`Order Quantity`) )
plyr:: ddply(mkt_Data, c("`External Description`","zip"), summarise, Count = sum(`Order Quantity`) )
group_by(mkt_Data,mkt_Data$zip)%>% summarise(count=n())