R Bridge Course Final Project. This is a final project to show off what you have learned. Select your data set from the list below: http://vincentarelbundock.github.io/Rdatasets/ (click on the csv index for a list). Another good source is found here: https://archive.ics.uci.edu/ml/datasets.html The presentation approach is up to you, but it should contain the following:
Question: What is the impact on Grocery Sales from item placement at a store?
Conclusion to question above written at the end.
# Load the Stat2Data grocery data set straight from the Rdatasets repository
# and print the full data frame.
Grocery <- read.csv(
  file = "https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/Stat2Data/Grocery.csv",
  header = TRUE,
  sep = ","
)
Grocery
## X Discount Store Display Sales Price
## 1 1 5.00% 1 Featured End of Aisl 240 8.96
## 2 2 5.00% 1 Featured Middle of A 264 9.19
## 3 3 5.00% 1 Not Featured 192 8.46
## 4 4 5.00% 2 Featured End of Aisl 216 8.58
## 5 5 5.00% 2 Featured Middle of A 174 8.31
## 6 6 5.00% 2 Not Featured 264 9.30
## 7 7 5.00% 3 Featured End of Aisl 176 8.04
## 8 8 5.00% 3 Featured Middle of A 220 8.80
## 9 9 5.00% 3 Not Featured 171 8.03
## 10 10 5.00% 4 Featured End of Aisl 199 8.39
## 11 11 5.00% 4 Featured Middle of A 180 8.17
## 12 12 5.00% 4 Not Featured 146 7.76
## 13 13 10.00% 5 Featured End of Aisl 244 8.91
## 14 14 10.00% 5 Featured Middle of A 173 8.07
## 15 15 10.00% 5 Not Featured 225 8.76
## 16 16 10.00% 6 Featured End of Aisl 252 8.99
## 17 17 10.00% 6 Featured Middle of A 192 8.29
## 18 18 10.00% 6 Not Featured 270 9.25
## 19 19 10.00% 7 Featured End of Aisl 202 8.37
## 20 20 10.00% 7 Featured Middle of A 261 9.15
## 21 21 10.00% 7 Not Featured 225 8.64
## 22 22 10.00% 8 Featured End of Aisl 179 8.06
## 23 23 10.00% 8 Featured Middle of A 222 8.59
## 24 24 10.00% 8 Not Featured 168 8.03
## 25 25 15.00% 9 Featured End of Aisl 234 8.73
## 26 26 15.00% 9 Featured Middle of A 233 8.78
## 27 27 15.00% 9 Not Featured 162 7.91
## 28 28 15.00% 10 Featured End of Aisl 220 8.49
## 29 29 15.00% 10 Featured Middle of A 209 8.41
## 30 30 15.00% 10 Not Featured 258 9.02
## 31 31 15.00% 11 Featured End of Aisl 215 8.50
## 32 32 15.00% 11 Featured Middle of A 199 8.22
## 33 33 15.00% 11 Not Featured 242 8.82
## 34 34 15.00% 12 Featured End of Aisl 179 8.11
## 35 35 15.00% 12 Featured Middle of A 206 8.37
## 36 36 15.00% 12 Not Featured 206 8.42
# Per-column summary statistics for the whole data frame.
summary(object = Grocery)
## X Discount Store Display
## Min. : 1.00 10.00%:12 Min. : 1.00 Featured End of Aisl:12
## 1st Qu.: 9.75 15.00%:12 1st Qu.: 3.75 Featured Middle of A:12
## Median :18.50 5.00% :12 Median : 6.50 Not Featured :12
## Mean :18.50 Mean : 6.50
## 3rd Qu.:27.25 3rd Qu.: 9.25
## Max. :36.00 Max. :12.00
## Sales Price
## Min. :146.0 Min. :7.760
## 1st Qu.:179.8 1st Qu.:8.207
## Median :212.0 Median :8.475
## Mean :211.6 Mean :8.524
## 3rd Qu.:235.5 3rd Qu.:8.805
## Max. :270.0 Max. :9.300
# Mean of the Sales column.
mean(Grocery[["Sales"]])
## [1] 211.6111
# Median of the Price column.
median(Grocery[["Price"]])
## [1] 8.475
# Quartiles (0%, 25%, 50%, 75%, 100%) of the Price column.
quantile(Grocery[["Price"]])
## 0% 25% 50% 75% 100%
## 7.7600 8.2075 8.4750 8.8050 9.3000
Preliminary conclusions from data:
The lowest price is 7.76 while the highest price is 9.30.
The median price is 8.475.
The mean Sales is 211.61.
The following steps require the plyr package.
# Keep only the rows with Sales above 230, restricted to the store, display
# and sales columns, then print the result.
Grocery2 <- subset(
  Grocery,
  subset = Sales > 230,
  select = c("Store", "Display", "Sales")
)
Grocery2
## Store Display Sales
## 1 1 Featured End of Aisl 240
## 2 1 Featured Middle of A 264
## 6 2 Not Featured 264
## 13 5 Featured End of Aisl 244
## 16 6 Featured End of Aisl 252
## 18 6 Not Featured 270
## 20 7 Featured Middle of A 261
## 25 9 Featured End of Aisl 234
## 26 9 Featured Middle of A 233
## 30 10 Not Featured 258
## 33 11 Not Featured 242
library(plyr)
# Give the three retained columns more descriptive names (old name = new name)
# and print the renamed data frame.
Grocery2 <- plyr::rename(
  Grocery2,
  replace = c(
    "Store" = "GroceryStore",
    "Display" = "DisplaySetup",
    "Sales" = "TotalSales"
  )
)
Grocery2
## GroceryStore DisplaySetup TotalSales
## 1 1 Featured End of Aisl 240
## 2 1 Featured Middle of A 264
## 6 2 Not Featured 264
## 13 5 Featured End of Aisl 244
## 16 6 Featured End of Aisl 252
## 18 6 Not Featured 270
## 20 7 Featured Middle of A 261
## 25 9 Featured End of Aisl 234
## 26 9 Featured Middle of A 233
## 30 10 Not Featured 258
## 33 11 Not Featured 242
# Relabel the display categories in a single pass: both "Featured ..." variants
# become "Featured", and "Not Featured" becomes "Low Priority".
Grocery2$DisplaySetup <- revalue(
  Grocery2$DisplaySetup,
  c(
    "Featured End of Aisl" = "Featured",
    "Featured Middle of A" = "Featured",
    "Not Featured" = "Low Priority"
  )
)
Grocery2
## GroceryStore DisplaySetup TotalSales
## 1 1 Featured 240
## 2 1 Featured 264
## 6 2 Low Priority 264
## 13 5 Featured 244
## 16 6 Featured 252
## 18 6 Low Priority 270
## 20 7 Featured 261
## 25 9 Featured 234
## 26 9 Featured 233
## 30 10 Low Priority 258
## 33 11 Low Priority 242
# Scatter plot of Price (y) against Sales (x) with fixed axis limits.
plot(
  x = Grocery$Sales, y = Grocery$Price,
  xlab = "Sales", ylab = "Price",
  xlim = c(146, 270), ylim = c(7.00, 10.00),
  main = "Sales vs Price"
)

# Box plot of Sales for each discount level. The formula refers to columns by
# bare name so they are resolved through `data = Grocery`, and the discount
# levels are ordered explicitly; the default ordering is lexical
# ("10.00%", "15.00%", "5.00%"), which puts the smallest discount last.
boxplot(
  Sales ~ factor(Discount, levels = c("5.00%", "10.00%", "15.00%")),
  data = Grocery,
  xlab = "Discount", ylab = "Sales",
  main = "Discount Vs Sales"
)

# Histogram of Price; `breaks = 4` is the suggested number of cells.
hist(
  Grocery$Price,
  main = "Grocery Prices", xlab = "Prices",
  breaks = 4, col = "red"
)
The following plots require the ggplot2 and gridExtra packages.
library(ggplot2)
library(gridExtra)
library(plyr)
# Scatter plot of Price against Sales with a fitted linear-model trend line.
# Columns are named bare inside aes() so ggplot2 looks them up in the `data`
# argument; writing `Grocery$Sales` there bypasses `data` and produces axis
# labels such as "Grocery$Sales".
x <- ggplot(Grocery, aes(x = Sales, y = Price)) +
  geom_point(shape = 1) +
  geom_smooth(method = lm)
x + ggtitle(" Sales Vs Price")
# Stores 1-4 (the 5% discount stores in this data set): bar chart of Sales per
# store, filled by display type.
Grocery4 <- subset(Grocery, Store < 5, c("Store", "Discount", "Display", "Sales"))

# Collapse the two "Featured ..." placements into a single "Featured" label.
# "Not Featured" is left untouched, so no identity mapping is needed.
Grocery4$Display <- revalue(
  Grocery4$Display,
  c(
    "Featured End of Aisl" = "Featured",
    "Featured Middle of A" = "Featured"
  )
)

# Bare column names inside aes() are resolved from `data`; using
# `Grocery4$...` there bypasses `data` and leaks into the axis/legend titles.
# NOTE(review): after collapsing, each store has two "Featured" rows; with
# stat = "identity" and position_dodge() they appear to be drawn on top of each
# other rather than summed -- confirm this is the intended comparison.
x <- ggplot(data = Grocery4, aes(x = Store, y = Sales, fill = Display)) +
  geom_bar(stat = "identity", position = position_dodge(), colour = "black")
p1 <- x + ggtitle("Sales By Display 5% Discount")
# Stores 5-8 (the 10% discount stores in this data set): bar chart of Sales per
# store, filled by display type.
Grocery5 <- subset(
  Grocery,
  Store > 4 & Store < 9,
  c("Store", "Discount", "Display", "Sales")
)

# Collapse the two "Featured ..." placements into a single "Featured" label.
# "Not Featured" is left untouched, so no identity mapping is needed.
Grocery5$Display <- revalue(
  Grocery5$Display,
  c(
    "Featured End of Aisl" = "Featured",
    "Featured Middle of A" = "Featured"
  )
)

# Bare column names inside aes() are resolved from `data`; using
# `Grocery5$...` there bypasses `data` and leaks into the axis/legend titles.
# NOTE(review): after collapsing, each store has two "Featured" rows; with
# stat = "identity" and position_dodge() they appear to be drawn on top of each
# other rather than summed -- confirm this is the intended comparison.
x <- ggplot(data = Grocery5, aes(x = Store, y = Sales, fill = Display)) +
  geom_bar(stat = "identity", position = position_dodge(), colour = "black")
p2 <- x + ggtitle("Sales By Display 10% Discount")
# Stores 9-12 (the 15% discount stores in this data set): bar chart of Sales
# per store, filled by display type.
Grocery6 <- subset(Grocery, Store > 8, c("Store", "Discount", "Display", "Sales"))

# Collapse the two "Featured ..." placements into a single "Featured" label.
# "Not Featured" is left untouched, so no identity mapping is needed.
Grocery6$Display <- revalue(
  Grocery6$Display,
  c(
    "Featured End of Aisl" = "Featured",
    "Featured Middle of A" = "Featured"
  )
)

# Bare column names inside aes() are resolved from `data`; using
# `Grocery6$...` there bypasses `data` and leaks into the axis/legend titles.
# NOTE(review): after collapsing, each store has two "Featured" rows; with
# stat = "identity" and position_dodge() they appear to be drawn on top of each
# other rather than summed -- confirm this is the intended comparison.
x <- ggplot(data = Grocery6, aes(x = Store, y = Sales, fill = Display)) +
  geom_bar(stat = "identity", position = position_dodge(), colour = "black")
p3 <- x + ggtitle("Sales By Display 15% Discount")

# Lay the three per-discount charts out on a two-row grid.
grid.arrange(p1, p2, p3, nrow = 2)
# All stores: bar chart of Sales by discount group, filled by display type.
Grocery7 <- subset(Grocery, Store > 0, c("Store", "Discount", "Display", "Sales"))

# Collapse the two "Featured ..." placements into a single "Featured" label.
# "Not Featured" is left untouched, so no identity mapping is needed.
Grocery7$Display <- revalue(
  Grocery7$Display,
  c(
    "Featured End of Aisl" = "Featured",
    "Featured Middle of A" = "Featured"
  )
)

# Replace each store number with the label of its discount group
# (stores 1-4 -> Discount5, 5-8 -> Discount10, 9-12 -> Discount15) in one
# mapping. The result is stored as a factor with explicit levels so the x axis
# runs 5, 10, 15 instead of the alphabetical Discount10, Discount15, Discount5.
Grocery7$Store <- factor(
  mapvalues(
    Grocery7$Store,
    from = 1:12,
    to = rep(c("Discount5", "Discount10", "Discount15"), each = 4)
  ),
  levels = c("Discount5", "Discount10", "Discount15")
)

# Bare column names inside aes() are resolved from `data`; using
# `Grocery7$...` there bypasses `data` and leaks into the axis/legend titles.
# NOTE(review): every discount group holds several rows per display label; with
# stat = "identity" and position_dodge() they appear to be drawn on top of each
# other rather than summed -- confirm this is the intended comparison.
x <- ggplot(data = Grocery7, aes(x = Store, y = Sales, fill = Display)) +
  geom_bar(stat = "identity", position = position_dodge(), colour = "black")
p4 <- x + ggtitle("Sales By Display By Discount")
p4
What is the impact on Grocery Sales from item placement at a store?
The conclusion that can be drawn from the graphs, particularly the graphs that break down sales by discount, is that featuring an item has less of an impact on sales as discounts get larger. When the discount is at its highest, 15%, overall sales of non-featured items are greater than sales of featured items, and at a majority of the stores with 15% discounts the non-featured items outsell the featured items. At the stores with the lowest discount, 5%, sales of featured items are greater overall than sales of non-featured items, and the same holds at the majority of the 5% discount stores on a store-by-store basis.
The following step requires the readr package (part of the tidyverse).
library (readr)
# Read the same data set from the project's GitHub copy using readr,
# via an explicit URL connection.
urlfile <- "https://raw.githubusercontent.com/johnsuh23/Project1/master/Grocery.csv"
grocery <- read_csv(
  url(urlfile)
)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## Discount = col_character(),
## Store = col_double(),
## Display = col_character(),
## Sales = col_double(),
## Price = col_double()
## )
# Display the tibble read from GitHub.
print(grocery)
## # A tibble: 36 x 6
## X1 Discount Store Display Sales Price
## <dbl> <chr> <dbl> <chr> <dbl> <dbl>
## 1 1 5.00% 1 Featured End of Aisl 240 8.96
## 2 2 5.00% 1 Featured Middle of A 264 9.19
## 3 3 5.00% 1 Not Featured 192 8.46
## 4 4 5.00% 2 Featured End of Aisl 216 8.58
## 5 5 5.00% 2 Featured Middle of A 174 8.31
## 6 6 5.00% 2 Not Featured 264 9.3
## 7 7 5.00% 3 Featured End of Aisl 176 8.04
## 8 8 5.00% 3 Featured Middle of A 220 8.8
## 9 9 5.00% 3 Not Featured 171 8.03
## 10 10 5.00% 4 Featured End of Aisl 199 8.39
## # ... with 26 more rows
Please submit your .rmd file and the .csv file as well as a link to your RPubs.