Goal:
1. Download data set from http://www.vizwiz.com/p/workout-wednesday.html
2. Replicate the data visualization as closely as you can
3. Share it on social media with #WorkoutWednesday
Source:
https://www.linkedin.com/pulse/november-data-challenge-kate-strachnyi-/
Dataset:
https://www.womanindata.co.uk/2017/05/workoutwednesday-week-20-comparing.html
Replicate the following visualization
knitr::include_graphics("sales_comparison.png")
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.2
## ── Attaching packages ───────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.7
## ✔ tidyr 0.8.0 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'tibble' was built under R version 3.4.3
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'forcats' was built under R version 3.4.3
## ── Conflicts ──────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.4.4
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(readxl)
## Warning: package 'readxl' was built under R version 3.4.4
library(scales)
## Warning: package 'scales' was built under R version 3.4.4
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
sales = read_xlsx('superstore.xlsx')
## Warning in strptime(x, format, tz = tz): unknown timezone 'zone/tz/2018g.
## 1.0/zoneinfo/America/Chicago'
glimpse(sales)
## Observations: 8,399
## Variables: 23
## $ `Row ID` <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, ...
## $ `Order ID` <dbl> 3, 6, 32, 32, 32, 32, 35, 35, 36, 65, 6...
## $ `Order Date` <dttm> 2013-10-12, 2015-02-19, 2014-07-14, 20...
## $ `Order Priority` <chr> "Low", "Not Specified", "High", "High",...
## $ `Order Quantity` <dbl> 6, 2, 26, 24, 23, 15, 30, 14, 46, 32, 4...
## $ Sales <dbl> 261.5400, 6.9300, 2808.0800, 1761.4000,...
## $ Discount <dbl> 0.04, 0.01, 0.07, 0.09, 0.04, 0.04, 0.0...
## $ `Ship Mode` <chr> "Regular Air", "Regular Air", "Regular ...
## $ Profit <dbl> -213.2500, -4.6400, 1054.8200, -1748.56...
## $ `Unit Price` <dbl> 38.94, 2.08, 107.53, 70.89, 7.99, 8.46,...
## $ `Shipping Cost` <dbl> 35.00, 2.56, 5.81, 89.30, 5.03, 8.99, 2...
## $ `Customer Name` <chr> "Muhammed MacIntyre", "Ruben Dartt", "L...
## $ City <chr> "Highland Park", "Edmonds", "Elk Plain"...
## $ `Zip Code` <chr> "60035", "98026", "98387", "98387", "98...
## $ State <chr> "Illinois", "Washington", "Washington",...
## $ Region <chr> "Central", "West", "West", "West", "Wes...
## $ `Customer Segment` <chr> "Small Business", "Corporate", "Corpora...
## $ `Product Category` <chr> "Office Supplies", "Office Supplies", "...
## $ `Product Sub-Category` <chr> "Storage & Organization", "Scissors, Ru...
## $ `Product Name` <chr> "Eldon Base for stackable storage shelf...
## $ `Product Container` <chr> "Large Box", "Small Pack", "Medium Box"...
## $ `Product Base Margin` <dbl> 0.80, 0.55, 0.65, 0.72, 0.60, 0.79, 0.5...
## $ `Ship Date` <dttm> 2013-10-19, 2015-02-20, 2014-07-16, 20...
sales <-
sales %>%
select("Order Date","Order Quantity", "Region", "Sales")
colnames(sales) <- c("Order_Date", "Order_Quantity", "Region", "Sales")
sales <-
sales %>%
filter((Region == "West" | Region == "East") & (year(Order_Date) == "2015"))
## Warning: package 'bindrcpp' was built under R version 3.4.4
glimpse(sales)
## Observations: 993
## Variables: 4
## $ Order_Date <dttm> 2015-02-19, 2015-04-04, 2015-04-04, 2015-03-02...
## $ Order_Quantity <dbl> 2, 49, 6, 50, 44, 13, 5, 30, 41, 24, 5, 7, 21, ...
## $ Region <chr> "West", "West", "West", "West", "West", "East",...
## $ Sales <dbl> 6.930, 329.030, 20.190, 7666.040, 4509.377, 70....
summary(sales)
## Order_Date Order_Quantity Region
## Min. :2015-01-01 00:00:00 Min. : 1.00 Length:993
## 1st Qu.:2015-03-30 00:00:00 1st Qu.:13.00 Class :character
## Median :2015-06-29 00:00:00 Median :26.00 Mode :character
## Mean :2015-06-29 16:18:51 Mean :25.97
## 3rd Qu.:2015-09-25 00:00:00 3rd Qu.:39.00
## Max. :2015-12-30 00:00:00 Max. :50.00
## Sales
## Min. : 3.96
## 1st Qu.: 139.66
## Median : 498.68
## Mean : 1824.34
## 3rd Qu.: 1921.13
## Max. :33367.85
sales_subset <- sales %>%
mutate(month = floor_date(Order_Date, "month")) %>%
group_by(Region, month) %>%
summarize(Order_Quantity = sum(Order_Quantity),
Sales = sum(Sales))
p1 <- ggplot(sales_subset) +
geom_col(position = "dodge", alpha = 0.7, aes(month, Order_Quantity, fill = Region)) +
xlab("Month") +
scale_x_datetime(date_breaks = "1 month",
date_labels = "%b") +
scale_y_continuous(name = "Order Quantity",
breaks = c(0,500, 1000, 1500, 2000, 2500, 3000),limits = c(0, 3000),
sec.axis = sec_axis(~ . * 160000 / 3000 , name = "Sales")) +
scale_fill_manual(values = c("darkorange", "darkcyan"))
p2 <- p1 +
geom_point(aes(month, Sales * 3000 / 160000 , color = Region)) +
geom_line(aes(month, Sales * 3000 / 160000 , color = Region)) +
ggtitle("How does East and West compare in 2015?") +
theme_minimal() +
theme(plot.title = element_text(size = 14, face = 'bold', hjust = 0.5),
axis.title.y = element_text(size = 12, face = 'bold'),
axis.title.x = element_text(size = 12, face = 'bold'),
panel.grid.major = element_blank()) +
scale_color_manual(values = c("darkorange", "darkcyan"))
p2
ggplotly(p2)
## Warning: 'bar' objects don't have these attributes: 'mode'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'hoverinfo', 'hoverlabel', 'stream', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'textposition', 'textfont', 'insidetextfont', 'outsidetextfont', 'orientation', 'base', 'offset', 'width', 'marker', 'r', 't', 'error_y', 'error_x', '_deprecated', 'xaxis', 'yaxis', 'xcalendar', 'ycalendar', 'idssrc', 'customdatasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'textpositionsrc', 'basesrc', 'offsetsrc', 'widthsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule'
## Warning: 'bar' objects don't have these attributes: 'mode'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'hoverinfo', 'hoverlabel', 'stream', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'textposition', 'textfont', 'insidetextfont', 'outsidetextfont', 'orientation', 'base', 'offset', 'width', 'marker', 'r', 't', 'error_y', 'error_x', '_deprecated', 'xaxis', 'yaxis', 'xcalendar', 'ycalendar', 'idssrc', 'customdatasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'textpositionsrc', 'basesrc', 'offsetsrc', 'widthsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule'