Goal:
1. Download data set from http://www.vizwiz.com/p/workout-wednesday.html
2. Replicate the data visualization as closely as you can
3. Share it on social media with #WorkoutWednesday
Source:
https://www.linkedin.com/pulse/november-data-challenge-kate-strachnyi-/
Dataset:
https://www.womanindata.co.uk/2017/01/workout-wednesday-showing-nothing-when.html
Replicate the following visualization
knitr::include_graphics('dept_sales.png')
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.2
## ── Attaching packages ──────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.7
## ✔ tidyr 0.8.0 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'tibble' was built under R version 3.4.3
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'forcats' was built under R version 3.4.3
## ── Conflicts ─────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.4.4
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(cowplot)
## Warning: package 'cowplot' was built under R version 3.4.4
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
sales = read.csv('SuperstoreSales.csv')
glimpse(sales)
## Observations: 8,399
## Variables: 23
## $ Row.ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...
## $ Order.ID <int> 3, 6, 32, 32, 32, 32, 35, 35, 36, 65, 66,...
## $ Order.Date <fct> 10/12/13, 2/19/15, 7/14/14, 7/14/14, 7/14...
## $ Order.Priority <fct> Low, Not Specified, High, High, High, Hig...
## $ Order.Quantity <int> 6, 2, 26, 24, 23, 15, 30, 14, 46, 32, 41,...
## $ Sales <dbl> 261.5400, 6.9300, 2808.0800, 1761.4000, 1...
## $ Discount <dbl> 0.04, 0.01, 0.07, 0.09, 0.04, 0.04, 0.03,...
## $ Ship.Mode <fct> Regular Air, Regular Air, Regular Air, De...
## $ Profit <dbl> -213.25, -4.64, 1054.82, -1748.56, -85.13...
## $ Unit.Price <dbl> 38.94, 2.08, 107.53, 70.89, 7.99, 8.46, 9...
## $ Shipping.Cost <dbl> 35.00, 2.56, 5.81, 89.30, 5.03, 8.99, 2.2...
## $ Customer.Name <fct> Muhammed MacIntyre, Ruben Dartt, Liz Pell...
## $ City <fct> Highland Park, Edmonds, Elk Plain, Elk Pl...
## $ Zip.Code <int> 60035, 98026, 98387, 98387, 98387, 27260,...
## $ State <fct> Illinois, Washington, Washington, Washing...
## $ Region <fct> Central, West, West, West, West, South, C...
## $ Customer.Segment <fct> Small Business, Corporate, Corporate, Cor...
## $ Product.Category <fct> Office Supplies, Office Supplies, Furnitu...
## $ Product.Sub.Category <fct> Storage & Organization, Scissors, Rulers ...
## $ Product.Name <fct> Eldon Base for stackable storage shelf, p...
## $ Product.Container <fct> Large Box, Small Pack, Medium Box, Jumbo ...
## $ Product.Base.Margin <dbl> 0.80, 0.55, 0.65, 0.72, 0.60, 0.79, 0.52,...
## $ Ship.Date <fct> 10/19/13, 2/20/15, 7/16/14, 7/15/14, 7/16...
table(sales$Product.Category)
##
## Furniture Office Supplies Technology
## 1724 4610 2065
table(sales$Product.Sub.Category)
##
## Appliances Binders and Binder Accessories
## 434 915
## Bookcases Chairs & Chairmats
## 189 386
## Computer Peripherals Copiers and Fax
## 758 87
## Envelopes Labels
## 246 288
## Office Furnishings Office Machines
## 788 337
## Paper Pens & Art Supplies
## 1225 633
## Rubber Bands Scissors, Rulers and Trimmers
## 179 144
## Storage & Organization Tables
## 546 361
## Telephones and Communication
## 883
sales_subset =
sales %>%
select(Order.Date, Sales, Profit, Product.Category,
Product.Sub.Category)
best_selling_cats =
sales_subset %>%
group_by(Product.Sub.Category) %>%
summarize(Sales = sum(Sales), Profit = sum(Profit)) %>%
arrange(desc(Sales)) %>%
head(5)
## Warning: package 'bindrcpp' was built under R version 3.4.4
head(best_selling_cats)
best_selling_cats = transform(best_selling_cats,
Product.Sub.Category = reorder(Product.Sub.Category, Sales))
p1 = ggplot(best_selling_cats) +
geom_bar(stat = 'identity', width = 0.8,
aes(y = Sales, x= Product.Sub.Category, fill = Profit), color = 'black') +
coord_flip(expand = TRUE, clip = 'off') +
scale_fill_gradient(low="gray", high="skyblue4") +
ylab('Sales (£)') +
theme_minimal() +
theme(plot.title = element_text(size = 16, face = 'bold'),
panel.grid.major = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_text(hjust = 0.2, size = 12, face = 'bold'),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11),
legend.position = 'top',
legend.text = element_blank(),
legend.spacing = element_blank())
p1
First, view the data structure again
glimpse(sales)
## Observations: 8,399
## Variables: 23
## $ Row.ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...
## $ Order.ID <int> 3, 6, 32, 32, 32, 32, 35, 35, 36, 65, 66,...
## $ Order.Date <fct> 10/12/13, 2/19/15, 7/14/14, 7/14/14, 7/14...
## $ Order.Priority <fct> Low, Not Specified, High, High, High, Hig...
## $ Order.Quantity <int> 6, 2, 26, 24, 23, 15, 30, 14, 46, 32, 41,...
## $ Sales <dbl> 261.5400, 6.9300, 2808.0800, 1761.4000, 1...
## $ Discount <dbl> 0.04, 0.01, 0.07, 0.09, 0.04, 0.04, 0.03,...
## $ Ship.Mode <fct> Regular Air, Regular Air, Regular Air, De...
## $ Profit <dbl> -213.25, -4.64, 1054.82, -1748.56, -85.13...
## $ Unit.Price <dbl> 38.94, 2.08, 107.53, 70.89, 7.99, 8.46, 9...
## $ Shipping.Cost <dbl> 35.00, 2.56, 5.81, 89.30, 5.03, 8.99, 2.2...
## $ Customer.Name <fct> Muhammed MacIntyre, Ruben Dartt, Liz Pell...
## $ City <fct> Highland Park, Edmonds, Elk Plain, Elk Pl...
## $ Zip.Code <int> 60035, 98026, 98387, 98387, 98387, 27260,...
## $ State <fct> Illinois, Washington, Washington, Washing...
## $ Region <fct> Central, West, West, West, West, South, C...
## $ Customer.Segment <fct> Small Business, Corporate, Corporate, Cor...
## $ Product.Category <fct> Office Supplies, Office Supplies, Furnitu...
## $ Product.Sub.Category <fct> Storage & Organization, Scissors, Rulers ...
## $ Product.Name <fct> Eldon Base for stackable storage shelf, p...
## $ Product.Container <fct> Large Box, Small Pack, Medium Box, Jumbo ...
## $ Product.Base.Margin <dbl> 0.80, 0.55, 0.65, 0.72, 0.60, 0.79, 0.52,...
## $ Ship.Date <fct> 10/19/13, 2/20/15, 7/16/14, 7/15/14, 7/16...
sales$Order.Date = mdy(sales$Order.Date)
## Warning in as.POSIXlt.POSIXct(x, tz): unknown timezone 'zone/tz/2018g.1.0/
## zoneinfo/America/Chicago'
sales_by_month =
sales %>%
arrange(-desc(Order.Date)) %>%
group_by(Product.Category, month = floor_date(Order.Date, "month")) %>%
summarize(total_sales = sum(Sales))
head(sales_by_month)
p2 = ggplot(sales_by_month, aes(x = month, y = total_sales, color = Product.Category)) +
geom_line() +
ggtitle('All Deparments Sales Report') +
theme_minimal() +
xlab('Month') +
labs(color = 'Category') +
theme(plot.title = element_text(size = 18, face = 'bold', hjust = 0.8),
axis.title.y = element_blank(),
axis.text.x = element_text(size = 12, face = 'bold'),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
panel.grid.major = element_blank(),
panel.grid.minor.x = element_blank(),
legend.text = element_text(size = 12),
legend.title = element_text(size = 14))
p2
p3 = ggplot(sales_by_month, aes(x = month, y = total_sales, color = Product.Category)) +
geom_line() +
ggtitle('All Departments Sales Report') +
theme_minimal() +
xlab('Month') +
labs(color = 'Category') +
theme(plot.title = element_text(size = 18, face = 'bold', hjust = 0.8),
axis.title.y = element_blank(),
axis.text.x = element_text(size = 12, face = 'bold'),
axis.text.y = element_text(size = 12),
axis.title.x = element_text(size = 14),
panel.grid.major = element_blank(),
panel.grid.minor.x = element_blank(),
legend.text = element_text(size = 12),
legend.title = element_text(size = 14))
p3 = ggplotly(p3)
p3
ggdraw() +
draw_plot(p2, x = 0, y = 0.5, width = 1, height = 0.5) +
draw_plot(p1, x = 0, y = 0, width = 1, height = 0.5) +
annotate('text', y= 0.4, x = 0.2, size = 4.8, fontface = 'bold',label = 'Best selling categories')
ggsave('dept_sales_NC.png')
## Saving 7 x 5 in image