Goal:
1. Download data set from http://www.vizwiz.com/p/workout-wednesday.html
2. Replicate the data visualization as closely as you can
3. Share it on social media with #WorkoutWednesday

Source:
https://www.linkedin.com/pulse/november-data-challenge-kate-strachnyi-/

Dataset:
https://www.womanindata.co.uk/2017/01/workout-wednesday-showing-nothing-when.html

Replicate the following visualization

knitr::include_graphics('dept_sales.png')

Load libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.2
## ── Attaching packages ──────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.7
## ✔ tidyr   0.8.0     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'tibble' was built under R version 3.4.3
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'forcats' was built under R version 3.4.3
## ── Conflicts ─────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.4.4
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(cowplot)
## Warning: package 'cowplot' was built under R version 3.4.4
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
Read in the ‘SuperstoreSales’ file
sales = read.csv('SuperstoreSales.csv')
View data structure
glimpse(sales)
## Observations: 8,399
## Variables: 23
## $ Row.ID               <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...
## $ Order.ID             <int> 3, 6, 32, 32, 32, 32, 35, 35, 36, 65, 66,...
## $ Order.Date           <fct> 10/12/13, 2/19/15, 7/14/14, 7/14/14, 7/14...
## $ Order.Priority       <fct> Low, Not Specified, High, High, High, Hig...
## $ Order.Quantity       <int> 6, 2, 26, 24, 23, 15, 30, 14, 46, 32, 41,...
## $ Sales                <dbl> 261.5400, 6.9300, 2808.0800, 1761.4000, 1...
## $ Discount             <dbl> 0.04, 0.01, 0.07, 0.09, 0.04, 0.04, 0.03,...
## $ Ship.Mode            <fct> Regular Air, Regular Air, Regular Air, De...
## $ Profit               <dbl> -213.25, -4.64, 1054.82, -1748.56, -85.13...
## $ Unit.Price           <dbl> 38.94, 2.08, 107.53, 70.89, 7.99, 8.46, 9...
## $ Shipping.Cost        <dbl> 35.00, 2.56, 5.81, 89.30, 5.03, 8.99, 2.2...
## $ Customer.Name        <fct> Muhammed MacIntyre, Ruben Dartt, Liz Pell...
## $ City                 <fct> Highland Park, Edmonds, Elk Plain, Elk Pl...
## $ Zip.Code             <int> 60035, 98026, 98387, 98387, 98387, 27260,...
## $ State                <fct> Illinois, Washington, Washington, Washing...
## $ Region               <fct> Central, West, West, West, West, South, C...
## $ Customer.Segment     <fct> Small Business, Corporate, Corporate, Cor...
## $ Product.Category     <fct> Office Supplies, Office Supplies, Furnitu...
## $ Product.Sub.Category <fct> Storage & Organization, Scissors, Rulers ...
## $ Product.Name         <fct> Eldon Base for stackable storage shelf, p...
## $ Product.Container    <fct> Large Box, Small Pack, Medium Box, Jumbo ...
## $ Product.Base.Margin  <dbl> 0.80, 0.55, 0.65, 0.72, 0.60, 0.79, 0.52,...
## $ Ship.Date            <fct> 10/19/13, 2/20/15, 7/16/14, 7/15/14, 7/16...
Check to see how many categories of products there are
table(sales$Product.Category)
## 
##       Furniture Office Supplies      Technology 
##            1724            4610            2065
Check to see how many categories of sub products there are
table(sales$Product.Sub.Category)
## 
##                     Appliances Binders and Binder Accessories 
##                            434                            915 
##                      Bookcases             Chairs & Chairmats 
##                            189                            386 
##           Computer Peripherals                Copiers and Fax 
##                            758                             87 
##                      Envelopes                         Labels 
##                            246                            288 
##             Office Furnishings                Office Machines 
##                            788                            337 
##                          Paper            Pens & Art Supplies 
##                           1225                            633 
##                   Rubber Bands  Scissors, Rulers and Trimmers 
##                            179                            144 
##         Storage & Organization                         Tables 
##                            546                            361 
##   Telephones and Communication 
##                            883
Subset the dataframe to get only variables we need
sales_subset = 
  sales %>%
  select(Order.Date, Sales, Profit, Product.Category, 
         Product.Sub.Category)
From the filtered dataframe, aggregate the sales by sub-categories, then filter the top selling categories (top 5)
best_selling_cats = 
  sales_subset %>%
    group_by(Product.Sub.Category) %>%
    summarize(Sales = sum(Sales), Profit = sum(Profit)) %>%
    arrange(desc(Sales)) %>%
    head(5)
## Warning: package 'bindrcpp' was built under R version 3.4.4
head(best_selling_cats)
Transform the dataframe to get the descending order necessary for plotting the bar plot
best_selling_cats = transform(best_selling_cats, 
            Product.Sub.Category = reorder(Product.Sub.Category, Sales))
Plot the first plot
p1 = ggplot(best_selling_cats) +
  geom_bar(stat = 'identity', width = 0.8,
           aes(y = Sales, x= Product.Sub.Category, fill = Profit), color = 'black') +
  coord_flip(expand = TRUE, clip = 'off') +
  scale_fill_gradient(low="gray", high="skyblue4") +
  ylab('Sales (£)') +
  theme_minimal() +
  theme(plot.title = element_text(size = 16, face = 'bold'),
        panel.grid.major = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_text(hjust = 0.2, size = 12, face = 'bold'),
        axis.text.x = element_text(size = 11),
        axis.text.y = element_text(size = 11),
        legend.position = 'top',
        legend.text = element_blank(),
        legend.spacing = element_blank()) 
  
p1

For second plot, we will need to sort sales by order date (ascending), then aggregate the sales by category by month.

First, view the data structure again

glimpse(sales)
## Observations: 8,399
## Variables: 23
## $ Row.ID               <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...
## $ Order.ID             <int> 3, 6, 32, 32, 32, 32, 35, 35, 36, 65, 66,...
## $ Order.Date           <fct> 10/12/13, 2/19/15, 7/14/14, 7/14/14, 7/14...
## $ Order.Priority       <fct> Low, Not Specified, High, High, High, Hig...
## $ Order.Quantity       <int> 6, 2, 26, 24, 23, 15, 30, 14, 46, 32, 41,...
## $ Sales                <dbl> 261.5400, 6.9300, 2808.0800, 1761.4000, 1...
## $ Discount             <dbl> 0.04, 0.01, 0.07, 0.09, 0.04, 0.04, 0.03,...
## $ Ship.Mode            <fct> Regular Air, Regular Air, Regular Air, De...
## $ Profit               <dbl> -213.25, -4.64, 1054.82, -1748.56, -85.13...
## $ Unit.Price           <dbl> 38.94, 2.08, 107.53, 70.89, 7.99, 8.46, 9...
## $ Shipping.Cost        <dbl> 35.00, 2.56, 5.81, 89.30, 5.03, 8.99, 2.2...
## $ Customer.Name        <fct> Muhammed MacIntyre, Ruben Dartt, Liz Pell...
## $ City                 <fct> Highland Park, Edmonds, Elk Plain, Elk Pl...
## $ Zip.Code             <int> 60035, 98026, 98387, 98387, 98387, 27260,...
## $ State                <fct> Illinois, Washington, Washington, Washing...
## $ Region               <fct> Central, West, West, West, West, South, C...
## $ Customer.Segment     <fct> Small Business, Corporate, Corporate, Cor...
## $ Product.Category     <fct> Office Supplies, Office Supplies, Furnitu...
## $ Product.Sub.Category <fct> Storage & Organization, Scissors, Rulers ...
## $ Product.Name         <fct> Eldon Base for stackable storage shelf, p...
## $ Product.Container    <fct> Large Box, Small Pack, Medium Box, Jumbo ...
## $ Product.Base.Margin  <dbl> 0.80, 0.55, 0.65, 0.72, 0.60, 0.79, 0.52,...
## $ Ship.Date            <fct> 10/19/13, 2/20/15, 7/16/14, 7/15/14, 7/16...
Formate ‘Order.Date’ column
sales$Order.Date = mdy(sales$Order.Date)
## Warning in as.POSIXlt.POSIXct(x, tz): unknown timezone 'zone/tz/2018g.1.0/
## zoneinfo/America/Chicago'
Perform the aforementioned task
sales_by_month = 
  sales %>%
  arrange(-desc(Order.Date)) %>%
  group_by(Product.Category, month = floor_date(Order.Date, "month")) %>%
  summarize(total_sales = sum(Sales))
Double check the result
head(sales_by_month)
Plot the sales by category over the time period (Jan 2012 to Dec 2015)
p2 = ggplot(sales_by_month, aes(x = month, y = total_sales, color = Product.Category)) +
  geom_line() +
  ggtitle('All Deparments Sales Report') +
  theme_minimal() +
  xlab('Month') + 
  labs(color = 'Category') +
  theme(plot.title = element_text(size = 18, face = 'bold', hjust = 0.8),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 12, face = 'bold'),
        axis.text.y = element_text(size = 12),
        axis.title.x = element_text(size = 14),
        panel.grid.major = element_blank(),
        panel.grid.minor.x = element_blank(),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 14))

p2

Interactive part. Convert p2 to interactive plot.
p3 = ggplot(sales_by_month, aes(x = month, y = total_sales, color = Product.Category)) +
  geom_line() +
  ggtitle('All Departments Sales Report') +
  theme_minimal() +
  xlab('Month') + 
  labs(color = 'Category') +
  theme(plot.title = element_text(size = 18, face = 'bold', hjust = 0.8),
        axis.title.y = element_blank(),
        axis.text.x = element_text(size = 12, face = 'bold'),
        axis.text.y = element_text(size = 12),
        axis.title.x = element_text(size = 14),
        panel.grid.major = element_blank(),
        panel.grid.minor.x = element_blank(),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 14))

p3 = ggplotly(p3)

p3
Arrange the two plots in one dashboard
ggdraw() +
  draw_plot(p2, x = 0, y = 0.5, width = 1, height = 0.5) +
  draw_plot(p1, x = 0, y = 0, width = 1, height = 0.5) +
  annotate('text', y= 0.4, x = 0.2, size = 4.8, fontface = 'bold',label = 'Best selling categories')

ggsave('dept_sales_NC.png')
## Saving 7 x 5 in image