Goal:
1. Download data set from http://www.vizwiz.com/p/workout-wednesday.html
2. Replicate the data visualization as closely as you can
3. Share it on social media with #WorkoutWednesday

Source:
https://www.linkedin.com/pulse/november-data-challenge-kate-strachnyi-/

Dataset:
https://www.womanindata.co.uk/2017/05/workoutwednesday-week-20-comparing.html

Replicate the following visualization

knitr::include_graphics("sales_comparison.png")

Load libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.2
## ── Attaching packages ───────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.7
## ✔ tidyr   0.8.0     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'tibble' was built under R version 3.4.3
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'forcats' was built under R version 3.4.3
## ── Conflicts ──────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.4.4
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(readxl)
## Warning: package 'readxl' was built under R version 3.4.4
library(scales)
## Warning: package 'scales' was built under R version 3.4.4
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
Read in the ‘SuperstoreSales’ file
sales = read_xlsx('superstore.xlsx')
## Warning in strptime(x, format, tz = tz): unknown timezone 'zone/tz/2018g.
## 1.0/zoneinfo/America/Chicago'
View data structure
glimpse(sales)
## Observations: 8,399
## Variables: 23
## $ `Row ID`               <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, ...
## $ `Order ID`             <dbl> 3, 6, 32, 32, 32, 32, 35, 35, 36, 65, 6...
## $ `Order Date`           <dttm> 2013-10-12, 2015-02-19, 2014-07-14, 20...
## $ `Order Priority`       <chr> "Low", "Not Specified", "High", "High",...
## $ `Order Quantity`       <dbl> 6, 2, 26, 24, 23, 15, 30, 14, 46, 32, 4...
## $ Sales                  <dbl> 261.5400, 6.9300, 2808.0800, 1761.4000,...
## $ Discount               <dbl> 0.04, 0.01, 0.07, 0.09, 0.04, 0.04, 0.0...
## $ `Ship Mode`            <chr> "Regular Air", "Regular Air", "Regular ...
## $ Profit                 <dbl> -213.2500, -4.6400, 1054.8200, -1748.56...
## $ `Unit Price`           <dbl> 38.94, 2.08, 107.53, 70.89, 7.99, 8.46,...
## $ `Shipping Cost`        <dbl> 35.00, 2.56, 5.81, 89.30, 5.03, 8.99, 2...
## $ `Customer Name`        <chr> "Muhammed MacIntyre", "Ruben Dartt", "L...
## $ City                   <chr> "Highland Park", "Edmonds", "Elk Plain"...
## $ `Zip Code`             <chr> "60035", "98026", "98387", "98387", "98...
## $ State                  <chr> "Illinois", "Washington", "Washington",...
## $ Region                 <chr> "Central", "West", "West", "West", "Wes...
## $ `Customer Segment`     <chr> "Small Business", "Corporate", "Corpora...
## $ `Product Category`     <chr> "Office Supplies", "Office Supplies", "...
## $ `Product Sub-Category` <chr> "Storage & Organization", "Scissors, Ru...
## $ `Product Name`         <chr> "Eldon Base for stackable storage shelf...
## $ `Product Container`    <chr> "Large Box", "Small Pack", "Medium Box"...
## $ `Product Base Margin`  <dbl> 0.80, 0.55, 0.65, 0.72, 0.60, 0.79, 0.5...
## $ `Ship Date`            <dttm> 2013-10-19, 2015-02-20, 2014-07-16, 20...

Filter out the variables we want and rename the columns

sales <- 
  sales %>% 
  select("Order Date","Order Quantity", "Region", "Sales")

colnames(sales) <- c("Order_Date", "Order_Quantity", "Region", "Sales")

Choose only the values for Region East and West in 2015

sales <- 
  sales %>% 
  filter((Region == "West" | Region == "East") & (year(Order_Date) == "2015"))
## Warning: package 'bindrcpp' was built under R version 3.4.4

Investigate our filtered dataframe

glimpse(sales)
## Observations: 993
## Variables: 4
## $ Order_Date     <dttm> 2015-02-19, 2015-04-04, 2015-04-04, 2015-03-02...
## $ Order_Quantity <dbl> 2, 49, 6, 50, 44, 13, 5, 30, 41, 24, 5, 7, 21, ...
## $ Region         <chr> "West", "West", "West", "West", "West", "East",...
## $ Sales          <dbl> 6.930, 329.030, 20.190, 7666.040, 4509.377, 70....

Summary Statistics

summary(sales)
##    Order_Date                  Order_Quantity     Region         
##  Min.   :2015-01-01 00:00:00   Min.   : 1.00   Length:993        
##  1st Qu.:2015-03-30 00:00:00   1st Qu.:13.00   Class :character  
##  Median :2015-06-29 00:00:00   Median :26.00   Mode  :character  
##  Mean   :2015-06-29 16:18:51   Mean   :25.97                     
##  3rd Qu.:2015-09-25 00:00:00   3rd Qu.:39.00                     
##  Max.   :2015-12-30 00:00:00   Max.   :50.00                     
##      Sales         
##  Min.   :    3.96  
##  1st Qu.:  139.66  
##  Median :  498.68  
##  Mean   : 1824.34  
##  3rd Qu.: 1921.13  
##  Max.   :33367.85

Plotting

sales_subset  <-  sales %>% 
  mutate(month = floor_date(Order_Date, "month")) %>%
  group_by(Region, month) %>%
  summarize(Order_Quantity = sum(Order_Quantity),
            Sales = sum(Sales))


p1 <- ggplot(sales_subset) +
  geom_col(position = "dodge", alpha = 0.7, aes(month, Order_Quantity, fill = Region)) +
  xlab("Month") + 
  scale_x_datetime(date_breaks = "1 month",
               date_labels = "%b") +
  scale_y_continuous(name = "Order Quantity", 
                        breaks = c(0,500, 1000, 1500, 2000, 2500, 3000),limits = c(0, 3000),
                        sec.axis = sec_axis(~ . * 160000 / 3000 , name = "Sales")) +
  scale_fill_manual(values = c("darkorange", "darkcyan"))

p2 <- p1 + 
  geom_point(aes(month, Sales * 3000 / 160000 , color = Region)) +
  geom_line(aes(month, Sales * 3000 / 160000 , color = Region)) +
  ggtitle("How does East and West compare in 2015?") +
  theme_minimal() +
  theme(plot.title = element_text(size = 14, face = 'bold', hjust = 0.5),
        axis.title.y = element_text(size = 12, face = 'bold'), 
        axis.title.x = element_text(size = 12, face = 'bold'),
        panel.grid.major = element_blank()) +
  scale_color_manual(values = c("darkorange", "darkcyan"))

p2

Interactive Plot

ggplotly(p2)
## Warning: 'bar' objects don't have these attributes: 'mode'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'hoverinfo', 'hoverlabel', 'stream', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'textposition', 'textfont', 'insidetextfont', 'outsidetextfont', 'orientation', 'base', 'offset', 'width', 'marker', 'r', 't', 'error_y', 'error_x', '_deprecated', 'xaxis', 'yaxis', 'xcalendar', 'ycalendar', 'idssrc', 'customdatasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'textpositionsrc', 'basesrc', 'offsetsrc', 'widthsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule'

## Warning: 'bar' objects don't have these attributes: 'mode'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'hoverinfo', 'hoverlabel', 'stream', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'textposition', 'textfont', 'insidetextfont', 'outsidetextfont', 'orientation', 'base', 'offset', 'width', 'marker', 'r', 't', 'error_y', 'error_x', '_deprecated', 'xaxis', 'yaxis', 'xcalendar', 'ycalendar', 'idssrc', 'customdatasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'textpositionsrc', 'basesrc', 'offsetsrc', 'widthsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule'