Click the Original, Code and Reconstruction tabs to learn more about the issues with the original data visualization and how these issues were fixed in the reconstructed data visualization.
Objective
In this assignment, the data visualization was sourced from the ABS website cited in the reference below. The data was last released on 14/12/2022, and that release is used here because the next release is not due until 13/12/2023. The visualization used in this assignment covers the period 1984-2022 and shows the share of all employees who hold casual jobs. The Australian Bureau of Statistics classifies an employee as casual or not based on whether the employee is entitled to paid leave.
In this visualization the objective is not communicated clearly; the accompanying article needs to be improved so that the plot can be understood properly. Moreover, the choice of colors made the plot difficult to understand.
Reference
[1] “Working Arrangements, August 2022,” Australian Bureau of Statistics, https://www.abs.gov.au/statistics/labour/earnings-and-working-conditions/working-arrangements/aug-2022 (accessed Jun. 23, 2023).
# Importing Libraries
#install.packages("janitor")
#install.packages("extrafont")
#install.packages("showtext")
#install.packages("grid")
library(readr)
library(tidyverse)
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr 1.1.1 v purrr 1.0.1
## v forcats 1.0.0 v stringr 1.5.0
## v ggplot2 3.4.2 v tibble 3.2.1
## v lubridate 1.9.2 v tidyr 1.3.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot.multistats)
library(ggplot2)
library(lubridate)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(extrafont)
## Registering fonts with R
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
##
## Attaching package: 'showtextdb'
##
## The following object is masked from 'package:extrafont':
##
## font_install
library(grid)
# Reading data set
# `skip=1` drops the first line of the file before the header row is read;
# `na="0"` makes readr treat cells that contain exactly "0" as missing (NA).
shares <- read_csv("shares.csv",skip=1,na="0")
## New names:
## * `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 157 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): ...1
## dbl (3): Men (%), Women (%), Total (%)
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows to check how the file was parsed
head(shares)
## # A tibble: 6 x 4
## ...1 `Men (%)` `Women (%)` `Total (%)`
## <chr> <dbl> <dbl> <dbl>
## 1 Aug-84 9.1 25.7 15.7
## 2 Nov-84 NA NA NA
## 3 Feb-85 NA NA NA
## 4 May-85 NA NA NA
## 5 Aug-85 9 25.9 15.8
## 6 Nov-85 NA NA NA
# List the column names as read from the file (the first one has no header)
colnames(shares)
## [1] "...1" "Men (%)" "Women (%)" "Total (%)"
# Give the unnamed first column (auto-named `...1` on import) the name "Yr"
shares <- shares %>%
  rename_with(~ "Yr", .cols = 1)
shares
## # A tibble: 157 x 4
## Yr `Men (%)` `Women (%)` `Total (%)`
## <chr> <dbl> <dbl> <dbl>
## 1 Aug-84 9.1 25.7 15.7
## 2 Nov-84 NA NA NA
## 3 Feb-85 NA NA NA
## 4 May-85 NA NA NA
## 5 Aug-85 9 25.9 15.8
## 6 Nov-85 NA NA NA
## 7 Feb-86 NA NA NA
## 8 May-86 NA NA NA
## 9 Aug-86 9.9 27.4 17
## 10 Nov-86 NA NA NA
## # i 147 more rows
From the above table it can be seen that the first column is “chr” type.
It needs to be changed to date.
# The labels look like "Aug-84"; prefix a day ("01-") so they parse as full
# dates with the day-month-year format below.
shares <- shares %>%
  mutate(Yr = as.Date(paste0("01-", Yr), format = "%d-%b-%y"))
head(shares)
## # A tibble: 6 x 4
## Yr `Men (%)` `Women (%)` `Total (%)`
## <date> <dbl> <dbl> <dbl>
## 1 1984-08-01 9.1 25.7 15.7
## 2 1984-11-01 NA NA NA
## 3 1985-02-01 NA NA NA
## 4 1985-05-01 NA NA NA
## 5 1985-08-01 9 25.9 15.8
## 6 1985-11-01 NA NA NA
# Confirm that the conversion produced a Date column
class(shares$Yr)
## [1] "Date"
# Drop the " (%)" suffix from the three series columns.
# `any_of()` skips names that are not present, so re-running this chunk after
# the columns have already been renamed is a no-op rather than an error.
shares <- shares %>%
  rename(any_of(c(
    Men = "Men (%)",
    Women = "Women (%)",
    Total = "Total (%)"
  )))
colnames(shares)
## [1] "Yr" "Men" "Women" "Total"
# Clearing empty value rows
# Keep a row when at least one of the three series has a value. Missing cells
# are NA (not ""), so they are tested with is.na(). The result is assigned
# back to `shares`; without the assignment the filtered table is only printed
# and every later step still sees the rows that are entirely NA.
shares <- shares %>%
  filter(!is.na(Men) | !is.na(Women) | !is.na(Total))
shares
## # A tibble: 63 x 4
## Yr Men Women Total
## <date> <dbl> <dbl> <dbl>
## 1 1984-08-01 9.1 25.7 15.7
## 2 1985-08-01 9 25.9 15.8
## 3 1986-08-01 9.9 27.4 17
## 4 1987-08-01 11.3 27.4 18.1
## 5 1988-08-01 11.2 27 17.9
## 6 1989-08-01 11.6 28.4 18.9
## 7 1990-08-01 11.3 27.2 18.3
## 8 1991-08-01 13.2 28.2 20
## 9 1992-08-01 13.6 29.2 20.7
## 10 1993-08-01 14.5 28.8 20.9
## # i 53 more rows
# Plot
# Base chart: one line per series (coloured through the legend scale) and one
# layer of filled circles per series drawn on top of the lines.
plot1 <- ggplot(shares, aes(x = Yr)) +
  geom_line(aes(y = Total, colour = "Total")) +
  geom_line(aes(y = Men, colour = "Men")) +
  geom_line(aes(y = Women, colour = "Women")) +
  # Names make the colour-to-series pairing explicit; it is the same pairing
  # the unnamed vector produced through alphabetical ordering of the labels.
  scale_colour_manual(
    values = c(Men = "#BD88AB", Total = "#439FB7", Women = "#976894")
  ) +
  geom_point(
    aes(y = Total),
    shape = 21, colour = "#CD9ABC", fill = "#7CADD2"
  ) +
  geom_point(
    aes(y = Men),
    shape = 21, colour = "#54BDC2", fill = "#1C5A99"
  ) +
  geom_point(
    aes(y = Women),
    shape = 21, colour = "#C590B3", fill = "#BB243F"
  )
#plot1
# Date at which the vertical reference line is drawn
vline <- as.Date("2000-08-01")

# Add titles, the vertical reference line and a ten-year x axis.
plot2 <- plot1 +
  labs(
    x = "Yr",
    y = "Emp %",
    title = "The share of casual employment",
    subtitle = "percentage of employees who work casual hours",
    caption = "Aus Bureau of Statistics "
  ) +
  # `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0)
  geom_vline(
    xintercept = vline,
    colour = "#DAABCB",
    linewidth = 1.5,
    linetype = "dotdash"
  ) +
  scale_x_date(date_breaks = "10 year", date_labels = "%Y")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#plot2
# Theme: horizontal legend inside the top-right of the panel, white panel with
# dotted grid lines, light grey plot background and left-aligned titles.
# `element_line()` takes `linewidth` instead of `size` since ggplot2 3.4.0.
plot3 <- plot2 +
  theme(
    # Legend
    legend.position = c(1, 0.9),
    legend.direction = "horizontal",
    legend.justification = c("right", "bottom"),
    legend.box.just = "right",
    legend.title = element_blank(),
    legend.background = element_rect(fill = "white"),
    legend.key = element_rect(fill = "white"),
    legend.key.width = unit(1.0, "cm"),
    # Panel and grid
    panel.border = element_blank(),
    panel.background = element_rect(fill = "white", color = NA),
    panel.grid.major = element_line(
      linewidth = 0.5, linetype = "dotted", colour = "#EEC9E5"
    ),
    panel.grid.minor = element_line(
      linewidth = 0.05, linetype = "dotted", colour = "#EEC9E5"
    ),
    plot.background = element_rect(fill = "grey95"),
    axis.line = element_line(linewidth = 2.0, colour = "black"),
    # Text
    text = element_text(family = "Helvetica", color = "black"),
    plot.title = element_text(size = rel(2), hjust = 0),
    plot.subtitle = element_text(size = rel(1), hjust = 0),
    plot.caption = element_text(hjust = 0)
  )
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## i Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#plot3
# Anchor dates for the annotations
curve_start <- as.Date("2008-08-01")  # where the curved arrow starts
annot_strt <- as.Date("2014-11-01")   # x position of the series note
annot2000 <- as.Date("2001-08-01")    # x position of the reference-line label

# Add a note about the series, a rotated label next to the vertical reference
# line, and a curved arrow that ends on that line.
plot4 <- plot3 +
  annotate(
    "text",
    x = annot_strt, y = 13,
    label = " series excludes OMIEs",
    # Family name corrected from "Hevetica" so it matches the theme font.
    family = "Helvetica",
    # `face` is not a parameter of the text geom and was ignored with a
    # warning; the bold request is expressed through `fontface` instead.
    fontface = "bold.italic",
    angle = 0, size = 3, colour = "black"
  ) +
  annotate(
    "text",
    x = annot2000, y = 16,
    label = "Year = 2000",
    family = "Helvetica",
    fontface = "bold",
    angle = 90, size = 4, colour = "black"
  ) +
  # NOTE(review): the positions are constants inside aes(), so this layer
  # inherits the plot data and is presumably drawn once per row of `shares`;
  # annotate("curve", ...) would draw it once, but the low alpha would then
  # need re-tuning, so the layer is kept as it was.
  geom_curve(
    aes(x = curve_start, y = 15, xend = vline, yend = 13),
    colour = "deeppink4",
    # `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0)
    linewidth = 0.5,
    arrow = arrow(length = unit(0.04, "npc")),
    alpha = 0.1
  )
## Warning in annotate("text", x = annot_strt, y = 13, label = " series excludes
## OMIEs", : Ignoring unknown parameters: `face`
## Warning in annotate("text", x = annot2000, y = 16, label = "Year = 2000", :
## Ignoring unknown parameters: `face`
**Reference**
[1] “R color palettes,” R CHARTS | A collection of charts and graphs made with the R programming language, https://r-charts.com/color-palettes/ (accessed Jun. 23, 2023).
[2] “Working Arrangements, August 2022,” Australian Bureau of Statistics, https://www.abs.gov.au/statistics/labour/earnings-and-working-conditions/working-arrangements/aug-2022 (accessed Jun. 23, 2023).
This visualization shows the reconstruction, which deals with the issues identified at the beginning of this assignment.
A vertical intercept line has been added, as can be seen in the plot below. Distinguishing between the two genders is easier with the more prominent colors.
# Render the final chart (base plot + labels + theme + annotations)
plot4
## Warning: Removed 4 rows containing missing values (`geom_line()`).
## Removed 4 rows containing missing values (`geom_line()`).
## Removed 4 rows containing missing values (`geom_line()`).
## Warning: Removed 94 rows containing missing values (`geom_point()`).
## Removed 94 rows containing missing values (`geom_point()`).
## Removed 94 rows containing missing values (`geom_point()`).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database