Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.
Objective
The original data visualisation was published as part of a Eurostat news article titled “Environmental protection spending continues to rise”. The article targets a general audience, and its objective is to raise awareness of the annual expenditure incurred by EU nations on environmental protection and of that expenditure's share of gross domestic product (GDP).
Further, the article breaks the total expenditure down into three major sub-categories (households; general government and non-profit organizations serving households; and corporations) so that readers can better understand and analyze the contribution of each category to overall spending.
The visualisation chosen had the following three main issues which can be fixed or improved:
Using green for the upper section of each bar tends to mislead the reader about the growth in expenditure. The background colour also adds no value and, because of its association with environmental care, can itself be deceptive, so it could be removed. Both the green upper bar section and the background are therefore deceptive design choices that can be improved in the visualisation.
Red and green are used together in the sections of the bar chart. Choosing colours that avoid this combination would make the visualisation accessible to colour-blind readers.
From the bar chart alone, the reader cannot derive the environmental expenditure value of each category. There is also no common reference line (baseline) for the categories in the bar chart, so their individual expense values cannot be read off.
This can be improved by redesigning the visualisation to plot each category's expenses, labelled with the corresponding values, as bar charts that are either dodged or faceted so that the categories' environmental expenses can be compared more easily.
Reference
Eurostat. (2022, June 10). Environmental protection spending continues to rise. Retrieved September 24, 2022, from the Eurostat website: https://ec.europa.eu/eurostat/web/products-eurostat-news/-/ddn-20220610-1
The following code was used to fix the issues identified in the original.
library(readxl)
library(base)
library(ggplot2)
library(plyr)
library(dplyr)
library(tidyverse)
library(tidyr)
library(methods)
library(graphics)
# Read the Eurostat export, skipping the first 7 rows of "Sheet 1".
env_ac_epnei_use <- read_excel(
  path = "env_ac_epnei_use.xlsx",
  sheet = "Sheet 1",
  skip = 7
)

# Work on a copy; drop the first two rows and the 5th, 7th and 9th columns.
env_exp <- env_ac_epnei_use
env_exp1 <- env_exp[-c(1, 2), -c(5, 7, 9)]

# Give the remaining six columns descriptive names.
colnames(env_exp1) <- c(
  "YEAR", "Total", "GDP", "Corporations", "Government", "Households"
)

# Convert every measure column to numeric; values that cannot be parsed
# become NA.
env_exp1[c("Total", "GDP", "Corporations", "Government", "Households")] <-
  lapply(
    env_exp1[c("Total", "GDP", "Corporations", "Government", "Households")],
    as.numeric
  )

# YEAR becomes an ordered factor over 2006-2021; any other value becomes NA.
env_exp1[["YEAR"]] <- factor(
  env_exp1[["YEAR"]],
  levels = as.character(2006:2021),
  ordered = TRUE
)

# Rescale the monetary columns from millions to billions (GDP is left as is).
env_exp1[["Total"]] <- env_exp1[["Total"]] / 1000
env_exp1[["Corporations"]] <- env_exp1[["Corporations"]] / 1000
env_exp1[["Government"]] <- env_exp1[["Government"]] / 1000
env_exp1[["Households"]] <- env_exp1[["Households"]] / 1000
# Inspect the converted data: str() prints each column's class and a preview
# of its values, which confirms the type conversions took effect.
str(env_exp1)
## tibble [19 × 6] (S3: tbl_df/tbl/data.frame)
## $ YEAR : Ord.factor w/ 16 levels "2006"<"2007"<..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Total : num [1:19] 189 196 205 208 217 ...
## $ GDP : num [1:19] 1.9 1.8 1.9 2 2 2 2 2 2 2 ...
## $ Corporations: num [1:19] 98.9 102 106.3 107.1 113.4 ...
## $ Government : num [1:19] 49.6 52.6 56.1 57 57.9 ...
## $ Households : num [1:19] 40.5 41.5 42.7 43.7 45.5 ...
# Identify NAs across the full data frame: is.na() prints a logical matrix
# with TRUE wherever a cell is missing. This step only reports the NAs; the
# affected rows are removed further below.
is.na(env_exp1)
## YEAR Total GDP Corporations Government Households
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE FALSE FALSE
## [6,] FALSE FALSE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE FALSE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE FALSE FALSE
## [17,] TRUE TRUE TRUE TRUE TRUE TRUE
## [18,] TRUE TRUE TRUE TRUE TRUE TRUE
## [19,] TRUE TRUE TRUE TRUE TRUE TRUE
# Linear (column-major) positions of the NA cells in the data frame.
which(is.na(env_exp1))
## [1] 17 18 19 36 37 38 55 56 57 74 75 76 93 94 95 112 113 114
# Keep only the fully observed rows and store them as a plain data.frame.
# Rows are selected with complete.cases() instead of hard-coded row numbers,
# so the step still removes the right rows if the number or position of the
# incomplete rows in the export changes.
exp2 <- as.data.frame(env_exp1[complete.cases(env_exp1), ])
# Re-inspect the structure after the rows containing NAs were removed.
str(exp2)
## 'data.frame': 16 obs. of 6 variables:
## $ YEAR : Ord.factor w/ 16 levels "2006"<"2007"<..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Total : num 189 196 205 208 217 ...
## $ GDP : num 1.9 1.8 1.9 2 2 2 2 2 2 2 ...
## $ Corporations: num 98.9 102 106.3 107.1 113.4 ...
## $ Government : num 49.6 52.6 56.1 57 57.9 ...
## $ Households : num 40.5 41.5 42.7 43.7 45.5 ...
# Per-column summary of the cleaned data (level counts for YEAR, quartiles
# and mean for the numeric columns).
summary(exp2)
## YEAR Total GDP Corporations Government
## 2006 : 1 Min. :189.0 Min. :1.800 Min. : 98.88 Min. :49.60
## 2007 : 1 1st Qu.:214.5 1st Qu.:2.000 1st Qu.:111.85 1st Qu.:57.46
## 2008 : 1 Median :232.1 Median :2.000 Median :122.38 Median :58.52
## 2009 : 1 Mean :237.3 Mean :1.981 Mean :126.87 Mean :60.03
## 2010 : 1 3rd Qu.:259.2 3rd Qu.:2.000 3rd Qu.:141.36 3rd Qu.:63.54
## 2011 : 1 Max. :291.7 Max. :2.100 Max. :160.38 Max. :69.97
## (Other):10
## Households
## Min. :40.47
## 1st Qu.:45.03
## Median :51.47
## Mean :50.41
## 3rd Qu.:54.26
## Max. :61.31
##
library(tidyr)
# Create the long-format data set for the per-category comparison plots:
# one row per YEAR and Category, with the amount in Expenses.
# pivot_longer() replaces the superseded gather(). It emits rows year by year
# and returns a tibble, so the result is sorted by Category then YEAR and
# converted back to a data.frame to keep the layout gather() produced.
exp3 <- exp2 %>%
  pivot_longer(
    cols = c(Corporations, Government, Households),
    names_to = "Category",
    values_to = "Expenses"
  ) %>%
  dplyr::arrange(Category, YEAR) %>%
  as.data.frame()
exp3$Category <- as.factor(exp3$Category)
str(exp3)
## 'data.frame': 48 obs. of 5 variables:
## $ YEAR : Ord.factor w/ 16 levels "2006"<"2007"<..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Total : num 189 196 205 208 217 ...
## $ GDP : num 1.9 1.8 1.9 2 2 2 2 2 2 2 ...
## $ Category: Factor w/ 3 levels "Corporations",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Expenses: num 98.9 102 106.3 107.1 113.4 ...
# Bar chart of total environmental protection expenditure per year.
# p3 holds only the base plot (data + aesthetics); the layers are added and
# printed in the expression that follows.
p3 <- ggplot(data = exp2, mapping = aes(x = YEAR, y = Total))
p3 +
  geom_bar(stat = "identity", fill = "#8da0cb") +
  # Value label just above each bar, rounded to two decimals.
  geom_text(aes(label = round(Total, 2)), vjust = -0.5, size = 3) +
  scale_y_continuous(limits = c(0, 300)) +
  labs(
    title = "National Expenditure On Environmental Protection in EU,\n 2006-2021, Cumulative",
    x = "YEAR",
    y = "Total Expense \n (Billion Euro)"
  ) +
  theme_minimal() +
  # Slant the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Line chart of total environmental protection expenditure as a share of GDP.
# group = 1 puts all years in a single group so the discrete YEAR values are
# joined by one line.
p4 <- ggplot(data = exp2, mapping = aes(x = YEAR, y = GDP, group = 1)) +
  geom_line(stat = "identity", colour = "deepskyblue") +
  geom_point(colour = "deepskyblue") +
  scale_y_continuous(limits = c(0, 3)) +
  labs(
    title = "National Expenditure On Environmental Protection in EU,\n 2006-2021, GDP % of Cumulative expense",
    x = "YEAR",
    y = "% of GDP (%)"
  ) +
  theme_minimal() +
  # Slant the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p4
# Dodged bar chart comparing the expenses of each category per year, with the
# value printed on every bar.
# p5 holds only the base plot; the layers are added and printed below.
p5 <- ggplot(exp3, aes(fill = Category, y = Expenses, x = YEAR))
p5 +
  geom_col(position = position_dodge(width = 0.9)) +
  # The labels must dodge by the same width as the bars (0.9), otherwise they
  # drift away from the bar they describe. With angle = 90, hjust moves the
  # text along the y direction: a slightly negative value starts the label
  # just above the bar top instead of centring it on the bar edge.
  geom_text(
    aes(label = round(Expenses, 2)),
    position = position_dodge(width = 0.9),
    angle = 90,
    hjust = -0.1,
    vjust = 0.5,
    size = 3
  ) +
  scale_fill_brewer(palette = "Paired") +
  scale_y_continuous(limits = c(0, 200)) +
  labs(
    title = "National Expenditure On Environmental Protection in EU,\n 2006-2021, Categorical comparison",
    x = "YEAR",
    y = "Total Expense \n (Billion Euro)"
  ) +
  theme_minimal() +
  # Slant the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Faceted bar chart: the same per-category expenses, drawn as one panel per
# Category (stacked in rows) so each category can be read on its own axis.
p9 <- ggplot(data = exp3, mapping = aes(x = YEAR, y = Expenses)) +
  geom_bar(stat = "identity", fill = "deepskyblue") +
  # Rotated value label on every bar, rounded to two decimals.
  geom_text(
    aes(label = round(Expenses, 2)),
    vjust = 0.6,
    size = 3,
    angle = 90
  ) +
  scale_y_continuous(limits = c(0, 200)) +
  labs(
    title = "National Expenditure On Environmental Protection in EU,\n 2006-2021, Categorical comparison",
    x = "YEAR",
    y = "Total Expense \n (Billion Euro)"
  ) +
  theme_minimal() +
  # Slant the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p9 + facet_grid(rows = vars(Category))
Data Reference: Eurostat. National expenditure on environmental protection by institutional sector (online data code: ENV_AC_EPNEIS; last update: 10/06/2022 07:00). Source of dataset: Eurostat. Accessed 24 September 2022 from https://ec.europa.eu/eurostat/databrowser/bookmark/01e1443d-4e86-4a41-9430-5d6dd7649991?lang=en
The following plot fixes the main issues in the original.