Load packages

pacman::p_load(
  rio,          
  here,         
  tidyverse,
  lubridate,
  plotly,
  gganimate,
  ggExtra,      
  ggalt,      
  ggcorrplot,
  ggpubr,
  ggplot2, # ggarrange()
  zoo,
  data.table, 
  formattable,
  tidyr,
  ggpmisc, # stat_valey
  ggalluvial, # sankey chart
  RColorBrewer,  # color pattlet 
  CGPfunctions, # slop graph
  ggrepel, # avoid overlap points
  ggthemes,
  viridis
  )

Data Visualisation

In this project, I am going to use the open data named Premium unleaded petrol prices excluding tax and duty compared with EU countries which is published by GOV.UK

Import data

mydata <- import(here("data", "table_511_and_521.xlsx"), which = "5.1.1 (excl tax)", skip = 7)  

Data cleaning

# Price excl tax 
data <- mydata %>% janitor::clean_names()   # clean cols name 
colnames(data)[18] <- 'UK' # rename 

# remove NA value 
data <- data[complete.cases(data), ]

# remove unwanted cols 
data$month <- NULL
data$day_in_month_of_price_snapshot <- NULL
data$uk_rank_in_eu14_plus_uk <- NULL
data$uk_rank_in_eu27_plus_uk <- NULL

# change variable class 
data$year <- as.factor(data$year)
# calculate annual avg price (not weekly price)
data <- data %>% 
  group_by(year) %>% 
    summarise(across(2:28, ~ mean(.x, na.rm = TRUE)))
# wide to long format 
dtslop <- data %>% 
  pivot_longer(
    cols = belgium:slovenia,
    names_to = "country",
    values_to = "price_excl_tax"
  )

# rounding value 
dtslop$price_excl_tax <- round(dtslop$price_excl_tax, 1)

# class check 
str(dtslop)
## tibble [297 × 3] (S3: tbl_df/tbl/data.frame)
##  $ year          : Factor w/ 11 levels "2013","2014",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ country       : chr [1:297] "belgium" "denmark" "finland" "france" ...
##  $ price_excl_tax: num [1:297] 58.2 63.4 58.7 55.2 58 57.8 57.2 59.9 57.9 56.8 ...
# Price incl tax
mydata2 <- import(here("data", "table_511_and_521.xlsx"), which = "5.1.1 (incl tax)", skip = 7)

data2 <- mydata2 %>% janitor::clean_names()   # clean cols name 
colnames(data2)[18] <- 'UK' # rename 

data2 <- data2[complete.cases(data2), ]

data2$month <- NULL
data2$day_in_month_of_price_snapshot <- NULL
data2$uk_rank_in_eu14_plus_uk <- NULL
data2$uk_rank_in_eu27_plus_uk <- NULL

data2$year <- as.factor(data2$year)
# calculate annual avg price (not weekly price)
data2 <- data2 %>% 
  group_by(year) %>% 
  summarise(across(2:28, ~ mean(.x, na.rm = TRUE)))
# wide to long format 
dtslop2 <- data2 %>% 
  pivot_longer(
    cols = belgium:slovenia,
    names_to = "country",
    values_to = "price_incl_tax"
  )

# rounding value 
dtslop2$price_incl_tax <- round(dtslop2$price_incl_tax, 0)

# data cleaning 
dtam <- cbind(dtslop, dtslop2$price_incl_tax) 
colnames(dtam)[4] <- 'price_incl_tax'
dtam$price_incl_tax <- round(dtam$price_incl_tax, 1)

Import the map data

world_map <- map_data("world") # get region and long-lat 
colnames(dtam)[2] <- 'region' # rename 
dtam$region<- R.utils::capitalize(dtam$region)

dt.map <- left_join(dtam, world_map, by = "region", relationship = "many-to-many") # to avoid Warning: Detected an unexpected many-to-many relationship between `x` and `y`
# selection 1 year 
dtest <- dt.map %>% filter(year %in% c("2020"))

excl <- ggplot(dtest, aes(long, lat, group = group))+
  geom_polygon(aes(fill = price_excl_tax ), color = "white")+
  scale_fill_viridis_c(option = "D") +
  labs(title = " ",
       subtitle = "Price excl tax - Pence (p/litre)",
       x = "", y = "") +
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  coord_fixed() + 
  theme_void() +
  theme(
    legend.justification = c(0, 1),
    legend.position = c(1, .50)) 


incl <- ggplot(dtest, aes(long, lat, group = group))+
  geom_polygon(aes(fill = price_incl_tax ), color = "white")+
  scale_fill_viridis_c(option = "D") +
  labs(title = " ",
       subtitle = "Price incl tax - Pence (p/litre)",
       x = "", y = "") +
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  coord_fixed() + 
  theme_void() +
  theme(
    legend.justification = c(0, 1),
    legend.position = c(1, .50)) 

incl <- ggplot(dtest, aes(long, lat, group = group))+
  geom_polygon(aes(fill = price_incl_tax ), color = "white")+
  scale_fill_viridis_c(option = "D") +
  labs(title = " ",
       subtitle = "Price incl tax - Pence (p/litre)",
       x = "", y = "") +
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  coord_fixed() + 
  theme_void()  +
  theme(
    legend.justification = c(0, 1),
    legend.position = c(1, .50)) 
figure <- ggarrange(excl, incl, ncol=2, nrow=1, common.legend = TRUE, legend="bottom")

annotate_figure(figure,
                top = text_grob("2020 EU Nation Fuel Price", color = "black", face = "bold", size = 14),
                bottom = text_grob("Data source: GOV.UK 2023", color = "black",
                                   hjust = 1, x = 1, face = "italic", size = 10),
                left = text_grob("Figure arranged using ggpubr", color = "grey", rot = 90, size = 8),
                right = " ",
                fig.lab = "Figure 1", fig.lab.face = "bold"
                )

dt2022 <- dt.map %>% filter(year %in% c("2022"))
dt2021 <- dt.map %>% filter(year %in% c("2021"))
dt2020 <- dt.map %>% filter(year %in% c("2020"))
dt2019 <- dt.map %>% filter(year %in% c("2019"))
dt2018 <- dt.map %>% filter(year %in% c("2018"))
dt2017 <- dt.map %>% filter(year %in% c("2017"))


g17 <- ggplot(dt2017, aes(long, lat, group = group))+
  geom_polygon(aes(fill = price_excl_tax ), color = "white")+
  scale_fill_viridis_c(option = "C") +
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  coord_fixed() + 
  theme_void() +
  theme(legend.position = "none") +
  labs(subtitle = "2017",
       x = "", y = "")

g18 <- ggplot(dt2018, aes(long, lat, group = group))+
  geom_polygon(aes(fill = price_excl_tax ), color = "white")+
  scale_fill_viridis_c(option = "C") +
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  coord_fixed() + 
  theme_void() +
  theme(legend.position = "none") +
  labs(subtitle = "2018",
       x = "", y = "")

g19 <- ggplot(dt2019, aes(long, lat, group = group))+
  geom_polygon(aes(fill = price_excl_tax ), color = "white")+
  scale_fill_viridis_c(option = "D") +
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  coord_fixed() + 
  theme_void() +
  theme(legend.position = "none") +
  labs(subtitle = "2019",
       x = "", y = "")

g20 <- ggplot(dt2020, aes(long, lat, group = group))+
  geom_polygon(aes(fill = price_excl_tax ), color = "white")+
  scale_fill_viridis_c(option = "D") +
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  coord_fixed() + 
  theme_void() +
  theme(legend.position = "none") +
  labs(subtitle = "2020",
       x = "", y = "")

g21 <- ggplot(dt2021, aes(long, lat, group = group))+
  geom_polygon(aes(fill = price_excl_tax ), color = "white")+
  scale_fill_viridis_c(option = "D") +
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  coord_fixed() + 
  theme_void() +
  theme(legend.position = "none") +
  labs(subtitle = "2021",
       x = "", y = "")

g22 <- ggplot(dt2022, aes(long, lat, group = group))+
  geom_polygon(aes(fill = price_excl_tax ), color = "white")+
  scale_fill_viridis_c(option = "D") +
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  coord_fixed() + 
  theme_void() +
  theme(legend.position = "none") +
  labs(subtitle = "2022",
       x = "", y = "")
ggarrange(g19, g20, g21, g22, ncol=2, nrow=2, common.legend = TRUE, legend="right")

figure.2 <- ggarrange(g19, g20, g21, g22, ncol=2, nrow=2, common.legend = TRUE, legend="right")

annotate_figure(figure.2,
                top = text_grob("2020 EU Nation Fuel Price", color = "black", face = "bold", size = 14),
                bottom = text_grob("Data source: GOV.UK 2023", color = "black",
                                   hjust = 1, x = 1, face = "italic", size = 10),
                left = text_grob("Figure for (mean) price excluding tax & duty", color = "purple", rot = 90, size = 8),
                right = " "                
                )