pacman::p_load(
rio,
here,
tidyverse,
lubridate,
plotly,
gganimate,
ggExtra,
ggalt,
ggcorrplot,
ggpubr,
ggplot2, # ggarrange()
zoo,
data.table,
formattable,
tidyr,
ggpmisc, # stat_valey
ggalluvial, # sankey chart
RColorBrewer, # color pattlet
CGPfunctions, # slop graph
ggrepel, # avoid overlap points
ggthemes,
viridis
)
In this project, I am going to use the open data named Premium unleaded petrol prices excluding tax and duty compared with EU countries which is published by GOV.UK
mydata <- import(here("data", "table_511_and_521.xlsx"), which = "5.1.1 (excl tax)", skip = 7)
# Price excl tax
data <- mydata %>% janitor::clean_names() # clean cols name
colnames(data)[18] <- 'UK' # rename
# remove NA value
data <- data[complete.cases(data), ]
# remove unwanted cols
data$month <- NULL
data$day_in_month_of_price_snapshot <- NULL
data$uk_rank_in_eu14_plus_uk <- NULL
data$uk_rank_in_eu27_plus_uk <- NULL
# change variable class
data$year <- as.factor(data$year)
# calculate annual avg price (not weekly price)
data <- data %>%
group_by(year) %>%
summarise(across(2:28, ~ mean(.x, na.rm = TRUE)))
# wide to long format
dtslop <- data %>%
pivot_longer(
cols = belgium:slovenia,
names_to = "country",
values_to = "price_excl_tax"
)
# rounding value
dtslop$price_excl_tax <- round(dtslop$price_excl_tax, 1)
# class check
str(dtslop)
## tibble [297 × 3] (S3: tbl_df/tbl/data.frame)
## $ year : Factor w/ 11 levels "2013","2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ country : chr [1:297] "belgium" "denmark" "finland" "france" ...
## $ price_excl_tax: num [1:297] 58.2 63.4 58.7 55.2 58 57.8 57.2 59.9 57.9 56.8 ...
# Price incl tax
mydata2 <- import(here("data", "table_511_and_521.xlsx"), which = "5.1.1 (incl tax)", skip = 7)
data2 <- mydata2 %>% janitor::clean_names() # clean cols name
colnames(data2)[18] <- 'UK' # rename
data2 <- data2[complete.cases(data2), ]
data2$month <- NULL
data2$day_in_month_of_price_snapshot <- NULL
data2$uk_rank_in_eu14_plus_uk <- NULL
data2$uk_rank_in_eu27_plus_uk <- NULL
data2$year <- as.factor(data2$year)
# calculate annual avg price (not weekly price)
data2 <- data2 %>%
group_by(year) %>%
summarise(across(2:28, ~ mean(.x, na.rm = TRUE)))
# wide to long format
dtslop2 <- data2 %>%
pivot_longer(
cols = belgium:slovenia,
names_to = "country",
values_to = "price_incl_tax"
)
# rounding value
dtslop2$price_incl_tax <- round(dtslop2$price_incl_tax, 0)
# data cleaning
dtam <- cbind(dtslop, dtslop2$price_incl_tax)
colnames(dtam)[4] <- 'price_incl_tax'
dtam$price_incl_tax <- round(dtam$price_incl_tax, 1)
world_map <- map_data("world") # get region and long-lat
colnames(dtam)[2] <- 'region' # rename
dtam$region<- R.utils::capitalize(dtam$region)
dt.map <- left_join(dtam, world_map, by = "region", relationship = "many-to-many") # to avoid Warning: Detected an unexpected many-to-many relationship between `x` and `y`
# selection 1 year
dtest <- dt.map %>% filter(year %in% c("2020"))
excl <- ggplot(dtest, aes(long, lat, group = group))+
geom_polygon(aes(fill = price_excl_tax ), color = "white")+
scale_fill_viridis_c(option = "D") +
labs(title = " ",
subtitle = "Price excl tax - Pence (p/litre)",
x = "", y = "") +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
coord_fixed() +
theme_void() +
theme(
legend.justification = c(0, 1),
legend.position = c(1, .50))
incl <- ggplot(dtest, aes(long, lat, group = group))+
geom_polygon(aes(fill = price_incl_tax ), color = "white")+
scale_fill_viridis_c(option = "D") +
labs(title = " ",
subtitle = "Price incl tax - Pence (p/litre)",
x = "", y = "") +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
coord_fixed() +
theme_void() +
theme(
legend.justification = c(0, 1),
legend.position = c(1, .50))
incl <- ggplot(dtest, aes(long, lat, group = group))+
geom_polygon(aes(fill = price_incl_tax ), color = "white")+
scale_fill_viridis_c(option = "D") +
labs(title = " ",
subtitle = "Price incl tax - Pence (p/litre)",
x = "", y = "") +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
coord_fixed() +
theme_void() +
theme(
legend.justification = c(0, 1),
legend.position = c(1, .50))
figure <- ggarrange(excl, incl, ncol=2, nrow=1, common.legend = TRUE, legend="bottom")
annotate_figure(figure,
top = text_grob("2020 EU Nation Fuel Price", color = "black", face = "bold", size = 14),
bottom = text_grob("Data source: GOV.UK 2023", color = "black",
hjust = 1, x = 1, face = "italic", size = 10),
left = text_grob("Figure arranged using ggpubr", color = "grey", rot = 90, size = 8),
right = " ",
fig.lab = "Figure 1", fig.lab.face = "bold"
)
dt2022 <- dt.map %>% filter(year %in% c("2022"))
dt2021 <- dt.map %>% filter(year %in% c("2021"))
dt2020 <- dt.map %>% filter(year %in% c("2020"))
dt2019 <- dt.map %>% filter(year %in% c("2019"))
dt2018 <- dt.map %>% filter(year %in% c("2018"))
dt2017 <- dt.map %>% filter(year %in% c("2017"))
g17 <- ggplot(dt2017, aes(long, lat, group = group))+
geom_polygon(aes(fill = price_excl_tax ), color = "white")+
scale_fill_viridis_c(option = "C") +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
coord_fixed() +
theme_void() +
theme(legend.position = "none") +
labs(subtitle = "2017",
x = "", y = "")
g18 <- ggplot(dt2018, aes(long, lat, group = group))+
geom_polygon(aes(fill = price_excl_tax ), color = "white")+
scale_fill_viridis_c(option = "C") +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
coord_fixed() +
theme_void() +
theme(legend.position = "none") +
labs(subtitle = "2018",
x = "", y = "")
g19 <- ggplot(dt2019, aes(long, lat, group = group))+
geom_polygon(aes(fill = price_excl_tax ), color = "white")+
scale_fill_viridis_c(option = "D") +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
coord_fixed() +
theme_void() +
theme(legend.position = "none") +
labs(subtitle = "2019",
x = "", y = "")
g20 <- ggplot(dt2020, aes(long, lat, group = group))+
geom_polygon(aes(fill = price_excl_tax ), color = "white")+
scale_fill_viridis_c(option = "D") +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
coord_fixed() +
theme_void() +
theme(legend.position = "none") +
labs(subtitle = "2020",
x = "", y = "")
g21 <- ggplot(dt2021, aes(long, lat, group = group))+
geom_polygon(aes(fill = price_excl_tax ), color = "white")+
scale_fill_viridis_c(option = "D") +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
coord_fixed() +
theme_void() +
theme(legend.position = "none") +
labs(subtitle = "2021",
x = "", y = "")
g22 <- ggplot(dt2022, aes(long, lat, group = group))+
geom_polygon(aes(fill = price_excl_tax ), color = "white")+
scale_fill_viridis_c(option = "D") +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
coord_fixed() +
theme_void() +
theme(legend.position = "none") +
labs(subtitle = "2022",
x = "", y = "")
ggarrange(g19, g20, g21, g22, ncol=2, nrow=2, common.legend = TRUE, legend="right")
figure.2 <- ggarrange(g19, g20, g21, g22, ncol=2, nrow=2, common.legend = TRUE, legend="right")
annotate_figure(figure.2,
top = text_grob("2020 EU Nation Fuel Price", color = "black", face = "bold", size = 14),
bottom = text_grob("Data source: GOV.UK 2023", color = "black",
hjust = 1, x = 1, face = "italic", size = 10),
left = text_grob("Figure for (mean) price excluding tax & duty", color = "purple", rot = 90, size = 8),
right = " "
)