pacman::p_load(
rio,
here,
tidyverse,
lubridate,
plotly,
gganimate,
ggExtra,
ggalt,
ggcorrplot,
ggpubr,
ggplot2, # ggarrange()
zoo,
data.table,
formattable,
tidyr,
ggpmisc, # stat_valey
ggalluvial, # sankey chart
RColorBrewer, # color pattlet
CGPfunctions, # slop graph
ggrepel, # avoid overlap points
ggthemes,
viridis
)
In this project, I am going to use the open data named Premium unleaded petrol prices excluding tax and duty compared with EU countries which is published by GOV.UK
mydata <- import(here("data", "table_511_and_521.xlsx"), which = "5.1.1 (excl tax)", skip = 7)
# Price excl tax
data <- mydata %>% janitor::clean_names() # clean cols name
colnames(data)[18] <- 'UK' # rename
# remove NA value
data <- data[complete.cases(data), ]
# remove unwanted cols
data$month <- NULL
data$day_in_month_of_price_snapshot <- NULL
data$uk_rank_in_eu14_plus_uk <- NULL
data$uk_rank_in_eu27_plus_uk <- NULL
# change variable class
data$year <- as.factor(data$year)
# calculate annual avg price (not weekly price)
data <- data %>%
group_by(year) %>%
summarise(across(2:28, ~ mean(.x, na.rm = TRUE)))
# wide to long format
dtslop <- data %>%
pivot_longer(
cols = belgium:slovenia,
names_to = "country",
values_to = "price_excl_tax"
)
# rounding value
dtslop$price_excl_tax <- round(dtslop$price_excl_tax, 1)
# class check
str(dtslop)
## tibble [297 × 3] (S3: tbl_df/tbl/data.frame)
## $ year : Factor w/ 11 levels "2013","2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ country : chr [1:297] "belgium" "denmark" "finland" "france" ...
## $ price_excl_tax: num [1:297] 58.2 63.4 58.7 55.2 58 57.8 57.2 59.9 57.9 56.8 ...
Using newggslopegraph {CGPfunctions}
# “econ” theme
t <- dtslop %>% filter(year %in% c("2019", "2020", "2021", "2022")) # country has chr class so must apply dplyr %in%, not ==
CGPfunctions::newggslopegraph(t, year, price_excl_tax, country,
Title = "Premium unleaded petrol prices excluding tax and duty",
SubTitle = "2019-2023",
Caption = "By R CHARTS",
CaptionJustify = "left",
ThemeChoice = "econ",
DataTextSize = 2.5,
DataLabelPadding = .05)
##
## Converting 'year' to an ordered factor
## Warning: ggrepel: 21 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# “wsj” theme
t <- t %>% filter(country %in% c("UK", "sweden", "france", "netherlands", "belgium", "luxembourg", "germany", "greece", "malta", "italy", "findland"))
CGPfunctions::newggslopegraph(t, year, price_excl_tax, country,
Title = "Premium unleaded petrol prices excluding tax and duty",
SubTitle = "2019-2023",
Caption = "By R CHARTS",
CaptionJustify = "left",
ThemeChoice = "wsj",
DataTextSize = 2.5,
DataLabelPadding = .05)
##
## Converting 'year' to an ordered factor
Here we import the data of Premium unleaded petrol prices excluding tax and duty compared with EU countries
# Price incl tax
mydata2 <- import(here("data", "table_511_and_521.xlsx"), which = "5.1.1 (incl tax)", skip = 7)
data2 <- mydata2 %>% janitor::clean_names() # clean cols name
colnames(data2)[18] <- 'UK' # rename
data2 <- data2[complete.cases(data2), ]
data2$month <- NULL
data2$day_in_month_of_price_snapshot <- NULL
data2$uk_rank_in_eu14_plus_uk <- NULL
data2$uk_rank_in_eu27_plus_uk <- NULL
data2$year <- as.factor(data2$year)
# calculate annual avg price (not weekly price)
data2 <- data2 %>%
group_by(year) %>%
summarise(across(2:28, ~ mean(.x, na.rm = TRUE)))
# wide to long format
dtslop2 <- data2 %>%
pivot_longer(
cols = belgium:slovenia,
names_to = "country",
values_to = "price_incl_tax"
)
# rounding value
dtslop2$price_incl_tax <- round(dtslop2$price_incl_tax, 0)
# data cleaning
dtam <- cbind(dtslop, dtslop2$price_incl_tax)
colnames(dtam)[4] <- 'price_incl_tax'
dtam$price_incl_tax <- round(dtam$price_incl_tax, 1)
For the purpose of visualtion, we select data in 2022:
# select 1 year only
b <- dtam %>% filter(year %in% c("2022"))
# transform data from wide to long format
c <- b %>%
pivot_longer(
cols = price_excl_tax:price_incl_tax,
names_to = "type",
values_to = "price"
) %>%
select(-c(year))
# data viz
ggplot(c, aes(x = reorder(country,-price), y = price, fill = type)) +
geom_bar(stat = "identity") +
scale_fill_brewer() +
guides(fill = guide_legend(title = "Title")) +
labs(title = "Pump Price of Unleaded Petrol as in 2022",
x = " ",
y = "Pence (p/litre)") +
theme_classic() +
coord_flip()
# manually fill
ggplot(c, aes(x = reorder(country,-price), y = price, fill = type)) +
geom_bar(stat = "identity") +
guides(fill = guide_legend(title = "Title")) +
labs(title = "Pump Price of Unleaded Petrol as in 2022",
x = " ",
y = "Pence (p/litre)") +
scale_fill_manual(values = c(alpha("#33A02C",0.4), alpha("#33A02C", 1))) + # ("#1F78B4", "#1F78B4")
theme_classic() +
coord_flip()
data <- mydata %>% janitor::clean_names() # clean cols name
data <- data[complete.cases(data), ]
data$month <- NULL
data$day_in_month_of_price_snapshot <- NULL
data$uk_rank_in_eu14_plus_uk <- NULL
data$uk_rank_in_eu27_plus_uk <- NULL
data$year <- as.factor(data$year)
data <- data %>%
group_by(year) %>%
summarise(across(2:28, ~ mean(.x, na.rm = TRUE)))
# Ranking the data
# newdata <- data.frame(data, t(apply(-data[-1], 1, rank, ties.method='min', na.last='keep')))
dt_rank <- cbind(data[1], t(apply(-data[-1], 1, rank, ties.method='min', na.last='keep')))
colnames(dt_rank)[15] <- 'UK' # rename
# long to wide
dt <- dt_rank %>%
pivot_longer(
cols = belgium:slovenia,
names_to = "country",
values_to = "rank"
)
# changing variable class (required for sankey chart)
dt$year <- as.character(dt$year)
dt$country <- as.character(dt$country)
dt$rank <- as.factor(dt$rank)
str(dt)
## tibble [297 × 3] (S3: tbl_df/tbl/data.frame)
## $ year : chr [1:297] "2013" "2013" "2013" "2013" ...
## $ country: chr [1:297] "belgium" "denmark" "finland" "france" ...
## $ rank : Factor w/ 27 levels "1","2","3","4",..: 9 1 8 24 11 13 15 6 12 17 ...
# visualisation
ggplot(dt, aes(x = year, stratum = rank, alluvium = country, fill = country, label = country)) +
geom_flow(stat = "alluvium", lode.guidance = "rightleft", color = "darkgray") +
geom_stratum() +
theme(legend.position = "bottom") +
ggtitle("Fuel price rank changes") +
scale_fill_viridis_d(direction = -1) +
theme_classic()
For better visualisation, some nations are selected:
datas <- data %>% select(c(1:16)) # Only 15 nations are selected
dt_rank <- cbind(datas[1], t(apply(-datas[-1], 1, rank, ties.method='min', na.last='keep')))
colnames(dt_rank)[15] <- 'UK' # rename
#
dt2 <- dt_rank %>%
pivot_longer(
cols = belgium:bulgaria,
names_to = "country",
values_to = "rank"
)
# changing variable class (required for sankey chart)
dt2$year <- as.character(dt2$year)
dt2$country <- as.character(dt2$country)
dt2$rank <- as.factor(dt2$rank)
str(dt2)
## tibble [165 × 3] (S3: tbl_df/tbl/data.frame)
## $ year : chr [1:165] "2013" "2013" "2013" "2013" ...
## $ country: chr [1:165] "belgium" "denmark" "finland" "france" ...
## $ rank : Factor w/ 15 levels "1","2","3","4",..: 6 1 5 14 8 10 11 4 9 13 ...
ggplot(dt2, aes(x = year, stratum = rank, alluvium = country, fill = country, label = country)) +
geom_flow(stat = "alluvium", lode.guidance = "rightleft", color = "darkgray") +
geom_stratum(alpha = .8) + # color gradient
theme(legend.position = "bottom") +
ggtitle("Fuel price rank changes") +
scale_fill_viridis_d(direction = -1) +
theme_classic() +
geom_text(stat = "stratum", aes(label = rank))
data <- mydata %>% janitor::clean_names() # clean cols name
data$month <- as.Date(as.numeric(data$month), origin = "1899-12-30")
data$Month <- format(as.Date(data$month, format="%Y/%m/%d"),"%m")
# convert month in number into month abbreviation
data$Month <- as.numeric(data$Month)
data <- transform(data, MonthAbb = month.abb[Month])