pacman::p_load(
rio,
here,
tidyverse,
lubridate,
plotly,
gganimate,
ggExtra,
ggalt,
ggcorrplot,
ggpubr,
ggplot2, # ggarrange()
zoo,
data.table,
formattable,
tidyr
)
mydata <- import(here("data", "Weekly_Fuel_Prices_240423.xlsx"), which = "All years", skip = 7)
data <- mydata %>% janitor::clean_names()
data <- data %>% filter(date > "2004-06-1") # omit NA value
str(data) # please note: date has POSIXct, format, not as.Date format
## 'data.frame': 986 obs. of 11 variables:
## $ date : POSIXct, format: "2004-06-07" "2004-06-14" ...
## $ ulsp_pump_price_p_litre : num 82.4 81.5 81 80.4 79.9 ...
## $ ulsp_diff_on_previous_week_p_litre: num 0.0864 -0.8989 -0.4996 -0.634 -0.5348 ...
## $ ulsp_diff_on_previous_year_p_litre: num 7.85 7.07 6.62 6.06 5.59 ...
## $ duty_rate_ulsp_p_litre : num 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 ...
## $ vat_percent_rate_ulsp : num 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 ...
## $ ulsd_pump_price_p_litre : num 83.7 82.6 82.1 81 80.5 ...
## $ ulsd_diff_on_previous_week_p_litre: num 0 -1.047 -0.511 -1.049 -0.533 ...
## $ ulsd_diff_on_previous_year_p_litre: num 6.88 5.92 5.47 4.54 4.05 ...
## $ duty_rate_ulsd_p_litre : num 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 ...
## $ vat_percent_rate_ulsd : num 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 ...
g1 <- ggplot(data, aes(x = date, y = ulsp_pump_price_p_litre)) +
geom_line() +
labs(title = "ULSP: Pump price (p/litre)",
caption = "Source: GOV.UK 2023",
x = "Year",
y = "p/litre") +
theme_classic() +
# annotations
annotate(geom = "text",
x = as.POSIXct("2008-08-25", origin="1970-01-01"), y = 70,
label = "2008\nEconomic Crisis?",
family = "Arial Narrow",
colour = "red",
size = 3, fontface = "bold") +
annotate(geom = "rect",
xmin = as.POSIXct("2008-08-25", origin="1970-01-01"), xmax = as.POSIXct("2009-08-25", origin="1970-01-01"),
ymin = -Inf, ymax = Inf,
fill = "grey80", alpha = .4)
g1
ggplot(data, aes(x = date, y = ulsp_pump_price_p_litre)) +
geom_line(color="#69b3a2") +
labs(title = "ULSP: Pump price (p/litre)",
caption = "Source: GOV.UK 2023",
x = "Year",
y = "p/litre") +
theme_classic() +
annotate(geom = "text",
x = as.POSIXct("2008-08-25", origin="1970-01-01"), y = 70,
label = "2008\nEconomic Crisis?",
family = "Arial Narrow",
colour = "red",
size = 3, fontface = "bold") +
annotate(geom = "rect",
xmin = as.POSIXct("2008-08-25", origin="1970-01-01"), xmax = as.POSIXct("2009-08-25", origin="1970-01-01"),
ymin = -Inf, ymax = Inf,
fill = "grey80", alpha = .4) +
annotate(geom="text", x=as.POSIXct("2022-07-04"), y=191.5466, label="Price reached 191$ at mid of 2022", color = "blue", angle = 0,
hjust = 1.0, vjust = 0.1) +
annotate(geom="point", x=as.POSIXct("2022-07-04"), y=191.5466, size=6, shape=21, fill="transparent")
# geom_hline(yintercept=100, color="orange", size=.5)
Split the time series in several plots by day, week, month, year
data$year <- year(data$date)
# data$date has POSIXct format, so we need to change it into date format for scale_x_date()
data$date <- as.Date(data$date, format = "%Y-%m-%d")
ggplot(data, aes(x = date, y = ulsp_pump_price_p_litre)) +
geom_line(color="#69b3a2") +
facet_wrap(~year, scales = "free") +
scale_x_date(date_labels = "%b")
# Select recent years
x <- data
x <- x[x$date >= as.Date("2013-01-01"), ]
x$date <- as.Date(x$date, format = "%Y-%m-%d")
ggplot(x, aes(x = date, y = ulsp_pump_price_p_litre)) +
geom_line(color="#69b3a2") +
facet_wrap(~year, scales = "free") +
scale_x_date(date_labels = "%b")
# Adding color to line
g2 <- ggplot(data, aes(date, ulsp_pump_price_p_litre, color = ulsp_pump_price_p_litre)) +
geom_line(show.legend = T) +
scale_colour_gradient(low = "blue", high = "orange") +
labs(title = "ULSP: Pump price (p/litre)",
subtitle = "Weekly Prices time series (from 2003)",
caption = "Source: GOV.UK 2023",
x = "Year",
y = "p/litre")+
theme_classic()
g2
g2 <- ggplot(data, aes(x = date, y = ulsp_diff_on_previous_week_p_litre)) +
geom_line() +
labs(title = "ULSP: Pump price (p/litre)",
subtitle="Diff on previous WEEK (p/litre)",
caption = "Source: GOV.UK",
x = "Year",
y = "p/litre") +
theme_classic()
g2
g3 <- ggplot(data, aes(date, ulsp_diff_on_previous_year_p_litre)) +
geom_line() +
labs(title = "ULSP: Pump price (p/litre)",
subtitle = "Diff on previous YEAR (p/litre)",
caption = "Source: GOV.UK 2023",
x = "Year",
y = "p/litre") +
theme_classic()
g3
g1 <- ggplot(data, aes(x = date, y = ulsp_pump_price_p_litre)) +
geom_line() +
labs(title = "ULSP: Pump price (p/litre)",
subtitle = "Weekly Prices time series (from 2003)",
y = "p/litre") +
theme_classic() +
theme_update(axis.title.x = element_blank())
# theme_update(axis.title.x=element_blank(),
# axis.text.x=element_blank(),
# axis.ticks.x=element_blank())
g2 <- ggplot(data, aes(x = date, y = ulsp_diff_on_previous_week_p_litre)) +
geom_line() +
labs(subtitle ="Diff on previous WEEK (p/litre)",
y = "p/litre") +
theme_classic() +
theme_update(axis.title.x = element_blank())
g3 <- ggplot(data, aes(date, ulsp_diff_on_previous_year_p_litre)) +
geom_line() +
labs(subtitle = "Diff on previous YEAR (p/litre)",
y = "p/litre") +
theme_classic() +
theme_update(axis.title.x = element_blank())
ggpubr::ggarrange(g1, # First row with line plot
ggarrange(g2, g3, ncol = 2, labels = c("B", "C")), # Second row with box and dot plots
nrow = 2,
labels = "A"
)
g1 <- ggplot(data, aes(x = date, y = ulsd_pump_price_p_litre)) +
geom_line() +
labs(title = "ULSD: Pump price (p/litre)",
caption = "Source: GOV.UK 2023",
x = "Year",
y = "p/litre") +
theme_classic() +
annotate(geom = "text",
x = as.Date("2008-08-25", format = "%Y-%m-%d"), y = 70,
label = "2008\nEconomic Crisis?",
family = "Arial Narrow",
colour = "red",
size = 3, fontface = "bold") +
annotate(geom = "rect",
xmin = as.Date("2008-08-25", format = "%Y-%m-%d"), xmax = as.Date("2009-08-25", format = "%Y-%m-%d"),
ymin = -Inf, ymax = Inf,
fill = "grey80", alpha = .4)
g1
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
g2 <- ggplot(data, aes(x = date, y = ulsd_diff_on_previous_week_p_litre)) +
geom_line() +
labs(title = "ULSD: Pump price (p/litre)",
subtitle="Diff on previous WEEK (p/litre)",
caption = "Source: GOV.UK",
x = "Year",
y = "p/litre") +
theme_classic()
g2
g3 <- ggplot(data, aes(date, ulsd_diff_on_previous_year_p_litre)) +
geom_line() +
labs(title = "ULSD: Pump price (p/litre)",
subtitle = "Diff on previous YEAR (p/litre)",
caption = "Source: GOV.UK 2023",
x = "Year",
y = "p/litre") +
theme_classic()
g3
g1 <- ggplot(data, aes(x = date, y = ulsd_pump_price_p_litre)) +
geom_line() +
labs(title = "ULSD: Pump price (p/litre)",
subtitle = "Weekly Prices time series (from 2003)",
y = "p/litre") +
theme_classic() +
theme_update(axis.title.x = element_blank())
g2 <- ggplot(data, aes(x = date, y = ulsd_diff_on_previous_week_p_litre)) +
geom_line() +
labs(subtitle ="Diff on previous WEEK (p/litre)",
y = "p/litre") +
theme_classic() +
theme_update(axis.title.x = element_blank())
g3 <- ggplot(data, aes(date, ulsd_diff_on_previous_year_p_litre)) +
geom_line() +
labs(subtitle = "Diff on previous YEAR (p/litre)",
y = "p/litre") +
theme_classic() +
theme_update(axis.title.x = element_blank())
ggpubr::ggarrange(g1, # First row with line plot
ggarrange(g2, g3, ncol = 2, labels = c("B", "C")), # Second row with box and dot plots
nrow = 2,
labels = "A"
)
dt <- data
dt1 <- dt %>% select(c(1:6))
str(dt1)
## 'data.frame': 986 obs. of 6 variables:
## $ date : Date, format: "2004-06-07" "2004-06-14" ...
## $ ulsp_pump_price_p_litre : num 82.4 81.5 81 80.4 79.9 ...
## $ ulsp_diff_on_previous_week_p_litre: num 0.0864 -0.8989 -0.4996 -0.634 -0.5348 ...
## $ ulsp_diff_on_previous_year_p_litre: num 7.85 7.07 6.62 6.06 5.59 ...
## $ duty_rate_ulsp_p_litre : num 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 ...
## $ vat_percent_rate_ulsp : num 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 ...
dt1 <- dt1 %>%
add_column(fuel_type = "ULSP") %>% rename("pump_price" = "ulsp_pump_price_p_litre", # new = old
"diff_week" = "ulsp_diff_on_previous_week_p_litre",
"diff_year" = "ulsp_diff_on_previous_year_p_litre",
"duty_rate" = "duty_rate_ulsp_p_litre",
"vat_per_rate" = "vat_percent_rate_ulsp")
dt2 <- dt %>% select(1, 7:11)
dt2 <- dt2 %>%
add_column(fuel_type = "ULSD") %>% rename("pump_price" = "ulsd_pump_price_p_litre",
"diff_week" = "ulsd_diff_on_previous_week_p_litre",
"diff_year" = "ulsd_diff_on_previous_year_p_litre",
"duty_rate" = "duty_rate_ulsd_p_litre",
"vat_per_rate" = "vat_percent_rate_ulsd")
dt <- bind_rows(dt1, dt2)
# Prepare data for visualisation
dt$fuel_type <- as.factor(dt$fuel_type)
str(dt)
## 'data.frame': 1972 obs. of 7 variables:
## $ date : Date, format: "2004-06-07" "2004-06-14" ...
## $ pump_price : num 82.4 81.5 81 80.4 79.9 ...
## $ diff_week : num 0.0864 -0.8989 -0.4996 -0.634 -0.5348 ...
## $ diff_year : num 7.85 7.07 6.62 6.06 5.59 ...
## $ duty_rate : num 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 ...
## $ vat_per_rate: num 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 ...
## $ fuel_type : Factor w/ 2 levels "ULSD","ULSP": 2 2 2 2 2 2 2 2 2 2 ...
# Compare ULSP vs ULSD
g1 <- ggplot(dt, aes(date, pump_price, color = fuel_type)) + # using linetype =
geom_line(show.legend = FALSE) +
labs(title = "Pump price",
subtitle = "Weekly Prices time series (from 2023): ULSP vs ULSD",
y = "p/litre",
x = "date") +
theme_classic() +
theme_update(axis.title.x = element_blank())
# ULSP
g2 <- ggplot(data, aes(x = date, y = ulsp_pump_price_p_litre)) +
geom_line(color = "lightblue") +
labs(title = "ULSP: Pump price",
y = "p/litre",
x = "date") +
theme_classic() +
theme_update(axis.title.x = element_blank())
# ULSD
g3 <- ggplot(data, aes(x = date, y = ulsd_pump_price_p_litre)) +
geom_line(color = "red") +
labs(title = "ULSD: Pump price",
y = "p/litre",
x = "date",
caption = "https://www.gov.uk/government/statistics/weekly-road-fuel-prices") +
theme_classic() +
theme_update(axis.title.x = element_blank())
# Arrange on the same page
gridExtra::grid.arrange(g1, g2, g3, ncol = 1, nrow = 3) # arrange plot in 1 same page
# Trend by month per year from 2003 - 2023
# Clean cols name
data <- mydata %>% janitor::clean_names()
df <- data %>% filter(date >= "2004-01-05" & date <= "2023-01-05")
df <- df %>% select(c(1:2))
# New cols of year, month, date
df <- df %>% dplyr::mutate(year = lubridate::year(date),
month = lubridate::month(date),
day = lubridate::day(date)
)
df$date <- as.Date(df$date, format = "%Y-%m-%d")
# Calculate number of weeks
df$week <- isoweek(ymd(df$date))
# Week of month
df <- df %>%
group_by(week) %>%
mutate(year_week = first(date))
# Visualation
df %>%
ggplot(aes(year_week, ulsp_pump_price_p_litre, color = factor(year))) +
geom_line() +
scale_x_date(date_breaks="1 month", date_labels="%b") +
geom_label(aes(label = factor(year)),
data = df %>% group_by(year) %>% filter(date == max(date)),
nudge_x = 0.35,
size = 4)+
labs(title = "ULSP: Pump price (p/litre)",
caption = "Source: GOV.UK 2023",
x = " ",
y = "p/litre") +
theme_classic() +
theme(legend.position="none")
# 5 recent years
df1 <- df %>% filter(year >= "2018")
df1 %>% ggplot(aes(year_week, ulsp_pump_price_p_litre, color = factor(year))) +
geom_line() +
scale_x_date(date_breaks="1 month", date_labels="%b") +
geom_label(aes(label = factor(year)),
data = df1 %>% group_by(year) %>% filter(date == max(date)),
nudge_x = 0.1,
size = 2)+
labs(title = "ULSP: Pump price (p/litre)",
caption = "Source: GOV.UK 2023",
x = " ",
y = "p/litre") +
theme_classic() +
theme(legend.position="none")
# df_q <- data %>% filter(date >= "2004-01-05" & date <= "2023-01-05")
df_q <- data %>% select(c(1:2))
# transform data from date to quarter
df_q <- df_q %>%
group_by(quarter = zoo::as.yearqtr(date)) %>%
summarise(mp = median(ulsp_pump_price_p_litre), .groups = 'drop') %>%
mutate(diff= lead(mp) - mp)
df_q$quarter <- as.character(df_q$quarter)
a <- stringr::str_split_fixed(df_q$quarter, " ", 2)
df_q <- cbind(a, df_q)
names(df_q)[1] <- "year"
names(df_q)[2] <- "quater"
df_q$quarter <- NULL
# long format to wide format
df_w <- df_q %>%
pivot_wider(names_from = quater,
values_from = c(mp, diff),
values_fill = 0)
# round value
df_w <- df_w %>% mutate(across(where(is.numeric), ~round(., 1)))
head(df_w, 10)
## # A tibble: 10 × 9
## year mp_Q2 mp_Q3 mp_Q4 mp_Q1 diff_Q2 diff_Q3 diff_Q4 diff_Q1
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2003 74.4 75.8 76 0 1.4 0.2 0.5 0
## 2 2004 80.4 81.3 83.3 76.4 0.8 2.1 -3.2 4
## 3 2005 85.4 90.6 90.5 80.2 5.2 0 -1 5.2
## 4 2006 95.4 95.7 85.7 89.6 0.3 -10 1.4 5.8
## 5 2007 95.1 96 101. 87.1 0.9 5.1 3.1 8
## 6 2008 113 112. 94.8 104. -0.4 -17.7 -4.7 8.7
## 7 2009 97.5 104 108. 90.1 6.4 3.5 4.4 7.4
## 8 2010 120. 116. 119. 112. -3.6 2.5 9.9 8
## 9 2011 136. 135 134. 129. -0.6 -1.3 1.2 6.9
## 10 2012 138. 134. 135. 135. -4 0.6 1.4 3.4