Load packages

pacman::p_load(
  rio,          
  here,         
  tidyverse,
  lubridate,
  plotly,
  gganimate,
  ggExtra,      
  ggalt,      
  ggcorrplot,
  ggpubr,
  ggplot2, # ggarrange()
  zoo,
  data.table, 
  formattable,
  tidyr
  )    

Import data

mydata <- import(here("data", "Weekly_Fuel_Prices_240423.xlsx"), which = "All years", skip = 7)

Clean data

data <- mydata %>% janitor::clean_names()
data <- data %>% filter(date > "2004-06-1") # omit NA value 
str(data) # please note: date has POSIXct, format, not as.Date format 
## 'data.frame':    986 obs. of  11 variables:
##  $ date                              : POSIXct, format: "2004-06-07" "2004-06-14" ...
##  $ ulsp_pump_price_p_litre           : num  82.4 81.5 81 80.4 79.9 ...
##  $ ulsp_diff_on_previous_week_p_litre: num  0.0864 -0.8989 -0.4996 -0.634 -0.5348 ...
##  $ ulsp_diff_on_previous_year_p_litre: num  7.85 7.07 6.62 6.06 5.59 ...
##  $ duty_rate_ulsp_p_litre            : num  47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 ...
##  $ vat_percent_rate_ulsp             : num  17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 ...
##  $ ulsd_pump_price_p_litre           : num  83.7 82.6 82.1 81 80.5 ...
##  $ ulsd_diff_on_previous_week_p_litre: num  0 -1.047 -0.511 -1.049 -0.533 ...
##  $ ulsd_diff_on_previous_year_p_litre: num  6.88 5.92 5.47 4.54 4.05 ...
##  $ duty_rate_ulsd_p_litre            : num  47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 ...
##  $ vat_percent_rate_ulsd             : num  17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 ...

ULSP: Pump price (p/litre)

ULSP Pump price time series data

g1 <- ggplot(data, aes(x = date, y = ulsp_pump_price_p_litre)) +
  geom_line() +
  labs(title = "ULSP:  Pump price (p/litre)",
       caption = "Source: GOV.UK 2023",
       x = "Year",
       y = "p/litre") +
  theme_classic() +
  # annotations
  annotate(geom = "text",
           x = as.POSIXct("2008-08-25", origin="1970-01-01"), y = 70,
           label = "2008\nEconomic Crisis?",
           family = "Arial Narrow",
           colour = "red",
           size = 3, fontface = "bold") +
  annotate(geom = "rect",
           xmin = as.POSIXct("2008-08-25", origin="1970-01-01"), xmax = as.POSIXct("2009-08-25", origin="1970-01-01"),
           ymin = -Inf, ymax = Inf,
           fill = "grey80", alpha = .4)
g1

ggplot(data, aes(x = date, y = ulsp_pump_price_p_litre)) +
  geom_line(color="#69b3a2") +
  labs(title = "ULSP:  Pump price (p/litre)",
       caption = "Source: GOV.UK 2023",
       x = "Year",
       y = "p/litre") +
  theme_classic() +
  annotate(geom = "text",
           x = as.POSIXct("2008-08-25", origin="1970-01-01"), y = 70,
           label = "2008\nEconomic Crisis?",
           family = "Arial Narrow",
           colour = "red",
           size = 3, fontface = "bold") +
  annotate(geom = "rect",
           xmin = as.POSIXct("2008-08-25", origin="1970-01-01"), xmax = as.POSIXct("2009-08-25", origin="1970-01-01"),
           ymin = -Inf, ymax = Inf,
           fill = "grey80", alpha = .4) +
  annotate(geom="text", x=as.POSIXct("2022-07-04"), y=191.5466, label="Price reached 191$ at mid of 2022", color = "blue", angle = 0,
             hjust = 1.0, vjust = 0.1) +
  annotate(geom="point", x=as.POSIXct("2022-07-04"), y=191.5466, size=6, shape=21, fill="transparent") 

  # geom_hline(yintercept=100, color="orange", size=.5)

Split the time series in several plots by day, week, month, year

data$year <- year(data$date)

# data$date has POSIXct format, so we need to change it into date format for scale_x_date()
data$date <- as.Date(data$date, format = "%Y-%m-%d") 
ggplot(data, aes(x = date, y = ulsp_pump_price_p_litre)) +
  geom_line(color="#69b3a2") +
  facet_wrap(~year, scales = "free") +
  scale_x_date(date_labels = "%b")

# Select recent years
x <- data

x <- x[x$date >= as.Date("2013-01-01"), ]

x$date <- as.Date(x$date, format = "%Y-%m-%d")

ggplot(x, aes(x = date, y = ulsp_pump_price_p_litre)) +
  geom_line(color="#69b3a2") +
  facet_wrap(~year, scales = "free") +
  scale_x_date(date_labels = "%b")

# Adding color to line 
g2 <- ggplot(data, aes(date, ulsp_pump_price_p_litre, color = ulsp_pump_price_p_litre)) +  
  geom_line(show.legend = T) +
  scale_colour_gradient(low = "blue", high = "orange") +
  labs(title = "ULSP:  Pump price (p/litre)",
      subtitle = "Weekly Prices time series (from 2003)",
       caption = "Source: GOV.UK 2023",
       x = "Year",
       y = "p/litre")+
  theme_classic()
g2

ulsp_diff_on_previous_week_p_litre

g2 <- ggplot(data, aes(x = date, y = ulsp_diff_on_previous_week_p_litre)) +
  geom_line() +
  labs(title = "ULSP:  Pump price (p/litre)",
       subtitle="Diff on previous WEEK (p/litre)",
       caption = "Source: GOV.UK",
       x = "Year",
       y = "p/litre") +
  theme_classic()

g2

ulsp_diff_on_previous_year_p_litre

g3 <- ggplot(data, aes(date, ulsp_diff_on_previous_year_p_litre)) +
  geom_line() +
  labs(title = "ULSP:  Pump price (p/litre)",
       subtitle = "Diff on previous  YEAR (p/litre)",
       caption = "Source: GOV.UK 2023",
       x = "Year",
       y = "p/litre") +
  theme_classic()

g3

Plot combination: time series data & diff

g1 <- ggplot(data, aes(x = date, y = ulsp_pump_price_p_litre)) +
  geom_line() +
  labs(title    = "ULSP:  Pump price (p/litre)",
       subtitle = "Weekly Prices time series (from 2003)",
       y        = "p/litre") + 
  theme_classic() + 
  theme_update(axis.title.x = element_blank()) 

# theme_update(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank())

g2 <- ggplot(data, aes(x = date, y = ulsp_diff_on_previous_week_p_litre)) +
  geom_line() +
  labs(subtitle  ="Diff on previous WEEK (p/litre)",
       y         = "p/litre") + 
  theme_classic() + 
  theme_update(axis.title.x = element_blank()) 


g3 <- ggplot(data, aes(date, ulsp_diff_on_previous_year_p_litre)) +
  geom_line() +
  labs(subtitle  = "Diff on previous  YEAR (p/litre)",
       y         = "p/litre") + 
  theme_classic() + 
  theme_update(axis.title.x = element_blank()) 
ggpubr::ggarrange(g1,  # First row with line plot
          ggarrange(g2, g3, ncol = 2, labels = c("B", "C")), # Second row with box and dot plots
          nrow   = 2,
          labels = "A"
  ) 

ULSD: Pump price (p/litre)

ULSD Pump price time series data

g1 <- ggplot(data, aes(x = date, y = ulsd_pump_price_p_litre)) +
  geom_line() +
  labs(title = "ULSD: Pump price (p/litre)",
       caption = "Source: GOV.UK 2023",
       x = "Year",
       y = "p/litre") +
  theme_classic() +
  annotate(geom = "text",
           x = as.Date("2008-08-25", format = "%Y-%m-%d"), y = 70, 
           label = "2008\nEconomic Crisis?",
           family = "Arial Narrow",
           colour = "red",
           size = 3, fontface = "bold") +
  annotate(geom = "rect",
           xmin = as.Date("2008-08-25", format = "%Y-%m-%d"), xmax = as.Date("2009-08-25", format = "%Y-%m-%d"),
           ymin = -Inf, ymax = Inf,
           fill = "grey80", alpha = .4)

g1
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

ulsd_diff_on_previous_week_p_litre

g2 <- ggplot(data, aes(x = date, y = ulsd_diff_on_previous_week_p_litre)) +
  geom_line() +
  labs(title = "ULSD:  Pump price (p/litre)",
       subtitle="Diff on previous WEEK (p/litre)",
       caption = "Source: GOV.UK",
       x = "Year",
       y = "p/litre") +
  theme_classic()

g2

ulsd_diff_on_previous_year_p_litre

g3 <- ggplot(data, aes(date, ulsd_diff_on_previous_year_p_litre)) +
  geom_line() +
  labs(title = "ULSD:  Pump price (p/litre)",
       subtitle = "Diff on previous  YEAR (p/litre)",
       caption = "Source: GOV.UK 2023",
       x = "Year",
       y = "p/litre") +
  theme_classic()

g3

Plot combination: time series data & diff

g1 <- ggplot(data, aes(x = date, y = ulsd_pump_price_p_litre)) +
  geom_line() +
  labs(title    = "ULSD:  Pump price (p/litre)",
       subtitle = "Weekly Prices time series (from 2003)",
       y        = "p/litre") + 
  theme_classic() + 
  theme_update(axis.title.x = element_blank()) 


g2 <- ggplot(data, aes(x = date, y = ulsd_diff_on_previous_week_p_litre)) +
  geom_line() +
  labs(subtitle  ="Diff on previous WEEK (p/litre)",
       y         = "p/litre") + 
  theme_classic() + 
  theme_update(axis.title.x = element_blank()) 


g3 <- ggplot(data, aes(date, ulsd_diff_on_previous_year_p_litre)) +
  geom_line() +
  labs(subtitle  = "Diff on previous  YEAR (p/litre)",
       y         = "p/litre") + 
  theme_classic() + 
  theme_update(axis.title.x = element_blank()) 
ggpubr::ggarrange(g1,  # First row with line plot
          ggarrange(g2, g3, ncol = 2, labels = c("B", "C")), # Second row with box and dot plots
          nrow   = 2,
          labels = "A"
  ) 

Comparision of ULSP & ULSD

dt <- data

dt1 <- dt %>% select(c(1:6))
str(dt1)
## 'data.frame':    986 obs. of  6 variables:
##  $ date                              : Date, format: "2004-06-07" "2004-06-14" ...
##  $ ulsp_pump_price_p_litre           : num  82.4 81.5 81 80.4 79.9 ...
##  $ ulsp_diff_on_previous_week_p_litre: num  0.0864 -0.8989 -0.4996 -0.634 -0.5348 ...
##  $ ulsp_diff_on_previous_year_p_litre: num  7.85 7.07 6.62 6.06 5.59 ...
##  $ duty_rate_ulsp_p_litre            : num  47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 ...
##  $ vat_percent_rate_ulsp             : num  17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 ...
dt1 <- dt1 %>%
  add_column(fuel_type = "ULSP") %>% rename("pump_price"   = "ulsp_pump_price_p_litre",   # new = old 
                                             "diff_week"    = "ulsp_diff_on_previous_week_p_litre",
                                             "diff_year"    = "ulsp_diff_on_previous_year_p_litre",
                                             "duty_rate"    = "duty_rate_ulsp_p_litre",
                                             "vat_per_rate" = "vat_percent_rate_ulsp")

dt2 <- dt %>% select(1, 7:11)
dt2 <- dt2 %>%
  add_column(fuel_type = "ULSD") %>% rename("pump_price"   = "ulsd_pump_price_p_litre",
                                             "diff_week"    = "ulsd_diff_on_previous_week_p_litre",
                                             "diff_year"    = "ulsd_diff_on_previous_year_p_litre",
                                             "duty_rate"    = "duty_rate_ulsd_p_litre",
                                             "vat_per_rate" = "vat_percent_rate_ulsd")


dt <- bind_rows(dt1, dt2)
# Prepare data for visualisation 
dt$fuel_type <- as.factor(dt$fuel_type)
str(dt)
## 'data.frame':    1972 obs. of  7 variables:
##  $ date        : Date, format: "2004-06-07" "2004-06-14" ...
##  $ pump_price  : num  82.4 81.5 81 80.4 79.9 ...
##  $ diff_week   : num  0.0864 -0.8989 -0.4996 -0.634 -0.5348 ...
##  $ diff_year   : num  7.85 7.07 6.62 6.06 5.59 ...
##  $ duty_rate   : num  47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 47.1 ...
##  $ vat_per_rate: num  17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 17.5 ...
##  $ fuel_type   : Factor w/ 2 levels "ULSD","ULSP": 2 2 2 2 2 2 2 2 2 2 ...
# Compare ULSP vs ULSD 
g1 <- ggplot(dt, aes(date, pump_price, color = fuel_type)) +  # using linetype = 
  geom_line(show.legend = FALSE) +
  labs(title    = "Pump price",
       subtitle = "Weekly Prices time series (from 2023): ULSP vs ULSD",
       y        = "p/litre",
       x        = "date") + 
  theme_classic() +
  theme_update(axis.title.x = element_blank())

# ULSP 
g2 <- ggplot(data, aes(x = date, y = ulsp_pump_price_p_litre)) +
  geom_line(color = "lightblue") +
  labs(title    = "ULSP:  Pump price",
       y        = "p/litre",
       x        = "date") + 
  theme_classic() +
  theme_update(axis.title.x = element_blank())

# ULSD
g3 <- ggplot(data, aes(x = date, y = ulsd_pump_price_p_litre)) +
  geom_line(color = "red") +
  labs(title = "ULSD: Pump price",
       y        = "p/litre",
       x        = "date",
       caption = "https://www.gov.uk/government/statistics/weekly-road-fuel-prices") + 
  theme_classic() +
  theme_update(axis.title.x = element_blank())


# Arrange on the same page 
gridExtra::grid.arrange(g1, g2, g3, ncol = 1, nrow = 3) # arrange plot in 1 same page

# Trend by month per year from 2003 - 2023 

# Clean cols name 
data <- mydata %>% janitor::clean_names()
df <- data %>% filter(date >= "2004-01-05" & date <= "2023-01-05")

df <- df %>% select(c(1:2))

# New cols of year, month, date  
df <- df %>% dplyr::mutate(year = lubridate::year(date), 
                           month = lubridate::month(date), 
                           day = lubridate::day(date)
                           )

df$date <- as.Date(df$date, format = "%Y-%m-%d") 

# Calculate number of weeks 
df$week <- isoweek(ymd(df$date))

# Week of month 
df <- df %>% 
    group_by(week) %>% 
    mutate(year_week = first(date)) 
# Visualation 
df %>% 
    ggplot(aes(year_week, ulsp_pump_price_p_litre, color = factor(year))) +
    geom_line() +
    scale_x_date(date_breaks="1 month", date_labels="%b") + 
    geom_label(aes(label = factor(year)),
               data = df %>% group_by(year) %>% filter(date == max(date)),
               nudge_x = 0.35,
               size = 4)+
    labs(title = "ULSP:  Pump price (p/litre)",
       caption = "Source: GOV.UK 2023",
       x = " ",
       y = "p/litre") +
  theme_classic() +
  theme(legend.position="none")

# 5 recent years
df1 <- df %>% filter(year >= "2018")

df1 %>% ggplot(aes(year_week, ulsp_pump_price_p_litre, color = factor(year))) +
    geom_line() +
    scale_x_date(date_breaks="1 month", date_labels="%b") + 
    geom_label(aes(label = factor(year)),
               data = df1 %>% group_by(year) %>% filter(date == max(date)),
               nudge_x = 0.1,
               size = 2)+
    labs(title = "ULSP:  Pump price (p/litre)",
       caption = "Source: GOV.UK 2023",
       x = " ",
       y = "p/litre") +
  theme_classic() +
  theme(legend.position="none") 

Data table

# df_q <- data %>% filter(date >= "2004-01-05" & date <= "2023-01-05")
df_q <- data %>% select(c(1:2))

# transform data from date to quarter
df_q <- df_q %>%
      group_by(quarter = zoo::as.yearqtr(date)) %>%
      summarise(mp = median(ulsp_pump_price_p_litre), .groups = 'drop') %>%   
      mutate(diff= lead(mp) - mp)

df_q$quarter <- as.character(df_q$quarter) 

a <- stringr::str_split_fixed(df_q$quarter, " ", 2) 

df_q <- cbind(a, df_q)

names(df_q)[1] <- "year"
names(df_q)[2] <- "quater"
df_q$quarter <- NULL

# long format to wide format
df_w <- df_q %>% 
  pivot_wider(names_from = quater, 
              values_from = c(mp, diff),
              values_fill = 0)

# round value 
df_w <- df_w %>% mutate(across(where(is.numeric), ~round(., 1)))

head(df_w, 10)
## # A tibble: 10 × 9
##    year  mp_Q2 mp_Q3 mp_Q4 mp_Q1 diff_Q2 diff_Q3 diff_Q4 diff_Q1
##    <chr> <dbl> <dbl> <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 2003   74.4  75.8  76     0       1.4     0.2     0.5     0  
##  2 2004   80.4  81.3  83.3  76.4     0.8     2.1    -3.2     4  
##  3 2005   85.4  90.6  90.5  80.2     5.2     0      -1       5.2
##  4 2006   95.4  95.7  85.7  89.6     0.3   -10       1.4     5.8
##  5 2007   95.1  96   101.   87.1     0.9     5.1     3.1     8  
##  6 2008  113   112.   94.8 104.     -0.4   -17.7    -4.7     8.7
##  7 2009   97.5 104   108.   90.1     6.4     3.5     4.4     7.4
##  8 2010  120.  116.  119.  112.     -3.6     2.5     9.9     8  
##  9 2011  136.  135   134.  129.     -0.6    -1.3     1.2     6.9
## 10 2012  138.  134.  135.  135.     -4       0.6     1.4     3.4