Time Series Visualisations

Jason Ola

2023-01-24

Import libraries

library(tidyverse)
library(lubridate)
library(tsibble)

Import dataset

The data can be found on my GitHub.

# Read the raw SMI data from the local CSV file.
data_smi <- read_csv(file = "data/data_smi.csv")

We see that we have 2 columns, price and date. The date is stored as character, so we convert it to a proper date format.

# Convert the day-month-year strings in `date` to Date values,
# then order the rows chronologically.
smi <- data_smi %>%
  mutate(date = dmy(date)) %>%
  arrange(date)

SMI Visualisation

Here is a first plot of our whole data

# Line chart of the complete SMI series.
# `linewidth` is used instead of `size`: `size` is deprecated for line geoms
# since ggplot2 3.4.0 and triggers a warning there.
whole_smi_plot <- smi %>%
  ggplot() +
  geom_line(aes(x = date, y = price), linewidth = 0.4) +
  labs(
    title = "SMI values",
    subtitle = "From 2015 to 2020",
    x = "Date",
    y = "Price in CHF"
  ) +
  theme_minimal()
whole_smi_plot

Let’s now keep only the data since 2019 to produce a new plot

# Same line chart, restricted to observations dated after 1 January 2019.
# `linewidth` is used instead of `size`: `size` is deprecated for line geoms
# since ggplot2 3.4.0 and triggers a warning there.
since_2019_plot <- smi %>%
  filter(date > ymd("20190101")) %>%
  ggplot() +
  geom_line(aes(x = date, y = price), linewidth = 0.4) +
  labs(
    title = "SMI values",
    subtitle = "From 2019",
    x = "Date",
    y = "Price in CHF"
  ) +
  theme_minimal()
since_2019_plot

We can clearly see the Covid-19 drop towards the end of the series.

Let’s now add a red horizontal line at 8900CHF to our plot

# Overlay a thin red horizontal reference line at 8900 on the plot of the data
# since 2019 and update the subtitle accordingly.
# `linewidth` is used instead of `size`: `size` is deprecated for line geoms
# since ggplot2 3.4.0 and triggers a warning there.
hline_plot <- since_2019_plot +
  geom_hline(yintercept = 8900, color = "red", linewidth = 0.3) +
  labs(subtitle = "From 2019 with line at 8900CHF")
hline_plot

Let’s now use this line, together with plotly, to find the last time the SMI value went below 8900 in 2019 and the first time it did so in 2020

# Turn the static ggplot into an interactive plotly widget so that exact
# dates and prices can be read by hovering over the line.
plotly::ggplotly(p = hline_plot)

We see here that the last date below 8900 in 2019 is the 28th of January 2019 and the first date below 8900 in 2020 is the 12th of March 2020

Let’s now try to get these values with code

# For every year from 2019 on, take the first and last date with a price
# below 8900. `lead()` then moves each year's first date up by one row, so the
# first row pairs the last sub-8900 date of the earliest year with the first
# sub-8900 date of the following year; only that first row is kept.
min_max_dates <- smi %>%
  filter(price < 8900, year(date) >= 2019) %>%
  group_by(year = year(date)) %>%
  summarise(
    max_date = max(date),
    min_date = min(date)
  ) %>%
  mutate(min_date = lead(min_date)) %>%
  head(1)

Let’s use a stamp to format our date in the inline

# Build a date formatter from an example string: `sf(x)` renders a date in the
# same layout as the template "Jan 13th 2020".
sf <- stamp("Jan 13th 2020", orders = "%m %d %y")

We indeed get our previous dates: Jan 28th 2019 and Mar 12th 2020

Let’s get how much time elapsed between the 2 dates

# Number of days between the two dates stored in `min_max_dates`
# (`min_date` minus `max_date`), as a plain number.
n_days <- as.numeric(
  min_max_dates[["min_date"]] - min_max_dates[["max_date"]]
)

409 days elapsed between the two dates

We can also see mean values by week

# Keep the observations from 1 January 2019 onwards and tag each one with its
# week (tsibble `yearweek`).
# The cut-off is applied to `date` rather than to `week`: ISO week 2019 W01
# starts on Monday 2018-12-31, so comparing the week against that date with `>`
# would discard the first days of January 2019.
# NOTE(review): this assumes a yearweek compares by its week-start date - confirm.
data_smi_wkly <- smi %>%
  filter(date >= ymd("2019-01-01")) %>%
  mutate(week = yearweek(date))

Let’s now plot this data

# Average price per week, drawn as small blue points.
data_smi_wkly %>%
  group_by(week) %>%
  summarise(mean_price = mean(price)) %>%
  ggplot() +
  geom_point(
    aes(x = week, y = mean_price),
    size = 0.8,
    color = "blue"
  ) +
  theme_minimal() +
  labs(
    title = "SMI weekly price mean",
    subtitle = "From 2019",
    x = "Week",
    y = "Mean price in CHF"
  )

Here we combine the two data sets to plot the SMI values with the weekly mean on top

# One row per week holding the average price of that week.
# `summarise()` collapses each group directly and returns an ungrouped tibble,
# instead of repeating the mean on every row and de-duplicating afterwards
# (which also left the result grouped by `week`).
data_smi_mean_wkly <- data_smi_wkly %>%
  group_by(week) %>%
  summarise(mean_price = mean(price))
# Individual prices plotted against their week (small black points), with the
# weekly means from `data_smi_mean_wkly` drawn on top in blue.
data_smi_wkly %>%
  ggplot() +
  geom_point(aes(x = week, y = price), size = 0.3) +
  geom_point(
    data = data_smi_mean_wkly,
    mapping = aes(x = week, y = mean_price),
    alpha = 0.8,
    color = "blue"
  ) +
  theme_minimal() +
  labs(
    title = "Weekly SMI values with mean ",
    subtitle = "Since January 2019",
    x = "Week",
    y = "Price"
  )

Let’s do the same but by months this time

# Tag every observation with its month (tsibble `yearmonth`) and keep only the
# rows whose month compares greater than 2018-12-31.
# NOTE(review): presumably this keeps January 2019 onwards - confirm how a
# yearmonth compares against a Date.
data_smi_mthly <- smi %>%
  mutate(month = yearmonth(date)) %>%
  filter(month > as_date("2018-12-31"))
# One row per month holding the average price of that month.
# `summarise()` collapses each group directly and returns an ungrouped tibble,
# instead of repeating the mean on every row and de-duplicating afterwards
# (which also left the result grouped by `month`).
data_smi_mean_mthly <- data_smi_mthly %>%
  group_by(month) %>%
  summarise(mean_price = mean(price))
# Average price per month, drawn as blue points.
# The plot is built directly from the monthly means: the daily data that was
# previously piped into ggplot() was not referenced by any layer.
data_smi_mean_mthly %>%
  ggplot() +
  geom_point(
    aes(x = month, y = mean_price),
    color = "blue",
    alpha = 0.8
  ) +
  labs(
    title = "SMI monthly price mean ",
    subtitle = "Since January 2019",
    x = "Month",
    y = "Mean price in CHF"
  ) +
  theme_minimal()

And now we again combine the two data sets, this time by month

# Individual prices plotted against their month (small black points), with the
# monthly means from `data_smi_mean_mthly` drawn on top in blue.
data_smi_mthly %>%
  ggplot() +
  geom_point(aes(x = month, y = price), size = 0.3) +
  geom_point(
    data = data_smi_mean_mthly,
    mapping = aes(x = month, y = mean_price),
    alpha = 0.8,
    color = "blue"
  ) +
  theme_minimal() +
  labs(
    title = "Monthly SMI values with mean ",
    subtitle = "Since January 2019",
    x = "Month",
    y = "Price"
  )

Let’s see what boxplots look like with this data; we group the observations by month so that each month gets its own box.

# One boxplot of the prices per month.
# `month` is already a yearmonth in `data_smi_mthly`, so the former
# `mutate(month = yearmonth(month))` re-conversion was redundant and is dropped.
# `group = month` is what makes ggplot draw a separate box for each month.
# `linewidth` is used instead of `size` for the box outlines: `size` is
# deprecated for lines since ggplot2 3.4.0 (`outlier.size` still sizes points).
data_smi_mthly %>%
  ggplot(aes(x = month, y = price, group = month)) +
  geom_boxplot(linewidth = 0.3, outlier.size = 0.3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Boxplot distribution of price per month",
    subtitle = "since 2019",
    x = "Month",
    y = "Price in CHF"
  )

We see with these box plots how unstable the prices were during the first months of Covid-19.

Airport Traffic Visualisation

Here I will focus on the Changi Airport (Singapore) CSV dataset of monthly departures, arrivals and passenger numbers since 1980. I’m interested in how the number of passengers evolves and at what times of the year people travel. The data can be found at: https://data.gov.sg/dataset/civil-aircraft-arrivals-departures-passengers-and-mail-changi-airport-monthly?resource_id=1a08ce4d-aafc-4fee-afb7-e8f4c3a41d80

Let’s load the data and filter for total passengers

# Read the airport CSV, keep only the rows of the "Total Passengers" series
# and parse the `month` column with `ym()` (year-month order).
passengers <- read_csv(file = "data/passengers.csv") %>%
  filter(level_1 == "Total Passengers") %>%
  mutate(month = ym(month))

When do people tend to go on vacation in Singapore?

# Monthly passenger numbers over the whole period.
# The plotted quantity is `value / 1000`, so the y-axis label now states that
# the unit is thousands of passengers (it previously read as a raw count).
# The title typo "trafic" is corrected, and `linewidth` replaces `size`, which
# is deprecated for line geoms since ggplot2 3.4.0.
passengers %>%
  ggplot() +
  geom_line(aes(x = month, y = value / 1000), linewidth = 0.4) +
  labs(
    title = "Changi Airport traffic",
    subtitle = "Since 1980",
    x = "Year",
    y = "Number of passengers (thousands)"
  ) +
  theme_minimal()

We can already see some periodic trends, with a big low in traffic around 2003, probably due to the SARS epidemic in the region, and an even bigger low in 2020 due to Covid-19. There is also a small low in 2009 with the subprime crisis.

Let’s zoom in between 2015 and 2020.

# Monthly passenger numbers, restricted to months after 31 December 2014.
# The plotted quantity is `value / 1000`, so the y-axis label now states that
# the unit is thousands of passengers (it previously read as a raw count).
# The title typo "trafic" is corrected, and `linewidth` replaces `size`, which
# is deprecated for line geoms since ggplot2 3.4.0.
passengers %>%
  filter(month > ymd("2014-12-31")) %>%
  ggplot() +
  geom_line(aes(x = month, y = value / 1000), linewidth = 0.4) +
  labs(
    title = "Changi Airport traffic",
    subtitle = "Since 2015",
    x = "Year",
    y = "Number of passengers (thousands)"
  ) +
  theme_minimal()

We see indeed that the highs are periodic and occur at three points: Easter, the summer holidays and, the highest peak, Christmas/New Year.