# Load packages
if (!require("pacman")) install.packages("pacman") # allow easy package installation

# load required packages
pacman::p_load(tidyverse, # for data wrangling, visualization, etc 
               janitor,   # for data cleaning
               forecast)      # for time serie analysis
theme_set(theme_minimal()) #set the theme for all the graphs

Dataset

df <- read_csv("Event_Segmentation.csv", skip = 6) # read the csv and skipe the first n rows

Check the dataset

head(df)

Transpose the dataset

df <- df %>%
  pivot_longer(-Event, names_to = "date", values_to = "n_type") %>% 
  mutate(date = lubridate::ymd(date) ) %>% 
  janitor::clean_names() %>% 
  tibble::rowid_to_column( "id") # a column with unique id

Check the dataset

head(df)

Exploratory Data Analysis (EDA)

df %>%
  ggplot(aes(x = date, 
             y = n_type, 
             color = event)) +
  geom_line() +
  labs(    title = "EDA",
           y = "n",
           x = "Date",
           color = "Event"
  )

df %>%
  ggplot(aes(x = date, 
             y = n_type, 
             color = event)) +
  geom_line() +
  labs(    title = "EDA with regression line and smoothing each 7 days",
           y = "n",
           x = "Date",
           color = "Event"
  ) + 
  geom_smooth(method = "loess", se = TRUE, span = 0.7)

df %>%
  ggplot(aes(x = date, 
             y = n_type, 
             color = event)) +
  geom_line() +
  labs(    title = "EDA",
           y = "n",
           x = "Date",
           color = "Event"
  ) + 
  facet_grid(.~event)

df %>%
  ggplot(aes(x = date, 
             y = n_type, 
             color = event)) +
  geom_line(alpha = .5) +
  labs(    title = "EDA with regression line, smoothing each 7 days",
           y = "n",
           x = "Date",
           color = "Event"
  ) + 
  facet_grid(.~event) + 
  geom_smooth(method = "loess", se = TRUE, span = 0.7)

Grouped by week

df %>% 
  group_by(week = lubridate::week(date)) %>%
  ggplot(aes(x = as.factor(week), y = n_type, color = event)) + 
  geom_boxplot() + 
  facet_grid(.~event)

df %>% 
  group_by(week = lubridate::week(date), event) %>%
  summarise(mean_n = mean(n_type)) %>% 
  pivot_wider(event)

Time serie analysis

Compare each monthly week

pacman::p_load(tibble, 
               feasts, 
               fable)

Convert df to tsible format

LS0tCnRpdGxlOiAiVGltcyBzZXJpZSBhbmFseXNpcyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkKa25pdHI6Om9wdHNfY2h1bmskc2V0KG1lc3NhZ2U9RkFMU0UpCmBgYAoKYGBge3J9CiMgTG9hZCBwYWNrYWdlcwppZiAoIXJlcXVpcmUoInBhY21hbiIpKSBpbnN0YWxsLnBhY2thZ2VzKCJwYWNtYW4iKSAjIGFsbG93IGVhc3kgcGFja2FnZSBpbnN0YWxsYXRpb24KCiMgbG9hZCByZXF1aXJlZCBwYWNrYWdlcwpwYWNtYW46OnBfbG9hZCh0aWR5dmVyc2UsICMgZm9yIGRhdGEgd3JhbmdsaW5nLCB2aXN1YWxpemF0aW9uLCBldGMgCiAgICAgICAgICAgICAgIGphbml0b3IsICAgIyBmb3IgZGF0YSBjbGVhbmluZwogICAgICAgICAgICAgICBmb3JlY2FzdCkgICAgICAjIGZvciB0aW1lIHNlcmllIGFuYWx5c2lzCmBgYApgYGB7cn0KdGhlbWVfc2V0KHRoZW1lX21pbmltYWwoKSkgI3NldCB0aGUgdGhlbWUgZm9yIGFsbCB0aGUgZ3JhcGhzCmBgYAoKCiMgRGF0YXNldApgYGB7cn0KZGYgPC0gcmVhZF9jc3YoIkV2ZW50X1NlZ21lbnRhdGlvbi5jc3YiLCBza2lwID0gNikgIyByZWFkIHRoZSBjc3YgYW5kIHNraXBlIHRoZSBmaXJzdCBuIHJvd3MKYGBgCkNoZWNrIHRoZSBkYXRhc2V0CmBgYHtyfQpoZWFkKGRmKQpgYGAKVHJhbnNwb3NlIHRoZSBkYXRhc2V0CmBgYHtyfQpkZiA8LSBkZiAlPiUKICBwaXZvdF9sb25nZXIoLUV2ZW50LCBuYW1lc190byA9ICJkYXRlIiwgdmFsdWVzX3RvID0gIm5fdHlwZSIpICU+JSAKICBtdXRhdGUoZGF0ZSA9IGx1YnJpZGF0ZTo6eW1kKGRhdGUpICkgJT4lIAogIGphbml0b3I6OmNsZWFuX25hbWVzKCkgJT4lIAogIHRpYmJsZTo6cm93aWRfdG9fY29sdW1uKCAiaWQiKSAjIGEgY29sdW1uIHdpdGggdW5pcXVlIGlkCgpgYGAKCkNoZWNrIHRoZSBkYXRhc2V0CmBgYHtyfQpoZWFkKGRmKQpgYGAKCiMgRXhwbG9yYXRvcnkgRGF0YSBBbmFseXNpcyAoRURBKQoKYGBge3J9CmRmICU+JQogIGdncGxvdChhZXMoeCA9IGRhdGUsIAogICAgICAgICAgICAgeSA9IG5fdHlwZSwgCiAgICAgICAgICAgICBjb2xvciA9IGV2ZW50KSkgKwogIGdlb21fbGluZSgpICsKICBsYWJzKCAgICB0aXRsZSA9ICJFREEiLAogICAgICAgICAgIHkgPSAibiIsCiAgICAgICAgICAgeCA9ICJEYXRlIiwKICAgICAgICAgICBjb2xvciA9ICJFdmVudCIKICApCmBgYApgYGB7cn0KZGYgJT4lCiAgZ2dwbG90KGFlcyh4ID0gZGF0ZSwgCiAgICAgICAgICAgICB5ID0gbl90eXBlLCAKICAgICAgICAgICAgIGNvbG9yID0gZXZlbnQpKSArCiAgZ2VvbV9saW5lKCkgKwogIGxhYnMoICAgIHRpdGxlID0gIkVEQSB3aXRoIHJlZ3Jlc3Npb24gbGluZSBhbmQgc21vb3RoaW5nIGVhY2ggNyBkYXlzIiwKICAgICAgICAgICB5ID0gIm4iLAogICAgICAgICAgIHggPSAiRGF0ZSIsCiAgICAgICAgICAgY29sb3IgPSAiRXZlbnQiCiAgKSArIAogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsb2VzcyIsIHNlID0gVFJVRSwgc3BhbiA9IDAuNykKYGBgCgoKCgpgYGB7cn0KZGYgJT4lCiAgZ2dwbG90KGFlcyh4ID0gZGF0ZSwgCiAgICAgICAgICAgICB5ID0gbl90eXBlLCAKICAgICAgICAgICAgIGNvbG9yID0gZXZlbnQpKSArCiAgZ2VvbV9saW5lKCkgKwogIGxhYnMoICAgIHRpdGxlID0gIkVEQSIsCiAgICAgICAgICAgeSA9ICJuIiwKICAgICAgICAgICB4ID0gIkRhdGUiLAogICAgICAgICAgIGNvbG9yID0gIkV2ZW50IgogICkgKyAKICBmYWNldF9ncmlkKC5+ZXZlbnQpCmBgYAoKYGBge3J9CmRmICU+JQogIGdncGxvdChhZXMoeCA9IGRhdGUsIAogICAgICAgICAgICAgeSA9IG5fdHlwZSwgCiAgICAgICAgICAgICBjb2xvciA9IGV2ZW50KSkgKwogIGdlb21fbGluZShhbHBoYSA9IC41KSArCiAgbGFicyggICAgdGl0bGUgPSAiRURBIHdpdGggcmVncmVzc2lvbiBsaW5lLCBzbW9vdGhpbmcgZWFjaCA3IGRheXMiLAogICAgICAgICAgIHkgPSAibiIsCiAgICAgICAgICAgeCA9ICJEYXRlIiwKICAgICAgICAgICBjb2xvciA9ICJFdmVudCIKICApICsgCiAgZmFjZXRfZ3JpZCgufmV2ZW50KSArIAogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsb2VzcyIsIHNlID0gVFJVRSwgc3BhbiA9IDAuNykKYGBgCgoKCiMjIEdyb3VwZWQgYnkgd2VlawoKYGBge3J9CmRmICU+JSAKICBncm91cF9ieSh3ZWVrID0gbHVicmlkYXRlOjp3ZWVrKGRhdGUpKSAlPiUKICBnZ3Bsb3QoYWVzKHggPSBhcy5mYWN0b3Iod2VlayksIHkgPSBuX3R5cGUsIGNvbG9yID0gZXZlbnQpKSArIAogIGdlb21fYm94cGxvdCgpICsgCiAgZmFjZXRfZ3JpZCgufmV2ZW50KQpgYGAKCmBgYHtyfQpkZiAlPiUgCiAgZ3JvdXBfYnkod2VlayA9IGx1YnJpZGF0ZTo6d2VlayhkYXRlKSwgZXZlbnQpICU+JQogIHN1bW1hcmlzZShtZWFuX24gPSBtZWFuKG5fdHlwZSkpICU+JSAKICBwaXZvdF93aWRlcihldmVudCkKYGBgCgoKIyBUaW1lIHNlcmllIGFuYWx5c2lzCgojIyBDb21wYXJlIGVhY2ggbW9udGhseSB3ZWVrCgpgYGB7ciwgbWVzc2FnZT1GQUxTRX0KCiMgRXh0cmFjdCBtb250aCBhbmQgeWVhciBhbmQgc3RvcmUgaW4gc2VwYXJhdGUgY29sdW1ucwpkZiA8LSBkZiAlPiUgCiAgbXV0YXRlKG1vbnRoID0gbHVicmlkYXRlOjptb250aChkYXRlKSwKICAgICAgICAgd2Vla2RheSA9IGx1YnJpZGF0ZTo6d2RheShkYXRlKSwgIAogICAgICAgICB3ZWVrZGF5X25hbWUgPSBzdHJmdGltZShkYXRlLCclQScpKQoKIyBDcmVhdGUgYSBjb2xvdXIgcGFsZXR0ZSB1c2luZyB0aGUgYGNvbG9ydG9vbHNgIHBhY2thZ2UgCnBhY21hbjo6cF9sb2FkKGNvbG9ydG9vbHMpCndlZWtfcGFsIDwtIGNvbG9ydG9vbHM6OnNlcXVlbnRpYWwoY29sb3IgPSAiZGFya3R1cnF1b2lzZSIsIHBlcmNlbnRhZ2UgPSA1LCB3aGF0ID0gInZhbHVlIikKCiMgTWFrZSB0aGUgcGxvdApkZiAlPiUgCiAgZ2dwbG90KCkKCmBgYAoKCmBgYHtyLCBtZXNzYWdlPUZBTFNFfQpwYWNtYW46OnBfbG9hZCh0aWJibGUsIAogICAgICAgICAgICAgICBmZWFzdHMsIAogICAgICAgICAgICAgICBmYWJsZSkKYGBgCgpDb252ZXJ0IGRmIHRvIHRzaWJsZSBmb3JtYXQKCmBgYHtyfQpkZl90c2JsIDwtIGFzX3RzaWJibGUoZGYsIGtleSA9IGlkLCBpbmRleCA9IGRhdGUpICNjb252ZXJ0IHRoZSBkZiB0byB0c2liYmxlIGZvcm1hdApoZWFkKGRmX3RzYmwpICMgY2hlY2sKYGBgCgoK