Import library

library(tidyverse)
library(gridExtra)
library(readxl)
library(lubridate)
library(hms)
library(forecast)
library(fastDummies)
library(imputeTS)
library(scales)
library(kableExtra)
library(modeltime)
library(Metrics)

Load dataset

df_raw <- read_excel("Data Awal dan Hasil Imputasi rev.xlsx", sheet = 2) %>% 
          mutate(Jam=as_hms(Jam), week = rep(1:51,each=336)[1:16992],
                 time=seq(ymd_hm("2018-1-1 0:00"), ymd_hm("2018-12-20 23:30"), by = 30*60),
                 month=month(time), wday=wday(time),day=day(time), 
                 hour=as_hms(time), yday=yday(time),
                 wtime=rep(seq(ymd_hm("2018-1-1 0:00"), 
                               ymd_hm("2018-1-7 23:30"), 
                               by = 30*60),51)[1:16992]) 
head(df_raw)
var_pollutant <- colnames(df_raw)[6:15]
var_pollutant
 [1] "SUF1_CO"   "SUF7_CO"   "SUF1_NO2"  "SUF7_NO2"  "SUF1_O3"   "SUF7_O3"   "SUF1_PM10" "SUF7_PM10" "SUF1_SO2"  "SUF7_SO2" 

Imputation

df <- list()
df[["na"]] <- data.frame(is.na(df_raw))
df[["impute_1"]] <- df_raw %>% 
                    group_by(Hari, J) %>% 
                    mutate_if(is.double,~replace_na(.,median(., na.rm = TRUE))) %>% 
                    ungroup()
`mutate_if()` ignored the following grouping variables:
• Columns `Hari`, `J`
df[["impute_2"]] <- df_raw %>% 
                    group_by(Hari, J) %>% 
                    mutate_if(is.double,~replace_na(.,mean(., na.rm = TRUE))) %>% 
                    ungroup()
`mutate_if()` ignored the following grouping variables:
• Columns `Hari`, `J`
df[["impute_3"]] <- df_raw %>% 
                    group_by(Hari, J) %>% 
                    mutate_if(is.double,~na_interpolation(., option="linear")) %>% 
                    ungroup()
`mutate_if()` ignored the following grouping variables:
• Columns `Hari`, `J`

Modelling

Dummy variables

df[["dummy"]] <- dummy_cols(df_raw, select_columns = c("J","Hari")) %>% 
                 mutate(t=seq_along(time)) %>% 
                 select(starts_with("J_"), starts_with("Hari_"),t)
for (k in var_pollutant){
  name <- paste0(k,"_imp_median")
  df[[name]] <- bind_cols(df[["impute_1"]], df[["dummy"]]) %>%
                select(time,starts_with(k), starts_with("J_"), starts_with("Hari_"),t)
  name <- paste0(k,"_imp_mean")
  df[[name]] <- bind_cols(df[["impute_2"]], df[["dummy"]]) %>% 
                select(time,starts_with(k), starts_with("J_"), starts_with("Hari_"),t)
  name <- paste0(k,"_imp_interpolation")
  df[[name]] <- bind_cols(df[["impute_3"]], df[["dummy"]]) %>% 
                select(time,starts_with(k), starts_with("J_"), starts_with("Hari_"),t)
}
ind <- df_raw %>% make_ts_splits(.length_test = 48)
ind
split [16944|48|16992]

Model Fitting and Forecasting

applyForecast <- function(data){
  ind <- data %>% make_ts_splits(.length_test = 48)
  train <- data[ind$idx_train,]
  test  <- data[ind$idx_test,]
  yy <- colnames(data)[2]
  f <- paste0(yy,"~.-1-time-Hari_1")
  m <- lm(f, data = train)
  r.squared <- summary(m)$r.squared
  pred <- m %>% predict(test)
  RMSE <- rmse(test[yy] %>% pull(),pred)
  MAPE <- mape(test[yy] %>% pull(),pred)
  sMAPE <- smape(test[yy] %>% pull(),pred)
  p_train <- ggplot(train %>% mutate(predicted=m$fitted.values)) + 
             geom_line(aes_(x=~time,y=as.name(yy))) +  
             geom_line(aes(x=time,y=predicted), color="red") +
             xlab("") +
             scale_x_datetime(date_breaks = "4 hour", date_labels = "%b %e %H:%M")
  p_test  <- ggplot(test %>% mutate(predicted=pred)) + 
             geom_line(aes_(x=~time,y=as.name(yy))) +  
             geom_line(aes(x=time,y=predicted), color="red") +
             xlab("") +
             scale_x_datetime(date_breaks = "4 hour", date_labels = "%b %e %H:%M")
  results <- list(prediction=pred, 
                  r.squared=r.squared,
                  RMSE=RMSE,
                  MAPE=MAPE,
                  sMAPE=sMAPE,
                  plot_train=p_train,
                  plot_test=p_test)
}
df_pollutant <- names(df)[grepl("imp_",names(df))]
res <- list()
for (name in df_pollutant){res[[name]] <- applyForecast(df[[name]])}

Forecast Comparison

dd <- as_tibble(res[[1]][2:5])
for (name in df_pollutant[-1]){
  dd <- dd %>% add_row(as_tibble(res[[name]][2:5]))
}
results <- dd %>% mutate(model=df_pollutant)

CO

results[1:6,]
grid.arrange(arrangeGrob(res[[2]]$plot_test, top = "TSR with Mean Imputation"),
             arrangeGrob(res[[4]]$plot_test, top = "TSR with Median Imputation"),
             ncol=1)

NO2

results[7:12,]
grid.arrange(arrangeGrob(res[[8]]$plot_test, top = "TSR with Mean Imputation"),
             arrangeGrob(res[[10]]$plot_test, top = "TSR with Median Imputation"),
             ncol=1)

O3

results[13:18,]
grid.arrange(arrangeGrob(res[[14]]$plot_test, top = "TSR with Mean Imputation"),
             arrangeGrob(res[[18]]$plot_test, top = "TSR with Linear Interpolation Imputation"),
             ncol=1)

PM10

results[19:24,]
grid.arrange(arrangeGrob(res[[20]]$plot_test, top = "TSR with Mean Imputation"),
             arrangeGrob(res[[22]]$plot_test, top = "TSR with Median Imputation"),
             ncol=1)

SO2

results[25:30,]
grid.arrange(arrangeGrob(res[[26]]$plot_test, top = "TSR with Mean Imputation"),
             arrangeGrob(res[[28]]$plot_test, top = "TSR with Median Imputation"),
             ncol=1)

LS0tCnRpdGxlOiAiVGltZSBTZXJpZXMgUmVncmVzc2lvbiIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IAogICAgdG9jOiB5ZXMKICAgIHRvY19mbG9hdDoKICAgICAgdG9jX2NvbGxhcHNlZDogeWVzCmVkaXRvcl9vcHRpb25zOiAKICBjaHVua19vdXRwdXRfdHlwZTogaW5saW5lCi0tLQoKIyMgSW1wb3J0IGxpYnJhcnkKYGBge3IgZWNobz1UUlVFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShncmlkRXh0cmEpCmxpYnJhcnkocmVhZHhsKQpsaWJyYXJ5KGx1YnJpZGF0ZSkKbGlicmFyeShobXMpCmxpYnJhcnkoZm9yZWNhc3QpCmxpYnJhcnkoZmFzdER1bW1pZXMpCmxpYnJhcnkoaW1wdXRlVFMpCmxpYnJhcnkoc2NhbGVzKQpsaWJyYXJ5KGthYmxlRXh0cmEpCmxpYnJhcnkobW9kZWx0aW1lKQpsaWJyYXJ5KE1ldHJpY3MpCmBgYAoKIyMgTG9hZCBkYXRhc2V0CgpgYGB7cn0KZGZfcmF3IDwtIHJlYWRfZXhjZWwoIkRhdGEgQXdhbCBkYW4gSGFzaWwgSW1wdXRhc2kgcmV2Lnhsc3giLCBzaGVldCA9IDIpICU+JSAKICAgICAgICAgIG11dGF0ZShKYW09YXNfaG1zKEphbSksIHdlZWsgPSByZXAoMTo1MSxlYWNoPTMzNilbMToxNjk5Ml0sCiAgICAgICAgICAgICAgICAgdGltZT1zZXEoeW1kX2htKCIyMDE4LTEtMSAwOjAwIiksIHltZF9obSgiMjAxOC0xMi0yMCAyMzozMCIpLCBieSA9IDMwKjYwKSwKICAgICAgICAgICAgICAgICBtb250aD1tb250aCh0aW1lKSwgd2RheT13ZGF5KHRpbWUpLGRheT1kYXkodGltZSksIAogICAgICAgICAgICAgICAgIGhvdXI9YXNfaG1zKHRpbWUpLCB5ZGF5PXlkYXkodGltZSksCiAgICAgICAgICAgICAgICAgd3RpbWU9cmVwKHNlcSh5bWRfaG0oIjIwMTgtMS0xIDA6MDAiKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB5bWRfaG0oIjIwMTgtMS03IDIzOjMwIiksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYnkgPSAzMCo2MCksNTEpWzE6MTY5OTJdKSAKaGVhZChkZl9yYXcpCmBgYAoKCmBgYHtyfQp2YXJfcG9sbHV0YW50IDwtIGNvbG5hbWVzKGRmX3JhdylbNjoxNV0KdmFyX3BvbGx1dGFudApgYGAKCgojIyBJbXB1dGF0aW9uCgoKYGBge3Igd2FybmluZz1GQUxTRX0KZGYgPC0gbGlzdCgpCmRmW1sibmEiXV0gPC0gZGF0YS5mcmFtZShpcy5uYShkZl9yYXcpKQpkZltbImltcHV0ZV8xIl1dIDwtIGRmX3JhdyAlPiUgCiAgICAgICAgICAgICAgICAgICAgZ3JvdXBfYnkoSGFyaSwgSikgJT4lIAogICAgICAgICAgICAgICAgICAgIG11dGF0ZV9pZihpcy5kb3VibGUsfnJlcGxhY2VfbmEoLixtZWRpYW4oLiwgbmEucm0gPSBUUlVFKSkpICU+JSAKICAgICAgICAgICAgICAgICAgICB1bmdyb3VwKCkKZGZbWyJpbXB1dGVfMiJdXSA8LSBkZl9yYXcgJT4lIAogICAgICAgICAgICAgICAgICAgIGdyb3VwX2J5KEhhcmksIEopICU+JSAKICAgICAgICAgICAgICAgICAgICBtdXRhdGVfaWYoaXMuZG91YmxlLH5yZXBsYWNlX25hKC4sbWVhbiguLCBuYS5ybSA9IFRSVUUpKSkgJT4lIAogICAgICAgICAgICAgICAgICAgIHVuZ3JvdXAoKQpkZltbImltcHV0ZV8zIl1dIDwtIGRmX3JhdyAlPiUgCiAgICAgICAgICAgICAgICAgICAgZ3JvdXBfYnkoSGFyaSwgSikgJT4lIAogICAgICAgICAgICAgICAgICAgIG11dGF0ZV9pZihpcy5kb3VibGUsfm5hX2ludGVycG9sYXRpb24oLiwgb3B0aW9uPSJsaW5lYXIiKSkgJT4lIAogICAgICAgICAgICAgICAgICAgIHVuZ3JvdXAoKQpgYGAKCgojIyBNb2RlbGxpbmcKCiMjIyBEdW1teSB2YXJpYWJsZXMKYGBge3Igd2FybmluZz1GQUxTRX0KZGZbWyJkdW1teSJdXSA8LSBkdW1teV9jb2xzKGRmX3Jhdywgc2VsZWN0X2NvbHVtbnMgPSBjKCJKIiwiSGFyaSIpKSAlPiUgCiAgICAgICAgICAgICAgICAgbXV0YXRlKHQ9c2VxX2Fsb25nKHRpbWUpKSAlPiUgCiAgICAgICAgICAgICAgICAgc2VsZWN0KHN0YXJ0c193aXRoKCJKXyIpLCBzdGFydHNfd2l0aCgiSGFyaV8iKSx0KQpgYGAKCgoKYGBge3J9CmZvciAoayBpbiB2YXJfcG9sbHV0YW50KXsKICBuYW1lIDwtIHBhc3RlMChrLCJfaW1wX21lZGlhbiIpCiAgZGZbW25hbWVdXSA8LSBiaW5kX2NvbHMoZGZbWyJpbXB1dGVfMSJdXSwgZGZbWyJkdW1teSJdXSkgJT4lCiAgICAgICAgICAgICAgICBzZWxlY3QodGltZSxzdGFydHNfd2l0aChrKSwgc3RhcnRzX3dpdGgoIkpfIiksIHN0YXJ0c193aXRoKCJIYXJpXyIpLHQpCiAgbmFtZSA8LSBwYXN0ZTAoaywiX2ltcF9tZWFuIikKICBkZltbbmFtZV1dIDwtIGJpbmRfY29scyhkZltbImltcHV0ZV8yIl1dLCBkZltbImR1bW15Il1dKSAlPiUgCiAgICAgICAgICAgICAgICBzZWxlY3QodGltZSxzdGFydHNfd2l0aChrKSwgc3RhcnRzX3dpdGgoIkpfIiksIHN0YXJ0c193aXRoKCJIYXJpXyIpLHQpCiAgbmFtZSA8LSBwYXN0ZTAoaywiX2ltcF9pbnRlcnBvbGF0aW9uIikKICBkZltbbmFtZV1dIDwtIGJpbmRfY29scyhkZltbImltcHV0ZV8zIl1dLCBkZltbImR1bW15Il1dKSAlPiUgCiAgICAgICAgICAgICAgICBzZWxlY3QodGltZSxzdGFydHNfd2l0aChrKSwgc3RhcnRzX3dpdGgoIkpfIiksIHN0YXJ0c193aXRoKCJIYXJpXyIpLHQpCn0KYGBgCgoKYGBge3J9CmluZCA8LSBkZl9yYXcgJT4lIG1ha2VfdHNfc3BsaXRzKC5sZW5ndGhfdGVzdCA9IDQ4KQppbmQKYGBgCgoKIyMjIE1vZGVsIEZpdHRpbmcgYW5kIEZvcmVjYXN0aW5nCmBgYHtyfQphcHBseUZvcmVjYXN0IDwtIGZ1bmN0aW9uKGRhdGEpewogIGluZCA8LSBkYXRhICU+JSBtYWtlX3RzX3NwbGl0cygubGVuZ3RoX3Rlc3QgPSA0OCkKICB0cmFpbiA8LSBkYXRhW2luZCRpZHhfdHJhaW4sXQogIHRlc3QgIDwtIGRhdGFbaW5kJGlkeF90ZXN0LF0KICB5eSA8LSBjb2xuYW1lcyhkYXRhKVsyXQogIGYgPC0gcGFzdGUwKHl5LCJ+Li0xLXRpbWUtSGFyaV8xIikKICBtIDwtIGxtKGYsIGRhdGEgPSB0cmFpbikKICByLnNxdWFyZWQgPC0gc3VtbWFyeShtKSRyLnNxdWFyZWQKICBwcmVkIDwtIG0gJT4lIHByZWRpY3QodGVzdCkKICBSTVNFIDwtIHJtc2UodGVzdFt5eV0gJT4lIHB1bGwoKSxwcmVkKQogIE1BUEUgPC0gbWFwZSh0ZXN0W3l5XSAlPiUgcHVsbCgpLHByZWQpCiAgc01BUEUgPC0gc21hcGUodGVzdFt5eV0gJT4lIHB1bGwoKSxwcmVkKQogIHBfdHJhaW4gPC0gZ2dwbG90KHRyYWluICU+JSBtdXRhdGUocHJlZGljdGVkPW0kZml0dGVkLnZhbHVlcykpICsgCiAgICAgICAgICAgICBnZW9tX2xpbmUoYWVzXyh4PX50aW1lLHk9YXMubmFtZSh5eSkpKSArICAKICAgICAgICAgICAgIGdlb21fbGluZShhZXMoeD10aW1lLHk9cHJlZGljdGVkKSwgY29sb3I9InJlZCIpICsKICAgICAgICAgICAgIHhsYWIoIiIpICsKICAgICAgICAgICAgIHNjYWxlX3hfZGF0ZXRpbWUoZGF0ZV9icmVha3MgPSAiNCBob3VyIiwgZGF0ZV9sYWJlbHMgPSAiJWIgJWUgJUg6JU0iKQogIHBfdGVzdCAgPC0gZ2dwbG90KHRlc3QgJT4lIG11dGF0ZShwcmVkaWN0ZWQ9cHJlZCkpICsgCiAgICAgICAgICAgICBnZW9tX2xpbmUoYWVzXyh4PX50aW1lLHk9YXMubmFtZSh5eSkpKSArICAKICAgICAgICAgICAgIGdlb21fbGluZShhZXMoeD10aW1lLHk9cHJlZGljdGVkKSwgY29sb3I9InJlZCIpICsKICAgICAgICAgICAgIHhsYWIoIiIpICsKICAgICAgICAgICAgIHNjYWxlX3hfZGF0ZXRpbWUoZGF0ZV9icmVha3MgPSAiNCBob3VyIiwgZGF0ZV9sYWJlbHMgPSAiJWIgJWUgJUg6JU0iKQogIHJlc3VsdHMgPC0gbGlzdChwcmVkaWN0aW9uPXByZWQsIAogICAgICAgICAgICAgICAgICByLnNxdWFyZWQ9ci5zcXVhcmVkLAogICAgICAgICAgICAgICAgICBSTVNFPVJNU0UsCiAgICAgICAgICAgICAgICAgIE1BUEU9TUFQRSwKICAgICAgICAgICAgICAgICAgc01BUEU9c01BUEUsCiAgICAgICAgICAgICAgICAgIHBsb3RfdHJhaW49cF90cmFpbiwKICAgICAgICAgICAgICAgICAgcGxvdF90ZXN0PXBfdGVzdCkKfQpgYGAKCgpgYGB7cn0KZGZfcG9sbHV0YW50IDwtIG5hbWVzKGRmKVtncmVwbCgiaW1wXyIsbmFtZXMoZGYpKV0KcmVzIDwtIGxpc3QoKQpmb3IgKG5hbWUgaW4gZGZfcG9sbHV0YW50KXtyZXNbW25hbWVdXSA8LSBhcHBseUZvcmVjYXN0KGRmW1tuYW1lXV0pfQpgYGAKCgojIyBGb3JlY2FzdCBDb21wYXJpc29uCmBgYHtyfQpkZCA8LSBhc190aWJibGUocmVzW1sxXV1bMjo1XSkKZm9yIChuYW1lIGluIGRmX3BvbGx1dGFudFstMV0pewogIGRkIDwtIGRkICU+JSBhZGRfcm93KGFzX3RpYmJsZShyZXNbW25hbWVdXVsyOjVdKSkKfQpyZXN1bHRzIDwtIGRkICU+JSBtdXRhdGUobW9kZWw9ZGZfcG9sbHV0YW50KQpgYGAKCgojIyMgQ08KYGBge3J9CnJlc3VsdHNbMTo2LF0KYGBgCgpgYGB7cn0KZ3JpZC5hcnJhbmdlKGFycmFuZ2VHcm9iKHJlc1tbMl1dJHBsb3RfdGVzdCwgdG9wID0gIlRTUiB3aXRoIE1lYW4gSW1wdXRhdGlvbiIpLAogICAgICAgICAgICAgYXJyYW5nZUdyb2IocmVzW1s0XV0kcGxvdF90ZXN0LCB0b3AgPSAiVFNSIHdpdGggTWVkaWFuIEltcHV0YXRpb24iKSwKICAgICAgICAgICAgIG5jb2w9MSkKYGBgCgoKCiMjIyBOTzIKYGBge3J9CnJlc3VsdHNbNzoxMixdCmBgYAoKYGBge3J9CmdyaWQuYXJyYW5nZShhcnJhbmdlR3JvYihyZXNbWzhdXSRwbG90X3Rlc3QsIHRvcCA9ICJUU1Igd2l0aCBNZWFuIEltcHV0YXRpb24iKSwKICAgICAgICAgICAgIGFycmFuZ2VHcm9iKHJlc1tbMTBdXSRwbG90X3Rlc3QsIHRvcCA9ICJUU1Igd2l0aCBNZWRpYW4gSW1wdXRhdGlvbiIpLAogICAgICAgICAgICAgbmNvbD0xKQpgYGAKCgoKIyMjIE8zCmBgYHtyfQpyZXN1bHRzWzEzOjE4LF0KYGBgCgpgYGB7cn0KZ3JpZC5hcnJhbmdlKGFycmFuZ2VHcm9iKHJlc1tbMTRdXSRwbG90X3Rlc3QsIHRvcCA9ICJUU1Igd2l0aCBNZWFuIEltcHV0YXRpb24iKSwKICAgICAgICAgICAgIGFycmFuZ2VHcm9iKHJlc1tbMThdXSRwbG90X3Rlc3QsIHRvcCA9ICJUU1Igd2l0aCBMaW5lYXIgSW50ZXJwb2xhdGlvbiBJbXB1dGF0aW9uIiksCiAgICAgICAgICAgICBuY29sPTEpCmBgYAoKCiMjIyBQTTEwCmBgYHtyfQpyZXN1bHRzWzE5OjI0LF0KYGBgCgpgYGB7cn0KZ3JpZC5hcnJhbmdlKGFycmFuZ2VHcm9iKHJlc1tbMjBdXSRwbG90X3Rlc3QsIHRvcCA9ICJUU1Igd2l0aCBNZWFuIEltcHV0YXRpb24iKSwKICAgICAgICAgICAgIGFycmFuZ2VHcm9iKHJlc1tbMjJdXSRwbG90X3Rlc3QsIHRvcCA9ICJUU1Igd2l0aCBNZWRpYW4gSW1wdXRhdGlvbiIpLAogICAgICAgICAgICAgbmNvbD0xKQpgYGAKCgojIyMgU08yCmBgYHtyfQpyZXN1bHRzWzI1OjMwLF0KYGBgCgpgYGB7cn0KZ3JpZC5hcnJhbmdlKGFycmFuZ2VHcm9iKHJlc1tbMjZdXSRwbG90X3Rlc3QsIHRvcCA9ICJUU1Igd2l0aCBNZWFuIEltcHV0YXRpb24iKSwKICAgICAgICAgICAgIGFycmFuZ2VHcm9iKHJlc1tbMjhdXSRwbG90X3Rlc3QsIHRvcCA9ICJUU1Igd2l0aCBNZWRpYW4gSW1wdXRhdGlvbiIpLAogICAgICAgICAgICAgbmNvbD0xKQpgYGAKCgoKCgoKCgoKCg==