library(forecast)
library(cluster)
source("ArimaxMultipleInput_EDXU.R")
## Data for dtu31761a3d
frameData <- function(){
dat <- read.csv("passengers.csv", header = T)
num_obs <- length(dat[,1])
seq_12 <- seq(12)
vec_month <- rep(0, num_obs)
vec_year <- rep(0, num_obs)
for (i in 1: (num_obs / 12)){
vec_month[(12 * i - 11): (12 * i)] <- seq_12
vec_year[(12 * i - 11): (12 * i)] <- rep(i, 12)
}
dat.f <- data.frame("series" = seq(1, num_obs), "year" = vec_year, "month" = vec_month, "pass" = dat[,1])
return(dat.f)
}
dat.f <- frameData()
rm(frameData)
num_obs <- length(dat.f$series)
par(bty="n")
plot(factor(dat.f$month), dat.f$pass, xlab = "Month", ylab = "1000 Passengers",
main = "Box Plot of Passengers By Month")
plot(factor(dat.f$year), dat.f$pass, xlab = "Year", ylab = "1000 Passengers", bty = "n",
main = "Box Plot of Passengers By Year")
vec_ave_year <- rep(0, 11)
vec_pass.ave <- rep(0, 132)
for (i in 1: 11){
vec_ave_year[i] <- sum(dat.f$pass[dat.f$year == i]) / 12
vec_pass.ave[dat.f$year == i] <- dat.f$pass[dat.f$year == i] - vec_ave_year[i]
}
# dat.f["pass.ave"] <- vec_pass.ave
dat.f["pass.log"] <- log(dat.f$pass)
plot(dat.f$series, dat.f$pass.log, type = "b", col = "blue", bty = "n")
plotTimeSeries(dat.f$pass.log, num_lag = 24)
ts_pass.log <- ts(dat.f$pass.log, frequency = 12)
mod_auto <- auto.arima(ts_pass.log)
plotTimeSeriesResidual(mod_auto$residuals, num_lag = 24)
mod_1 <- arima(dat.f$pass.log, order = c(0, 1, 1), seasonal = list(order = c(0, 1, 1), period = 12))
plotTimeSeriesResidual(mod_1$residual, num_lag = 24)
num_step <- 12 # predict the next 24 hours
num_scena <- 1000 # initial number of scenarios
num_scena.red <- 10 # Reduce number of scenarios
# Data structure to store the scenarios
mat_scena <- matrix(NA, nrow = num_scena, ncol = num_step)
# Loop over scenarios and simulate with the arima model a prediction for the next 24 hours
for(w in 1: num_scena){
mat_scena[w,] <- simulate(mod_1, nsim = num_step, future = TRUE, seed = w)
}
mat_scena.exp <- exp(mat_scena)
plot(dat.f$pass, type = 'l', col='red', lwd = 1, bty="n", cex = , xlab = 'Time (months)',
ylab = 'Number of Passengers (Thousands)', ylim = c(min(dat.f$pass), max(mat_scena.exp)),
xlim = c(0, num_obs + 12), main = 'Scenario Generation for Number of Passengers in One Future Year')
for(w in 1: num_scena){
lines(seq((num_obs + 1), (num_obs + 12)), mat_scena.exp[w, ], col = 'grey', lwd = 0.5)
}
legend('topleft', inset = .02, legend = c('Observations', 'Scenarios'),
col = c('red', 'grey'), lwd = c(1, 0.5), lty = c(1, 1))
mod_pam <- pam(mat_scena.exp, num_scena.red)
There were 13 warnings (use warnings() to see them)
mat_scena.exp.red <- mod_pam$medoids
plot(dat.f$pass, type = 'l', col='red', lwd = 1, bty="n", cex = , xlab = 'Time (months)',
ylab = 'Number of Passengers (Thousands)', ylim = c(min(dat.f$pass), max(mat_scena.exp)),
xlim = c(0, num_obs + 12), main = 'Scenario Reduction for Number of Passengers in One Future Year')
for(w in 1: num_scena.red){
lines(seq((num_obs + 1), (num_obs + 12)), mat_scena.exp.red[w,], col = 'grey', lwd = 0.5)
}
legend('topleft', inset = .02, legend = c('Observations', 'Scenarios'),
col = c('red', 'grey'), lwd = c(1, 0.5), lty = c(1, 1))
vec_prob.red <- rep(0, 12)
for (i in 1: num_scena.red) {
vec_prob.red[i] <- (1 / num_scena) * sum(mod_pam$clustering == i, na.rm = TRUE)
}
sum(vec_prob.red) > 1 - 10^6 # Check if the sum of prob is 1
[1] TRUE