1 Data Processing

# Stack the per-period data frames into a single table, in the order listed.
Data <- bind_rows(
  Data2013, Data2014, Data2015, Data2016, Data2017, Data2018, Data2019,
  Data2020_JanFeb, Data2020_Mar, Data2020_Apr, Data2020_May, Data2020_Jun
)

2 Descriptive Analysis

2.1 Route: SGN - HAN & HAN - SGN

# Keep only the HAN-SGN / SGN-HAN route pair.
SGN_HAN <- Data %>% filter(Route %in% c("HAN-SGN", "SGN-HAN"))

# Number of flights over time.
# df holds one row per distinct (ArrDep, FlightDate, Route, TotalPax)
# combination; n is how many source rows share that combination.
df <- SGN_HAN %>%
  group_by(ArrDep, FlightDate, Route, TotalPax) %>%
  summarise(n = n())

# Flights per day, both directions combined.
df3 <- df %>%
  group_by(FlightDate) %>%
  summarise(amount = sum(n))

# Flights per day and direction, keeping days with at least 3.
# The threshold must be numeric: comparing against "3" coerces amount to
# character and compares as text, which wrongly drops e.g. 10 ("10" < "3").
df4 <- df %>%
  group_by(ArrDep, FlightDate) %>%
  summarise(amount = sum(n)) %>%
  filter(amount >= 3)

# Passengers per day. Each df row stands for n source rows with the same
# TotalPax, so weight by n; a plain sum(TotalPax) would count them only once.
df5 <- df %>%
  group_by(FlightDate) %>%
  summarise(PaxAmount = sum(TotalPax * n))

# Passengers per day and direction, keeping strictly positive totals
# (numeric comparison, for the same reason as above).
df6 <- df %>%
  group_by(ArrDep, FlightDate) %>%
  summarise(PaxAmount = sum(TotalPax * n)) %>%
  filter(PaxAmount > 0)

# Total flights per day (df3: all ArrDep values combined)
ggplot(
  data = df3,
  mapping = aes(x = FlightDate, y = amount)
) +
  geom_line(colour = "#00AFBB", size = 0.3) +
  theme_minimal()

# Comparison of flights per day by ArrDep (one line per ArrDep value)
ggplot(
  data = df4,
  mapping = aes(x = FlightDate, y = amount, colour = ArrDep)
) +
  geom_line(size = 0.2) +
  scale_colour_manual(values = c("#00AFBB", "#E7B800")) +
  theme_minimal()

# Total passengers per day (df5: all ArrDep values combined)
ggplot(
  data = df5,
  mapping = aes(x = FlightDate, y = PaxAmount)
) +
  geom_line(colour = "#00bb99", size = 0.3) +
  theme_minimal()

# Comparison of passengers per day by ArrDep.
# Rows whose ArrDep is "d" are removed before plotting.
df6 <- filter(df6, ArrDep != "d")
ggplot(
  data = df6,
  mapping = aes(x = FlightDate, y = PaxAmount, colour = ArrDep)
) +
  geom_line(size = 0.2) +
  scale_colour_manual(values = c("#9600bb", "#00bbb8")) +
  theme_minimal()

2.2 Route: SGN - NRT & NRT - SGN

# Keep only the NRT-SGN / SGN-NRT route pair.
SGN_NRT <- Data %>% filter(Route %in% c("NRT-SGN", "SGN-NRT"))

# Number of flights over time.
# df holds one row per distinct (ArrDep, FlightDate, Route, TotalPax)
# combination; n is how many source rows share that combination.
df <- SGN_NRT %>%
  group_by(ArrDep, FlightDate, Route, TotalPax) %>%
  summarise(n = n())

# Flights per day, both directions combined.
df3 <- df %>%
  group_by(FlightDate) %>%
  summarise(amount = sum(n))

# Flights per day and direction, keeping days with at least 3.
# The threshold must be numeric: comparing against "3" coerces amount to
# character and compares as text, which wrongly drops e.g. 10 ("10" < "3").
df4 <- df %>%
  group_by(ArrDep, FlightDate) %>%
  summarise(amount = sum(n)) %>%
  filter(amount >= 3)

# Passengers per day. Each df row stands for n source rows with the same
# TotalPax, so weight by n; a plain sum(TotalPax) would count them only once.
df5 <- df %>%
  group_by(FlightDate) %>%
  summarise(PaxAmount = sum(TotalPax * n))

# Passengers per day and direction, keeping strictly positive totals
# (numeric comparison, for the same reason as above).
df6 <- df %>%
  group_by(ArrDep, FlightDate) %>%
  summarise(PaxAmount = sum(TotalPax * n)) %>%
  filter(PaxAmount > 0)

# Total flights per month (daily totals in df3 rolled up to calendar months)
df3 <- df3 %>%
  mutate(month = floor_date(FlightDate, unit = "month")) %>%
  group_by(month) %>%
  summarise(amount = sum(amount))

ggplot(
  data = df3,
  mapping = aes(x = month, y = amount)
) +
  geom_line(colour = "#00AFBB", size = 0.4) +
  theme_minimal()

# Comparison of flights per month by ArrDep
df4 <- df4 %>%
  mutate(month = floor_date(FlightDate, unit = "month")) %>%
  group_by(ArrDep, month) %>%
  summarise(amount = sum(amount))

ggplot(
  data = df4,
  mapping = aes(x = month, y = amount, colour = ArrDep)
) +
  geom_line(size = 0.4) +
  scale_colour_manual(values = c("#00AFBB", "#E7B800")) +
  theme_minimal()

# Total passengers per day (df5: all ArrDep values combined)
ggplot(
  data = df5,
  mapping = aes(x = FlightDate, y = PaxAmount)
) +
  geom_line(colour = "#00bb99", size = 0.3) +
  theme_minimal()

# Comparison of passengers per day by ArrDep
ggplot(
  data = df6,
  mapping = aes(x = FlightDate, y = PaxAmount, colour = ArrDep)
) +
  geom_line(size = 0.2) +
  scale_colour_manual(values = c("#9600bb", "#00bbb8")) +
  theme_minimal()

3 Time Series Analysis

# Monthly totals of n from df (whichever df was assigned last above), keeping
# months up to January 2020; the month column is then dropped so that only
# the amount column remains.
df8 <- df %>%
  mutate(month = floor_date(FlightDate, unit = "month")) %>%
  group_by(month) %>%
  summarise(amount = sum(n)) %>%
  filter(month <= as.Date("2020-01-31"))
df8[["month"]] <- NULL

# Monthly time series running from January 2013 to January 2020
df8_myts <- ts(
  data = df8,
  start = c(2013, 1),
  end = c(2020, 1),
  frequency = 12
)
plot.ts(df8_myts)

# Decompose the series into its components and plot them
df8_timeseriescomponents <- decompose(df8_myts)
plot(df8_timeseriescomponents)

# Seasonally adjusted series: original minus the estimated seasonal component
df8_timeseriesseasonallyadjusted <-
  df8_myts - df8_timeseriescomponents[["seasonal"]]
plot(df8_timeseriesseasonallyadjusted)

# Exponential smoothing without trend and without seasonal component.
# Fit on the monthly ts object rather than the bare df8 table, so that the
# fitted values and any forecasts built from this model keep the monthly
# time index defined on df8_myts instead of a plain 1..N index.
df8_forecasts <- HoltWinters(df8_myts, beta = FALSE, gamma = FALSE)
df8_forecasts
## Holt-Winters exponential smoothing without trend and without seasonal component.
## 
## Call:
## HoltWinters(x = df8, beta = FALSE, gamma = FALSE)
## 
## Smoothing parameters:
##  alpha: 0.7375591
##  beta : FALSE
##  gamma: FALSE
## 
## Coefficients:
##      [,1]
## a 373.848
# Fitted values (xhat) and level from the smoothing fit
df8_forecasts[["fitted"]]
## Time Series:
## Start = 2 
## End = 85 
## Frequency = 1 
##         xhat     level
##  2 228.00000 228.00000
##  3 207.34835 207.34835
##  4 223.31772 223.31772
##  5 178.09228 178.09228
##  6 171.38619 171.38619
##  7 165.20088 165.20088
##  8 176.85366 176.85366
##  9 188.76254 188.76254
## 10 192.62548 192.62548
## 11 187.73880 187.73880
## 12 132.61452 132.61452
## 13 118.88521 118.88521
## 14 110.11917 110.11917
## 15 101.91812 101.91812
## 16 104.92875 104.92875
## 17 102.76862 102.76862
## 18 105.15195 105.15195
## 19  99.87697  99.87697
## 20 133.15787 133.15787
## 21 144.84238 144.84238
## 22 144.95863 144.95863
## 23 144.98914 144.98914
## 24 162.69857 162.69857
## 25 171.03404 171.03404
## 26 227.80098 227.80098
## 27 224.25997 224.25997
## 28 241.76965 241.76965
## 29 227.18836 227.18836
## 30 233.68747 233.68747
## 31 224.32971 224.32971
## 32 241.78795 241.78795
## 33 246.36970 246.36970
## 34 246.09703 246.09703
## 35 232.01184 232.01184
## 36 226.10263 226.10263
## 37 231.18985 231.18985
## 38 242.85077 242.85077
## 39 232.63501 232.63501
## 40 238.80468 238.80468
## 41 224.93512 224.93512
## 42 227.93321 227.93321
## 43 224.29468 224.29468
## 44 240.30363 240.30363
## 45 245.24260 245.24260
## 46 239.90075 239.90075
## 47 234.07348 234.07348
## 48 270.15968 270.15968
## 49 284.05553 284.05553
## 50 304.66623 304.66623
## 51 284.99831 284.99831
## 52 303.43853 303.43853
## 53 293.52682 293.52682
## 54 293.87582 293.87582
## 55 285.11670 285.11670
## 56 303.46960 303.46960
## 57 307.54860 307.54860
## 58 301.98106 301.98106
## 59 293.14432 293.14432
## 60 286.39984 286.39984
## 61 291.26785 291.26785
## 62 304.34636 304.34636
## 63 284.91436 284.91436
## 64 306.36674 306.36674
## 65 308.30892 308.30892
## 66 311.76887 311.76887
## 67 303.08863 303.08863
## 68 307.44862 307.44862
## 69 310.80553 310.80553
## 70 302.83581 302.83581
## 71 309.59494 309.59494
## 72 306.94346 306.94346
## 73 310.67296 310.67296
## 74 312.38929 312.38929
## 75 288.50027 288.50027
## 76 305.09515 305.09515
## 77 304.28741 304.28741
## 78 311.45102 311.45102
## 79 304.48033 304.48033
## 80 339.52890 339.52890
## 81 364.21581 364.21581
## 82 358.89372 358.89372
## 83 353.80920 353.80920
## 84 341.41142 341.41142
## 85 370.61034 370.61034
# Plot the smoothing fit
plot(df8_forecasts)

# Sum of squared errors of the fit
df8_forecasts[["SSE"]]
## [1] 40349.86
# Same smoothing model on the ts object, with an explicit starting level.
# NOTE(review): l.start = 9831 is far above the series values shown in the
# recorded output (fitted values of roughly 100-370) - confirm this constant
# is meant for this series.
HoltWinters(
  x = df8_myts,
  beta = FALSE,
  gamma = FALSE,
  l.start = 9831
)
## Holt-Winters exponential smoothing without trend and without seasonal component.
## 
## Call:
## HoltWinters(x = df8_myts, beta = FALSE, gamma = FALSE, l.start = 9831)
## 
## Smoothing parameters:
##  alpha: 0.9968873
##  beta : FALSE
##  gamma: FALSE
## 
## Coefficients:
##       [,1]
## a 375.0183
library("forecast")
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:ggplot2':
## 
##     autolayer
# Forecast 4 periods ahead from the smoothing fit and plot the result.
# Call the exported generic: S3 dispatch picks the HoltWinters method, so
# there is no need to reach into the package namespace with `:::`.
df8_forecasts2 <- forecast::forecast(df8_forecasts, h = 4)
plot(df8_forecasts2)