# Load data
air_trafic <- read.csv("/Users/pin.lyu/Desktop/BC_Class_Folder/Predictive_Analytics/Data_Sets/air_traffic.csv")
# Check variables
summary(air_trafic)
## Year Month Dom_Pax Int_Pax
## Min. :2003 Min. : 1.000 Length:249 Length:249
## 1st Qu.:2008 1st Qu.: 3.000 Class :character Class :character
## Median :2013 Median : 6.000 Mode :character Mode :character
## Mean :2013 Mean : 6.446
## 3rd Qu.:2018 3rd Qu.: 9.000
## Max. :2023 Max. :12.000
## Pax Dom_Flt Int_Flt Flt
## Length:249 Length:249 Length:249 Length:249
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Dom_RPM Int_RPM RPM Dom_ASM
## Length:249 Length:249 Length:249 Length:249
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Int_ASM ASM Dom_LF Int_LF
## Length:249 Length:249 Min. :13.09 Min. :23.15
## Class :character Class :character 1st Qu.:77.72 1st Qu.:76.43
## Mode :character Mode :character Median :82.12 Median :79.91
## Mean :79.94 Mean :77.30
## 3rd Qu.:85.24 3rd Qu.:82.93
## Max. :89.96 Max. :89.44
## LF
## Min. :13.83
## 1st Qu.:76.94
## Median :81.58
## Mean :79.29
## 3rd Qu.:84.07
## Max. :89.14
# Check N/As
missmap(air_trafic)
# Extract columns of interest
air <- air_trafic |>
select('Year',
'Month',
'Dom_Pax', # Total numbers of domestic passengers
'Int_Pax', # Total numbers of international passengers
'Pax', # Total numbers of passengers
'Dom_Flt', # Total numbers of domestic flights
'Int_Flt', # Total numbers of international flights
'Flt' # Total number of flights
)
## Create new time colunm for grahing
# Convert year and month columns to character
air$Year <- as.character(air$Year)
air$Month <- as.character(air$Month)
# Combine year and month columns into a single string
air$time <- as.Date(paste0(air$Year, '-', air$Month, '-01'))
# Delete comma in numbers
# Remove commas & Convert the column to numeric
air$Int_Flt <- as.numeric(gsub(",", "", air$Int_Flt))
air$Flt <- as.numeric(gsub(",", "", air$Flt))
# Historic record of total flights in the U.S. from 2003 to 2023
ggplot(air, aes(x = time, y = Flt)) +
geom_line() +
labs(x = 'Time', y = 'Number of Flights', title = 'Total Number of Monthly Flights In The U.S. (2003-2023)') +
theme_classic() +
scale_x_date(date_breaks = "2 year",
date_labels = "%Y-%m")
Update: I continued using the same data set that I used for last week’s discussion. However, this time I made some changes. Instead of only looking at the total number of monthly international flights, I switched to the total monthly flights from 2003 to 2023, which includes the total number of international and domestic monthly flights. Overall, the level trend is the same comparing to the graph for total monthly international flights.
air_TS <- ts(air$Flt, start = c(2003, 1), frequency = 12)
ets_model_1 <- ets(air_TS)
print(summary(ets_model_1))
## ETS(A,N,A)
##
## Call:
## ets(y = air_TS)
##
## Smoothing parameters:
## alpha = 0.9999
## gamma = 1e-04
##
## Initial states:
## l = 868159.6051
## s = -9039.304 -25490.73 9659.593 -19105.24 57174.14 60439.46
## 20069.12 10088.58 -18498.4 22907.19 -81045.74 -27158.67
##
## sigma: 32722.21
##
## AIC AICc BIC
## 6566.550 6568.610 6619.312
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -544.2574 31789 13240.64 -0.4694376 2.37158 0.2483635 0.2651666
ets_model_2 <- air_TS |>
ets(model = 'AAN')
print(summary(ets_model_2))
## ETS(A,Ad,N)
##
## Call:
## ets(y = air_TS, model = "AAN")
##
## Smoothing parameters:
## alpha = 0.7452
## beta = 1e-04
## phi = 0.9145
##
## Initial states:
## l = 809245.9453
## b = 8188.0656
##
## sigma: 55580.05
##
## AIC AICc BIC
## 6821.733 6822.081 6842.838
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -914.273 55019.18 36274.13 -0.8759635 5.586221 0.6804182
## ACF1
## Training set -0.01824729
ets_model_3 <- air_TS |>
ets(model = 'MAN')
print(summary(ets_model_3))
## ETS(M,A,N)
##
## Call:
## ets(y = air_TS, model = "MAN")
##
## Smoothing parameters:
## alpha = 0.8116
## beta = 1e-04
##
## Initial states:
## l = 815732.7348
## b = 8187.786
##
## sigma: 0.0844
##
## AIC AICc BIC
## 6899.353 6899.600 6916.941
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -10376.87 56136.53 36949.02 -2.110406 5.67468 0.6930777
## ACF1
## Training set -0.08687608
Based on the results above, we can see that the A,N,A model has the lowest AICc and BIC value among the three models, indicating that it is the superior model out of the three.