Packaging

library(readxl)
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.1
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.1
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.2
## Registered S3 methods overwritten by 'forecast':
##   method       from
##   fitted.Arima TSA 
##   plot.Arima   TSA
library(lattice)
## Warning: package 'lattice' was built under R version 4.5.2
library(ggplot2)
library(lattice)
library(StatDA)
## Warning: package 'StatDA' was built under R version 4.5.1
## Loading required package: sgeostat
## 
## Attaching package: 'sgeostat'
## The following object is masked from 'package:TSA':
## 
##     lagplot
## Registered S3 method overwritten by 'geoR':
##   method         from    
##   plot.variogram sgeostat
library(rugarch)
## Warning: package 'rugarch' was built under R version 4.5.2
## Loading required package: parallel
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.5.2
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(gridExtra)
library(knitr)
library(FinTS)
## Warning: package 'FinTS' was built under R version 4.5.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.5.1
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'FinTS'
## The following object is masked from 'package:forecast':
## 
##     Acf
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.1
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Data Import

#Import from excel
Data<- read_excel("C:/Users/HUAWEI/Desktop/Year 2 SEM 2/ARECA Case Competition/soong.xlsx")

# Fix the data cleaning - convert to numeric first
Data <- Data %>%
  mutate(
    # Convert to numeric, handling any conversion issues
    Original_Loss_Numeric = as.numeric(`ORIGINAL LOSS VALUE`),
  ) %>%
  group_by(Year) %>%
  summarise(
    Total_Original_Loss = sum(Original_Loss_Numeric, na.rm = TRUE),
    Event_Count = n(),
    Avg_Loss_Per_Event = Total_Original_Loss / Event_Count
  ) %>%
  ungroup() %>%
  filter(Year >= 1980)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Original_Loss_Numeric = as.numeric(`ORIGINAL LOSS VALUE`)`.
## Caused by warning:
## ! NAs introduced by coercion
View(Data)
Loss_data<-Data[-c(1,2),]
View(Loss_data)
colnames(Loss_data)<- c("Date", "Ori Loss", "Number of Event", "Adj Close")

#Obtaining Daily Returns
returns <- diff(log(Loss_data$`Adj Close`))

DATA ANALYSIS

Data Visualisation

Average Losses Trend

ggplot(data = Loss_data, aes(x = Date, y = `Adj Close`)) + geom_line(colour = "blue", size = 0.5) + labs(title = "Trend of Average Losses from 1982 - 2025", x = "Date", y = "Average Losses (per Event)(Millions, $)")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Average Losses Yearly Changes

ts.plot(returns, gpars = list(xlab="Time", ylab = "Daily Returns (%)", main = "Yearly Fluctuations of Average Losses"))

Density of Average Losses

hist(returns, col = "lightblue", border = "black", prob = TRUE, xlab = "Daily Returns (%)", main = "Density of Average Losses")
lines(density(returns), lwd = 2, col = "red")

Analysis of Annual Data Pattern

#Format Conversion for Dates
str(Loss_data)
## tibble [44 × 4] (S3: tbl_df/tbl/data.frame)
##  $ Date           : num [1:44] 1982 1983 1984 1985 1986 ...
##  $ Ori Loss       : num [1:44] 1.00e+07 1.88e+08 1.05e+08 2.85e+08 2.15e+08 ...
##  $ Number of Event: int [1:44] 12 17 18 14 19 10 11 4 8 15 ...
##  $ Adj Close      : num [1:44] 833333 11058824 5833333 20357143 11315789 ...

QQ-Plot of NINTENDO Daily Returns

qqplot.das(returns, distribution = "norm", ylab = "Sample quantiles",
           xlab = "Theoretical Quantiles", main = "Normal Q-Q Plot Average Losses Yearly Fluctuations (1982 - 2025)", las = par("las"),
           datax = FALSE, envelope = 0.95, labels = FALSE, col = "red",
           lwd = 2, pch = 20,line = c("quartiles","robust","none"), cex = 1,
           xaxt = "s", add.plot = FALSE, xlim = NULL, ylim = NULL)

LINEAR MODELLING

Test of Stationarity

Observation on Initial Data

Method 1: Analysis of Sample ACF and PACF

acf(log(Loss_data$`Adj Close`), main="ACF of Average Losses")

pacf(log(Loss_data$`Adj Close`), main="PACF of Average Losses")

Method 2: Statistical Analysis

Augmented Dicky-Fuller Test

#H0: Data is non-stationary
#H1: Data is stationary
adf.test(log(Loss_data$`Adj Close`))
## 
##  Augmented Dickey-Fuller Test
## 
## data:  log(Loss_data$`Adj Close`)
## Dickey-Fuller = -3.1559, Lag order = 3, p-value = 0.1162
## alternative hypothesis: stationary

Kwiatkowski–Phillips–Schmidt–Shin (KPSS) Test

#H0: Data is stationary
#H1: Data is non-stationary
kpss.test(log(Loss_data$`Adj Close`))
## Warning in kpss.test(log(Loss_data$`Adj Close`)): p-value smaller than printed
## p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  log(Loss_data$`Adj Close`)
## KPSS Level = 1.0539, Truncation lag parameter = 3, p-value = 0.01

Observation on First Order Differenced Data

Method 1: Analysis of Sample ACF and PACF

diff.data <- diff(log(Loss_data$`Adj Close`))
acf(diff.data, main="ACF of Differenced Average Losses")

pacf(diff.data, main="PACF of Differenced Average Losses")

Method 2: Statistical Analysis

Augmented Dicky-Fuller Test

#H0: Data is non-stationary
#H1: Data is stationary
adf.test(diff.data)
## Warning in adf.test(diff.data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff.data
## Dickey-Fuller = -5.3191, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary

Kwiatkowski–Phillips–Schmidt–Shin (KPSS) Test

#H0: Data is stationary
#H1: Data is non-stationary
kpss.test(diff.data)
## Warning in kpss.test(diff.data): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff.data
## KPSS Level = 0.066223, Truncation lag parameter = 3, p-value = 0.1

Model Building with Estimation of Parameters (Box-Jenkins)

Modelling with Stationary Time Series Model

Model X : ARMA(1,1)/ARIMA(1,0,1)

ModelX <- Arima(diff.data, order = c(1,0,1), method = "ML")
summary(ModelX)
## Series: diff.data 
## ARIMA(1,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1     ma1    mean
##       0.1631  -1.000  0.1128
## s.e.  0.1570   0.079  0.0144
## 
## sigma^2 = 1.126:  log likelihood = -63.74
## AIC=135.49   AICc=136.54   BIC=142.53
## 
## Training set error measures:
##                      ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 0.02347107 1.023486 0.7352977 -29.67115 153.4586 0.3922913
##                    ACF1
## Training set 0.01816958

Casuality and Invertibility Test for Model X

#The coefficients of Autoregressive Models and Moving Average models should fall within the unit root, |z| </= 1
autoplot(ModelX)

Modelling with Non-Stationary Time Series Model

Model Y : ARIMA(1,1,1)

ModelY <- Arima(log(Loss_data$`Adj Close`), order = c(1,1,1), method = "ML")
summary(ModelY)
## Series: log(Loss_data$`Adj Close`) 
## ARIMA(1,1,1) 
## 
## Coefficients:
##          ar1      ma1
##       0.0401  -0.6421
## s.e.  0.2457   0.1892
## 
## sigma^2 = 1.421:  log likelihood = -67.78
## AIC=141.56   AICc=142.18   BIC=146.85
## 
## Training set error measures:
##                     ME     RMSE       MAE      MPE    MAPE      MASE
## Training set 0.3548077 1.150625 0.8787882 1.717231 5.01989 0.8000326
##                     ACF1
## Training set -0.06156119

Casuality and Invertibility Test for Model Y

#The coefficients of Autoregressive Models and Moving Average models should fall within the unit root, |z| </= 1
autoplot(ModelY)

Statistical Comparison for Both Models

Statistics <- c("Log-likelihood", "AICc")
Statistics
## [1] "Log-likelihood" "AICc"
#Statistics obtained ffrom Model X
ModelX.stats <- c(ModelX$loglik, ModelX$aicc)
ModelX.stats
## [1] -63.74448 136.54159
##Statistics obtained ffrom Model Y
ModelY.stats <- c(ModelY$loglik, ModelY$aicc)
ModelY.stats
## [1] -67.78211 142.17961
Model.Statistics <- data.frame(Statistics, ModelX.stats, ModelY.stats)
View(Model.Statistics)

Test of Non-Linearlity

Residual Check for ARIMA(1,1,1)

checkresiduals(ModelY)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1)
## Q* = 3.597, df = 7, p-value = 0.8248
## 
## Model df: 2.   Total lags used: 9

White Noise Modelling

Model_w.noise <- Arima(log(Loss_data$'Adj Close'), order = c(0,1,0))
Model_w.noise.stats <- c(Model_w.noise$loglik, Model_w.noise$aicc)
Model_w.noise.stats
## [1] -73.82973 149.75703

Residual Check for White Noise Model [ARIMA(0,1,0)]

checkresiduals(Model_w.noise)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0)
## Q* = 11.923, df = 9, p-value = 0.2177
## 
## Model df: 0.   Total lags used: 9

Analysis of Properties of White Noise Model

Residual Plots

#Ordinary Residuals
plot(Model_w.noise$residuals, main="Residuals Plot")

#Squared Residuals
plot(Model_w.noise$residuals^2, main = "Squared Residuals Plot")

Test of IID Properties of White Noise (Independence and Identical Distribution)

Sample ACF & PACF of Absolute Residuals

residuals.abs <- abs(Model_w.noise$residuals)

acf(residuals.abs, main="ACF of Absolute Residuals")

pacf(residuals.abs, main="PACF of Absolute Residuals")

Sample ACF & PACF of Squared Residuals

residuals.squared <- (Model_w.noise$residuals^2)

acf(residuals.squared, main="ACF of Squared Residuals")

pacf(residuals.squared, main="PACF of Squared Residuals")

FORECASTING

Forecasting Linear Time Series Model (Model Y)

#Forecast of ModelY - ARIMA (1,1,1)
ModelY_f.cast <- forecast::forecast(ModelY, h = 10)
ModelY_f.cast
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 45       20.14943 18.62185 21.67701 17.81319 22.48566
## 46       20.14173 18.49758 21.78587 17.62722 22.65623
## 47       20.14142 18.40088 21.88195 17.47950 22.80333
## 48       20.14140 18.31001 21.97280 17.34053 22.94228
## 49       20.14140 18.22346 22.05934 17.20817 23.07464
## 50       20.14140 18.14066 22.14215 17.08153 23.20127
## 51       20.14140 18.06115 22.22166 16.95993 23.32287
## 52       20.14140 17.98457 22.29824 16.84281 23.44000
## 53       20.14140 17.91062 22.37219 16.72971 23.55310
## 54       20.14140 17.83904 22.44377 16.62024 23.66257
Forecasted <- exp(ModelY_f.cast$mean)
Forecasted
## Time Series:
## Start = 45 
## End = 54 
## Frequency = 1 
##  [1] 563358529 559036337 558863726 558856806 558856529 558856517 558856517
##  [8] 558856517 558856517 558856517
Upper <- exp(ModelY_f.cast$upper)
Upper
## Time Series:
## Start = 45 
## End = 54 
## Frequency = 1 
##           80%         95%
## 45 2595402586  5826369085
## 46 2893901806  6909962986
## 47 3185735982  8004995874
## 48 3488711425  9198257906
## 49 3804087604 10499961351
## 50 4132487945 11917507655
## 51 4474478189 13458471563
## 52 4830600323 15130753280
## 53 5201381941 16942607134
## 54 5587342264 18902662745
Lower <- exp(ModelY_f.cast$lower)
Lower
## Time Series:
## Start = 45 
## End = 54 
## Frequency = 1 
##          80%      95%
## 45 122282699 54471804
## 46 107993169 45227684
## 47  98039720 39016718
## 48  89523291 33954357
## 49  82101322 29744930
## 50  75576895 26206873
## 51  69800453 23206246
## 52  64654616 20641445
## 53  60045697 18434035
## 54  55897883 16522572
#Graph of Forecast for ARIMA (1,1,1)
plot(ModelY_f.cast, xlim = c(41, 50))