library(readxl)
library(TSA)
## Warning: package 'TSA' was built under R version 4.5.1
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.1
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.5.2
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.Arima TSA
## plot.Arima TSA
library(lattice)
## Warning: package 'lattice' was built under R version 4.5.2
library(ggplot2)
library(lattice)
library(StatDA)
## Warning: package 'StatDA' was built under R version 4.5.1
## Loading required package: sgeostat
##
## Attaching package: 'sgeostat'
## The following object is masked from 'package:TSA':
##
## lagplot
## Registered S3 method overwritten by 'geoR':
## method from
## plot.variogram sgeostat
library(rugarch)
## Warning: package 'rugarch' was built under R version 4.5.2
## Loading required package: parallel
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.5.2
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(gridExtra)
library(knitr)
library(FinTS)
## Warning: package 'FinTS' was built under R version 4.5.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.5.1
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'FinTS'
## The following object is masked from 'package:forecast':
##
## Acf
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.1
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Import from excel
Data<- read_excel("C:/Users/HUAWEI/Desktop/Year 2 SEM 2/ARECA Case Competition/soong.xlsx")
# Fix the data cleaning - convert to numeric first
Data <- Data %>%
mutate(
# Convert to numeric, handling any conversion issues
Original_Loss_Numeric = as.numeric(`ORIGINAL LOSS VALUE`),
) %>%
group_by(Year) %>%
summarise(
Total_Original_Loss = sum(Original_Loss_Numeric, na.rm = TRUE),
Event_Count = n(),
Avg_Loss_Per_Event = Total_Original_Loss / Event_Count
) %>%
ungroup() %>%
filter(Year >= 1980)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Original_Loss_Numeric = as.numeric(`ORIGINAL LOSS VALUE`)`.
## Caused by warning:
## ! NAs introduced by coercion
View(Data)
Loss_data<-Data[-c(1,2),]
View(Loss_data)
colnames(Loss_data)<- c("Date", "Ori Loss", "Number of Event", "Adj Close")
#Obtaining Daily Returns
returns <- diff(log(Loss_data$`Adj Close`))
ggplot(data = Loss_data, aes(x = Date, y = `Adj Close`)) + geom_line(colour = "blue", size = 0.5) + labs(title = "Trend of Average Losses from 1982 - 2025", x = "Date", y = "Average Losses (per Event)(Millions, $)")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ts.plot(returns, gpars = list(xlab="Time", ylab = "Daily Returns (%)", main = "Yearly Fluctuations of Average Losses"))
hist(returns, col = "lightblue", border = "black", prob = TRUE, xlab = "Daily Returns (%)", main = "Density of Average Losses")
lines(density(returns), lwd = 2, col = "red")
#Format Conversion for Dates
str(Loss_data)
## tibble [44 × 4] (S3: tbl_df/tbl/data.frame)
## $ Date : num [1:44] 1982 1983 1984 1985 1986 ...
## $ Ori Loss : num [1:44] 1.00e+07 1.88e+08 1.05e+08 2.85e+08 2.15e+08 ...
## $ Number of Event: int [1:44] 12 17 18 14 19 10 11 4 8 15 ...
## $ Adj Close : num [1:44] 833333 11058824 5833333 20357143 11315789 ...
qqplot.das(returns, distribution = "norm", ylab = "Sample quantiles",
xlab = "Theoretical Quantiles", main = "Normal Q-Q Plot Average Losses Yearly Fluctuations (1982 - 2025)", las = par("las"),
datax = FALSE, envelope = 0.95, labels = FALSE, col = "red",
lwd = 2, pch = 20,line = c("quartiles","robust","none"), cex = 1,
xaxt = "s", add.plot = FALSE, xlim = NULL, ylim = NULL)
acf(log(Loss_data$`Adj Close`), main="ACF of Average Losses")
pacf(log(Loss_data$`Adj Close`), main="PACF of Average Losses")
#H0: Data is non-stationary
#H1: Data is stationary
adf.test(log(Loss_data$`Adj Close`))
##
## Augmented Dickey-Fuller Test
##
## data: log(Loss_data$`Adj Close`)
## Dickey-Fuller = -3.1559, Lag order = 3, p-value = 0.1162
## alternative hypothesis: stationary
#H0: Data is stationary
#H1: Data is non-stationary
kpss.test(log(Loss_data$`Adj Close`))
## Warning in kpss.test(log(Loss_data$`Adj Close`)): p-value smaller than printed
## p-value
##
## KPSS Test for Level Stationarity
##
## data: log(Loss_data$`Adj Close`)
## KPSS Level = 1.0539, Truncation lag parameter = 3, p-value = 0.01
diff.data <- diff(log(Loss_data$`Adj Close`))
acf(diff.data, main="ACF of Differenced Average Losses")
pacf(diff.data, main="PACF of Differenced Average Losses")
#H0: Data is non-stationary
#H1: Data is stationary
adf.test(diff.data)
## Warning in adf.test(diff.data): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff.data
## Dickey-Fuller = -5.3191, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
#H0: Data is stationary
#H1: Data is non-stationary
kpss.test(diff.data)
## Warning in kpss.test(diff.data): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff.data
## KPSS Level = 0.066223, Truncation lag parameter = 3, p-value = 0.1
ModelX <- Arima(diff.data, order = c(1,0,1), method = "ML")
summary(ModelX)
## Series: diff.data
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## 0.1631 -1.000 0.1128
## s.e. 0.1570 0.079 0.0144
##
## sigma^2 = 1.126: log likelihood = -63.74
## AIC=135.49 AICc=136.54 BIC=142.53
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02347107 1.023486 0.7352977 -29.67115 153.4586 0.3922913
## ACF1
## Training set 0.01816958
#The coefficients of Autoregressive Models and Moving Average models should fall within the unit root, |z| </= 1
autoplot(ModelX)
ModelY <- Arima(log(Loss_data$`Adj Close`), order = c(1,1,1), method = "ML")
summary(ModelY)
## Series: log(Loss_data$`Adj Close`)
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.0401 -0.6421
## s.e. 0.2457 0.1892
##
## sigma^2 = 1.421: log likelihood = -67.78
## AIC=141.56 AICc=142.18 BIC=146.85
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3548077 1.150625 0.8787882 1.717231 5.01989 0.8000326
## ACF1
## Training set -0.06156119
#The coefficients of Autoregressive Models and Moving Average models should fall within the unit root, |z| </= 1
autoplot(ModelY)
Statistics <- c("Log-likelihood", "AICc")
Statistics
## [1] "Log-likelihood" "AICc"
#Statistics obtained ffrom Model X
ModelX.stats <- c(ModelX$loglik, ModelX$aicc)
ModelX.stats
## [1] -63.74448 136.54159
##Statistics obtained ffrom Model Y
ModelY.stats <- c(ModelY$loglik, ModelY$aicc)
ModelY.stats
## [1] -67.78211 142.17961
Model.Statistics <- data.frame(Statistics, ModelX.stats, ModelY.stats)
View(Model.Statistics)
checkresiduals(ModelY)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)
## Q* = 3.597, df = 7, p-value = 0.8248
##
## Model df: 2. Total lags used: 9
Model_w.noise <- Arima(log(Loss_data$'Adj Close'), order = c(0,1,0))
Model_w.noise.stats <- c(Model_w.noise$loglik, Model_w.noise$aicc)
Model_w.noise.stats
## [1] -73.82973 149.75703
checkresiduals(Model_w.noise)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 11.923, df = 9, p-value = 0.2177
##
## Model df: 0. Total lags used: 9
#Ordinary Residuals
plot(Model_w.noise$residuals, main="Residuals Plot")
#Squared Residuals
plot(Model_w.noise$residuals^2, main = "Squared Residuals Plot")
residuals.abs <- abs(Model_w.noise$residuals)
acf(residuals.abs, main="ACF of Absolute Residuals")
pacf(residuals.abs, main="PACF of Absolute Residuals")
residuals.squared <- (Model_w.noise$residuals^2)
acf(residuals.squared, main="ACF of Squared Residuals")
pacf(residuals.squared, main="PACF of Squared Residuals")
#Forecast of ModelY - ARIMA (1,1,1)
ModelY_f.cast <- forecast::forecast(ModelY, h = 10)
ModelY_f.cast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 45 20.14943 18.62185 21.67701 17.81319 22.48566
## 46 20.14173 18.49758 21.78587 17.62722 22.65623
## 47 20.14142 18.40088 21.88195 17.47950 22.80333
## 48 20.14140 18.31001 21.97280 17.34053 22.94228
## 49 20.14140 18.22346 22.05934 17.20817 23.07464
## 50 20.14140 18.14066 22.14215 17.08153 23.20127
## 51 20.14140 18.06115 22.22166 16.95993 23.32287
## 52 20.14140 17.98457 22.29824 16.84281 23.44000
## 53 20.14140 17.91062 22.37219 16.72971 23.55310
## 54 20.14140 17.83904 22.44377 16.62024 23.66257
Forecasted <- exp(ModelY_f.cast$mean)
Forecasted
## Time Series:
## Start = 45
## End = 54
## Frequency = 1
## [1] 563358529 559036337 558863726 558856806 558856529 558856517 558856517
## [8] 558856517 558856517 558856517
Upper <- exp(ModelY_f.cast$upper)
Upper
## Time Series:
## Start = 45
## End = 54
## Frequency = 1
## 80% 95%
## 45 2595402586 5826369085
## 46 2893901806 6909962986
## 47 3185735982 8004995874
## 48 3488711425 9198257906
## 49 3804087604 10499961351
## 50 4132487945 11917507655
## 51 4474478189 13458471563
## 52 4830600323 15130753280
## 53 5201381941 16942607134
## 54 5587342264 18902662745
Lower <- exp(ModelY_f.cast$lower)
Lower
## Time Series:
## Start = 45
## End = 54
## Frequency = 1
## 80% 95%
## 45 122282699 54471804
## 46 107993169 45227684
## 47 98039720 39016718
## 48 89523291 33954357
## 49 82101322 29744930
## 50 75576895 26206873
## 51 69800453 23206246
## 52 64654616 20641445
## 53 60045697 18434035
## 54 55897883 16522572
#Graph of Forecast for ARIMA (1,1,1)
plot(ModelY_f.cast, xlim = c(41, 50))