### PRACTICAL ACTUARIAL MODELLING.

#Import libraries.
#Dataframes.
#Import data.
# Basic data analysis.
#GARCH FAMILY MODELING.
# Modeling (regression,survival, time series)
# Statistics (ANOVA)

#Libraries
library(tidyverse)
library(fGarch)
library(rugarch)
library(survival)
library(survminer)

## Dataframes in R
x <- c(2, 4, 6, 8, 10)
y <- c(1, 3, 6, 9, 12)
z <- c(12, 4, 7, 1, 24)

Dataframe <- data.frame(x, y, z); Dataframe
cor(x, y)
cor(y,z)
cor(x, z)
# correlation between the two variables.

#regression analysis.
reg_1 <- lm(x~y); reg_1
reg_2 <- lm(x~y + z); reg_2

# plot for individual variable.
plot(x)
hist(x)

# plot for linear model of the variables.
plot(reg_1)
plot(reg_2)
anova(reg_2)
summary(reg_2)


## Importing data.
data <- read.csv("supermarket_sales.csv"); data
View(data)
glimpse(data)

sales <- data$Sales; sales # sales column.
head(sales) # getting the first values in the data.
length(sales) # number of records.

#Basic Statistics.
min(sales)
max(sales)
mean(sales)
var(sales) # variance
sd(sales) # standard deviation 

# visualization
hist(sales, col = "purple")
ts.plot(sales)
boxplot(Sales)
summary(sales) # five number summary.

# Removing Outliers.
iqr <- IQR(sales); iqr
lower_bound <- quantile(sales, 0.25) - 1.5 * iqr ;lower_bound
upper_bound <- quantile(sales, 0.75) + 1.5 * iqr ;upper_bound

data_clean <- data[!(data$Sales < lower_bound | data$Sales > upper_bound), ];data_clean
sales<- data_clean$Sales

pacf(sales)
acf(sales)


# Modelling using garch model.
model_1 <- garchFit(formula = ~garch(1,1), data = sales, trace = F); model_1
model_2 <- garchFit(formula = ~garch(1,0), data = sales, trace = F); model_2
model_3 <- garchFit(formula = ~garch(2,1), data = sales, trace = F); model_3
model_4 <- garchFit(formula = ~garch(2,2), data = sales, trace = F); model_4
model_5 <- garchFit(formula = ~garch(1,2), data = sales, trace = F); model_5



summary(model_1) # AIC = 13.75656 **
summary(model_2) # AIC = 13.75503 *       -least AIC
summary(model_3) # AIC = 13.75846 ***
summary(model_4) # AIC = 13.75962 *****   - highest AIC
summary(model_5) # AIC = 13.75856 ****

## Forecasting.

# variance models- gjrgarch, sgarch, egarch,
# distribution models - ged, std, norm

m1 <- ugarchspec(variance.model = list(model="gjrGARCH",garchOrder= c(1,1)),
                 mean.model = list(armaOrder= c(1,1),include.mean=F),
                 distribution.model= "norm");m1 # optimal model

m1fit <- ugarchfit(data=sales , spec = m1); m1fit

summary(m1)
# AIC = 13.797, sgarch,(1,1),ged **
# AIC = 13.799, sgarch,(1,2),ged
# AIC = 13.800, sgarch,(2, 1),ged
# AIC = 13.800 sgarch, (2,2) ,ged

# AIC = 13.793, gjrgarch,(1, 1),std **
# AIC = 14.077, gjrgarch,(2, 0),std
# AIC = 13.803, gjrgarch,(2, 1),std

# AIC = 13.796, gjrgarch,(2, 1), norm
# AIC = 13.791, gjrgarch,(1, 1), norm **

## Forecsting.
forecast <- ugarchforecast(m1fit, n.ahead = 60); forecast

forc1 <- ugarchforecast(m1fit, n.ahead = 60, method= c("Partial", "Full")[1]); forc1

## visualizing the prediction
plot(forecast, which = 1)
plot(forecast, which = 3)

plot(forc1, which = 1)
plot(forc1, which = 3)

## Survival Model Analysis.
# using the rats data on our notes.
d1 <- data.frame(time=c(3, 4, 6, 11, 17, 21, 24, 25, 26, 30),
                 event=c(1, 1, 0, 0, 0, 1, 0, 0, 1, 0)); d1

kaplan_m_curv <- with(d1, Surv(time, event)); kaplan_m_curv

summary(kaplan_m_curv)
plot(kaplan_m_curv) # the vertical axis indicates the survival probabilities.

plot(kaplan_m_curv, fun="cumhaz")