### PRACTICAL ACTUARIAL MODELLING.
#Import libraries.
#Dataframes.
#Import data.
# Basic data analysis.
#GARCH FAMILY MODELING.
# Modeling (regression,survival, time series)
# Statistics (ANOVA)
#Libraries
library(tidyverse)
library(fGarch)
library(rugarch)
library(survival)
library(survminer)
## Dataframes in R
x <- c(2, 4, 6, 8, 10)
y <- c(1, 3, 6, 9, 12)
z <- c(12, 4, 7, 1, 24)
Dataframe <- data.frame(x, y, z); Dataframe
cor(x, y)
cor(y,z)
cor(x, z)
# correlation between the two variables.
#regression analysis.
reg_1 <- lm(x~y); reg_1
reg_2 <- lm(x~y + z); reg_2
# plot for individual variable.
plot(x)
hist(x)
# plot for linear model of the variables.
plot(reg_1)
plot(reg_2)
anova(reg_2)
summary(reg_2)
## Importing data.
data <- read.csv("supermarket_sales.csv"); data
View(data)
glimpse(data)
sales <- data$Sales; sales # sales column.
head(sales) # getting the first values in the data.
length(sales) # number of records.
#Basic Statistics.
min(sales)
max(sales)
mean(sales)
var(sales) # variance
sd(sales) # standard deviation
# visualization
hist(sales, col = "purple")
ts.plot(sales)
boxplot(Sales)
summary(sales) # five number summary.
# Removing Outliers.
iqr <- IQR(sales); iqr
lower_bound <- quantile(sales, 0.25) - 1.5 * iqr ;lower_bound
upper_bound <- quantile(sales, 0.75) + 1.5 * iqr ;upper_bound
data_clean <- data[!(data$Sales < lower_bound | data$Sales > upper_bound), ];data_clean
sales<- data_clean$Sales
pacf(sales)
acf(sales)
# Modelling using garch model.
model_1 <- garchFit(formula = ~garch(1,1), data = sales, trace = F); model_1
model_2 <- garchFit(formula = ~garch(1,0), data = sales, trace = F); model_2
model_3 <- garchFit(formula = ~garch(2,1), data = sales, trace = F); model_3
model_4 <- garchFit(formula = ~garch(2,2), data = sales, trace = F); model_4
model_5 <- garchFit(formula = ~garch(1,2), data = sales, trace = F); model_5
summary(model_1) # AIC = 13.75656 **
summary(model_2) # AIC = 13.75503 * -least AIC
summary(model_3) # AIC = 13.75846 ***
summary(model_4) # AIC = 13.75962 ***** - highest AIC
summary(model_5) # AIC = 13.75856 ****
## Forecasting.
# variance models- gjrgarch, sgarch, egarch,
# distribution models - ged, std, norm
m1 <- ugarchspec(variance.model = list(model="gjrGARCH",garchOrder= c(1,1)),
mean.model = list(armaOrder= c(1,1),include.mean=F),
distribution.model= "norm");m1 # optimal model
m1fit <- ugarchfit(data=sales , spec = m1); m1fit
summary(m1)
# AIC = 13.797, sgarch,(1,1),ged **
# AIC = 13.799, sgarch,(1,2),ged
# AIC = 13.800, sgarch,(2, 1),ged
# AIC = 13.800 sgarch, (2,2) ,ged
# AIC = 13.793, gjrgarch,(1, 1),std **
# AIC = 14.077, gjrgarch,(2, 0),std
# AIC = 13.803, gjrgarch,(2, 1),std
# AIC = 13.796, gjrgarch,(2, 1), norm
# AIC = 13.791, gjrgarch,(1, 1), norm **
## Forecsting.
forecast <- ugarchforecast(m1fit, n.ahead = 60); forecast
forc1 <- ugarchforecast(m1fit, n.ahead = 60, method= c("Partial", "Full")[1]); forc1
## visualizing the prediction
plot(forecast, which = 1)
plot(forecast, which = 3)
plot(forc1, which = 1)
plot(forc1, which = 3)
## Survival Model Analysis.
# using the rats data on our notes.
d1 <- data.frame(time=c(3, 4, 6, 11, 17, 21, 24, 25, 26, 30),
event=c(1, 1, 0, 0, 0, 1, 0, 0, 1, 0)); d1
kaplan_m_curv <- with(d1, Surv(time, event)); kaplan_m_curv
summary(kaplan_m_curv)
plot(kaplan_m_curv) # the vertical axis indicates the survival probabilities.
plot(kaplan_m_curv, fun="cumhaz")