master=read.csv("data.csv")
str(master)
## 'data.frame': 1226 obs. of 7 variables:
## $ Date : Factor w/ 1226 levels "2012-01-04","2012-01-05",..: 1226 1225 1224 1223 1222 1221 1220 1219 1218 1217 ...
## $ Open : int 42120 43000 43940 43140 43310 43660 43900 42910 42790 43350 ...
## $ High : int 42330 43220 43970 43700 43660 43840 44370 43630 43150 43550 ...
## $ Low : int 41700 42540 43270 43140 43090 43190 43610 42860 42740 42810 ...
## $ Close : int 41830 42660 43270 43620 43340 43480 44000 43620 43130 43130 ...
## $ Volume : int 610000 448400 339900 400100 358200 381600 658900 499400 358700 542000 ...
## $ Stock.Trading: num 2.56e+10 1.92e+10 1.48e+10 1.74e+10 1.55e+10 ...
head(master)
## Date Open High Low Close Volume Stock.Trading
## 1 2016-12-30 42120 42330 41700 41830 610000 25628028000
## 2 2016-12-29 43000 43220 42540 42660 448400 19188227000
## 3 2016-12-28 43940 43970 43270 43270 339900 14780670000
## 4 2016-12-27 43140 43700 43140 43620 400100 17427993000
## 5 2016-12-26 43310 43660 43090 43340 358200 15547803000
## 6 2016-12-22 43660 43840 43190 43480 381600 16586491000
summary(master)
## Date Open High Low
## 2012-01-04: 1 Min. :13720 Min. :13840 Min. :13600
## 2012-01-05: 1 1st Qu.:27789 1st Qu.:28091 1st Qu.:27401
## 2012-01-06: 1 Median :34445 Median :34835 Median :33925
## 2012-01-10: 1 Mean :33754 Mean :34179 Mean :33348
## 2012-01-11: 1 3rd Qu.:41412 3rd Qu.:41900 3rd Qu.:40810
## 2012-01-12: 1 Max. :61550 Max. :61970 Max. :60740
## (Other) :1220
## Close Volume Stock.Trading
## Min. :13720 Min. : 139100 Min. :3.966e+09
## 1st Qu.:27675 1st Qu.: 487300 1st Qu.:1.454e+10
## Median :34412 Median : 626000 Median :2.154e+10
## Mean :33761 Mean : 727556 Mean :2.441e+10
## 3rd Qu.:41365 3rd Qu.: 826700 3rd Qu.:3.016e+10
## Max. :61930 Max. :4937300 Max. :1.460e+11
##
sum(is.na(master))
## [1] 0
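
# Note: str() shows that Date was read in as a factor and head() shows the rows run
# from newest to oldest. A minimal, optional sketch (the data frame master_chron is
# introduced here purely for illustration) that converts the column to Date class
# and reorders the rows chronologically:
master_chron = master
master_chron$Date = as.Date(master_chron$Date, format = "%Y-%m-%d")
master_chron = master_chron[order(master_chron$Date), ]
head(master_chron$Date) # should now start at 2012-01-04
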
library(lubridate) # Date & Time
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(plotly) # Visualisation
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(TTR) # Time series
library(tseries) # Time series
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast) # Forecasting
library(fpp2)
## Loading required package: fma
## Loading required package: expsmooth
library(autoplotly)
## Registered S3 methods overwritten by 'ggfortify':
## method from
## autoplot.Arima forecast
## autoplot.acf forecast
## autoplot.ar forecast
## autoplot.bats forecast
## autoplot.decomposed.ts forecast
## autoplot.ets forecast
## autoplot.forecast forecast
## autoplot.stl forecast
## autoplot.ts forecast
## fitted.ar forecast
## fortify.ts forecast
## residuals.ar forecast
library(ggplot2)
qqnorm(master$Close)
qqline(master$Close,col = "steelblue", lwd = 2)

boxplot(master$Close)
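
# The Q-Q plot and boxplot suggest the closing prices are far from normally
# distributed. As an optional formal check (a sketch, not required for the ARIMA
# workflow), the Jarque-Bera test from the already loaded tseries package can be
# used; a small p-value would reject normality:
jarque.bera.test(master$Close)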

# Full picture of the stock movement: we can clearly see that the stock price
# is not stationary and cannot be described by a known function such as sin(x).
# Therefore time series analysis can be applied to this data set.
plot_ly(data = master, x = ~Date, type = 'candlestick', name = 'Uniqlo',
        open = ~Open, close = ~Close, high = ~High, low = ~Low) %>%
  layout(title = "Uniqlo Market 2012-2016")
# Simplified view of the stock movement: the closing price only
plot(master$Close, type = "l")
# Check the trend of the data: use the abline function to overlay a linear trend
# line (the fitted mean) on the data points. As we can see, the mean is decreasing
# across the plotted index and the variance of the points around it is not constant.
abline(reg = lm(master$Close~time(master$Date)))

# In order to apply a time series model, we need to transform the data so that
# the mean and the variance become stable:
# use the log function to stabilise the variance,
# use the diff function to stabilise the mean.
plot(diff(log(master$Close)), type = "l")
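
# To back up the visual impression with a formal test, an Augmented Dickey-Fuller
# test from the tseries package can be run on the raw and the transformed series
# (a sketch; the null hypothesis is that the series is non-stationary):
adf.test(master$Close)            # expected: large p-value, i.e. non-stationary
adf.test(diff(log(master$Close))) # expected: small p-value, i.e. stationary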

# As shown in the plot, both the variance and the mean are now roughly constant,
# so the data set is ready for time series analysis with an ARIMA model.
# Time plot: we need to define the p, d and q values.
acf(diff(log(master$Close))) # q is 1: apart from lag 0, only the first lag is outside the confidence bounds

pacf(diff(log(master$Close))) # p is 0: no partial autocorrelation spike is clearly outside the confidence bounds
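
# As an alternative way to read off candidate p and q values, the forecast package
# can show the series, its ACF and its PACF in a single figure (optional sketch):
ggtsdisplay(diff(log(master$Close)))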

# Since we applied the diff function once, the d value is 1.
# Fit the data to an ARIMA model - the data has to be stationary.
fit_arima = auto.arima(master$Close, d = 1, D = 1)
fit = arima(log(master$Close), order = c(1, 0, 1), seasonal = list(order = c(1, 0, 1), period = NA))
print(summary(fit_arima))
## Series: master$Close
## ARIMA(2,1,0)
##
## Coefficients:
## ar1 ar2
## -0.0809 0.0643
## s.e. 0.0285 0.0285
##
## sigma^2 estimated as 689155: log likelihood=-9971.18
## AIC=19948.36 AICc=19948.38 BIC=19963.69
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -23.2782 829.1372 575.1811 -0.1214167 1.69787 0.9899081
## ACF1
## Training set -0.001742805
checkresiduals(fit_arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0)
## Q* = 6.8249, df = 8, p-value = 0.5556
##
## Model df: 2. Total lags used: 10
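
# With the residuals looking like white noise, the fitted ARIMA model can be used
# to forecast ahead. A short sketch (the 30-step horizon is an arbitrary choice):
fc_arima = forecast(fit_arima, h = 30)
autoplot(fc_arima)
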
# Fit an ETS (exponential smoothing) model for comparison; the Ljung-Box test below also has df = 8
fit_ets= ets(master$Close)
print(summary(fit_ets))
## ETS(M,N,N)
##
## Call:
## ets(y = master$Close)
##
## Smoothing parameters:
## alpha = 0.9261
##
## Initial states:
## l = 43206.71
##
## sigma: 0.024
##
## AIC AICc BIC
## 25000.27 25000.29 25015.60
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -25.9646 832.0867 578.9932 -0.1318181 1.70632 0.996469
## ACF1
## Training set -0.01008866
checkresiduals(fit_ets)

##
## Ljung-Box test
##
## data: Residuals from ETS(M,N,N)
## Q* = 8.0032, df = 8, p-value = 0.4332
##
## Model df: 2. Total lags used: 10
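
# To compare the ARIMA and ETS models out of sample, one option (a sketch; the 80/20
# split is an arbitrary choice) is to refit both on a training window and score them
# on a holdout set. The CSV stores rows newest-first, so the series is reversed to
# get chronological order before splitting:
close_chron = rev(master$Close)
n_train     = round(0.8 * length(close_chron))
train       = head(close_chron, n_train)
test        = tail(close_chron, length(close_chron) - n_train)
fit_a = auto.arima(train, d = 1)
fit_e = ets(train)
accuracy(forecast(fit_a, h = length(test)), test)
accuracy(forecast(fit_e, h = length(test)), test)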