# Load the daily price history. The Date column holds ISO "YYYY-MM-DD"
# strings, so read it as text and parse it into real Date values.
master <- read.csv("data.csv", stringsAsFactors = FALSE)
master$Date <- as.Date(master$Date)

# The file is stored newest-first (its first row is 2016-12-30). Everything
# further down works on row order (diff(), acf(), ARIMA, ETS), so sort the
# rows chronologically; otherwise the models are fitted to time running
# backwards.
master <- master[order(master$Date), ]
rownames(master) <- NULL

# NOTE: the `##` console output kept in this file was captured before the
# rows were reordered and has to be regenerated.
str(master)
## 'data.frame':    1226 obs. of  7 variables:
##  $ Date         : Factor w/ 1226 levels "2012-01-04","2012-01-05",..: 1226 1225 1224 1223 1222 1221 1220 1219 1218 1217 ...
##  $ Open         : int  42120 43000 43940 43140 43310 43660 43900 42910 42790 43350 ...
##  $ High         : int  42330 43220 43970 43700 43660 43840 44370 43630 43150 43550 ...
##  $ Low          : int  41700 42540 43270 43140 43090 43190 43610 42860 42740 42810 ...
##  $ Close        : int  41830 42660 43270 43620 43340 43480 44000 43620 43130 43130 ...
##  $ Volume       : int  610000 448400 339900 400100 358200 381600 658900 499400 358700 542000 ...
##  $ Stock.Trading: num  2.56e+10 1.92e+10 1.48e+10 1.74e+10 1.55e+10 ...
# Preview the first six rows as a quick sanity check of the columns.
head(master, n = 6L)
##         Date  Open  High   Low Close Volume Stock.Trading
## 1 2016-12-30 42120 42330 41700 41830 610000   25628028000
## 2 2016-12-29 43000 43220 42540 42660 448400   19188227000
## 3 2016-12-28 43940 43970 43270 43270 339900   14780670000
## 4 2016-12-27 43140 43700 43140 43620 400100   17427993000
## 5 2016-12-26 43310 43660 43090 43340 358200   15547803000
## 6 2016-12-22 43660 43840 43190 43480 381600   16586491000
# Per-column summary statistics of the data frame.
summary(object = master)
##          Date           Open            High            Low       
##  2012-01-04:   1   Min.   :13720   Min.   :13840   Min.   :13600  
##  2012-01-05:   1   1st Qu.:27789   1st Qu.:28091   1st Qu.:27401  
##  2012-01-06:   1   Median :34445   Median :34835   Median :33925  
##  2012-01-10:   1   Mean   :33754   Mean   :34179   Mean   :33348  
##  2012-01-11:   1   3rd Qu.:41412   3rd Qu.:41900   3rd Qu.:40810  
##  2012-01-12:   1   Max.   :61550   Max.   :61970   Max.   :60740  
##  (Other)   :1220                                                  
##      Close           Volume        Stock.Trading      
##  Min.   :13720   Min.   : 139100   Min.   :3.966e+09  
##  1st Qu.:27675   1st Qu.: 487300   1st Qu.:1.454e+10  
##  Median :34412   Median : 626000   Median :2.154e+10  
##  Mean   :33761   Mean   : 727556   Mean   :2.441e+10  
##  3rd Qu.:41365   3rd Qu.: 826700   3rd Qu.:3.016e+10  
##  Max.   :61930   Max.   :4937300   Max.   :1.460e+11  
## 
# Total number of missing cells, added up column by column.
sum(colSums(is.na(master)))
## [1] 0
library(lubridate)  # Date & Time
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(plotly)     # Visualisation
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(TTR)        # Time series
library(tseries)    # Time series
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)   # Forecasting
library(fpp2)
## Loading required package: fma
## Loading required package: expsmooth
library(autoplotly)
## Registered S3 methods overwritten by 'ggfortify':
##   method                 from    
##   autoplot.Arima         forecast
##   autoplot.acf           forecast
##   autoplot.ar            forecast
##   autoplot.bats          forecast
##   autoplot.decomposed.ts forecast
##   autoplot.ets           forecast
##   autoplot.forecast      forecast
##   autoplot.stl           forecast
##   autoplot.ts            forecast
##   fitted.ar              forecast
##   fortify.ts             forecast
##   residuals.ar           forecast
library(ggplot2)

# Distribution of the closing price: normal Q-Q plot with a reference line.
qqnorm(y = master[["Close"]])
qqline(y = master[["Close"]], lwd = 2, col = "steelblue")

# Box plot of the same column.
boxplot(x = master[["Close"]])

# Full picture of the stock movement: we can clearly see that the stock price
# is not stationary and cannot be represented by a known function such as sin(x).
# Therefore, time series analysis can be applied to this data set.

# Interactive candlestick chart of the daily open/high/low/close prices.
# The title used to read "Uniclo", which did not match the trace name.
plot_ly(
  data = master, x = ~Date, type = "candlestick", name = "Uniqlo",
  open = ~Open, close = ~Close, high = ~High, low = ~Low
) %>%
  layout(title = "Uniqlo Market 2012-2016")
# Simplified stock movement: closing price against the observation index.
plot(master$Close, type = "l")
# Check the trend of the data by overlaying a least-squares line. The line is
# not flat and the spread of the points around it is uneven, i.e. neither the
# mean nor the variance is constant over time.
# The regressor is the observation index 1..n, which is the x-axis plot() used
# above; calling time() on the date column only ever produced that same index.
abline(reg = lm(master$Close ~ seq_along(master$Close)))

# To apply time series modelling the data has to be transformed so that its
# mean and its variance are (approximately) constant:
#   * log()  stabilises the variance,
#   * diff() removes the trend and so stabilises the mean.
# The transformed series is computed once and reused by every diagnostic
# below (the axis labels and titles now show `log_returns`).
log_returns <- diff(log(master$Close))
plot(log_returns, type = "l")

# If the plot fluctuates around a constant level with a roughly constant
# spread, the data set is ready for an ARIMA(p, d, q) model.

# Time plots used to choose q, d and p:
#   q - last lag at which the ACF is clearly outside the confidence band.
#       acf() also draws lag 0, which is always 1 and must not be counted.
#   p - last lag at which the PACF is clearly outside the confidence band.
#       pacf() starts at lag 1; it has no lag-0 bar.
acf(log_returns)   # the original reading of this plot was q = 1

pacf(log_returns)  # the original reading of this plot was p = 0

# diff() was applied once, so d = 1.

# Fit ARIMA models. The differencing needed for stationarity is requested
# through the `d` order, so the models are given the undifferenced series.

# Automatic order selection on the closing price with one regular difference.
# The former `D = 1` (seasonal differencing) is dropped: it had no effect on
# this frequency-1 series, and the selected model is the non-seasonal
# ARIMA(2,1,0).
fit_arima <- auto.arima(master$Close, d = 1)

# Manual model on the log price with the orders read from the ACF/PACF plots:
# p = 0, d = 1, q = 1. The previous call used order = c(1, 0, 1), i.e. no
# differencing of a non-stationary series, plus a seasonal ARMA(1, 1) term
# with period = NA. That period falls back to the series frequency (1), so
# the "seasonal" term had no seasonal meaning.
fit <- arima(log(master$Close), order = c(0, 1, 1))
print(summary(fit_arima))
## Series: master$Close 
## ARIMA(2,1,0) 
## 
## Coefficients:
##           ar1     ar2
##       -0.0809  0.0643
## s.e.   0.0285  0.0285
## 
## sigma^2 estimated as 689155:  log likelihood=-9971.18
## AIC=19948.36   AICc=19948.38   BIC=19963.69
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE    MAPE      MASE
## Training set -23.2782 829.1372 575.1811 -0.1214167 1.69787 0.9899081
##                      ACF1
## Training set -0.001742805
##                    ME     RMSE      MAE        MPE    MAPE      MASE
## Training set -23.2782 829.1372 575.1811 -0.1214167 1.69787 0.9899081
##                      ACF1
## Training set -0.001742805
# Residual diagnostics for the ARIMA fit, including the Ljung-Box test.
checkresiduals(object = fit_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,0)
## Q* = 6.8249, df = 8, p-value = 0.5556
## 
## Model df: 2.   Total lags used: 10
# Fit an exponential smoothing (ETS) model to the closing price and print its
# summary (the Ljung-Box test reported for its residuals uses df = 8).
fit_ets <- ets(y = master$Close)
print(summary(object = fit_ets))
## ETS(M,N,N) 
## 
## Call:
##  ets(y = master$Close) 
## 
##   Smoothing parameters:
##     alpha = 0.9261 
## 
##   Initial states:
##     l = 43206.71 
## 
##   sigma:  0.024
## 
##      AIC     AICc      BIC 
## 25000.27 25000.29 25015.60 
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE    MAPE     MASE
## Training set -25.9646 832.0867 578.9932 -0.1318181 1.70632 0.996469
##                     ACF1
## Training set -0.01008866
##                    ME     RMSE      MAE        MPE    MAPE     MASE
## Training set -25.9646 832.0867 578.9932 -0.1318181 1.70632 0.996469
##                     ACF1
## Training set -0.01008866
# Residual diagnostics for the ETS fit, including the Ljung-Box test.
checkresiduals(object = fit_ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,N,N)
## Q* = 8.0032, df = 8, p-value = 0.4332
## 
## Model df: 2.   Total lags used: 10