1. Basic statistical analysis for stock scores Jul-Dec 2018

stock = read.csv(file= "/ap5/AUDIT/liqiuy01/Statistical Research/time-series-data.csv", sep = ",", header = TRUE)
year = substr(stock$transactiondate,1,4)
month = substr(stock$transactiondate,5,6)
day = substr(stock$transactiondate,7,8)
stock_date = paste0(year,"/",month,"/",day)
date = as.Date(stock_date)
new_stock = cbind(as.numeric(stock$score),format(date,"%m/%d/%Y"))
new_stock = as.data.frame(new_stock)
colnames(new_stock)=c("score", "date")
means <- aggregate(as.numeric(score) ~ date, new_stock, mean)
head(means)
##         date as.numeric(score)
## 1 07/02/2018          430.0724
## 2 07/03/2018          431.8398
## 3 07/04/2018          272.0000
## 4 07/05/2018          423.2860
## 5 07/06/2018          430.4893
## 6 07/07/2018          335.0000
plot(means, ylab = "daily avg score", "stock daily average score")
abline(h = c(200, 380), col = c("blue", "red"))

counts<- aggregate(as.numeric(score) ~ date, new_stock,length)
head(counts)
##         date as.numeric(score)
## 1 07/02/2018              2087
## 2 07/03/2018              1860
## 3 07/04/2018                 1
## 4 07/05/2018              3318
## 5 07/06/2018               985
## 6 07/07/2018                 4
plot(counts, ylab = "daily transaction count", main = "stock daily transaction count")
abline(h = c(200, 4000), col = c("blue", "red"))

2. Time Series Analysis on stock Average Scores from Jul-Dec 2018

library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
zoo_score = zoo(means[,2], seq(from = as.Date("2018-07-02"), to = as.Date("2018-12-31"), by = 1))
plot.zoo(zoo_score,main="Time Series Plot of Score", ylab = "Daily Avg Score")

library(forecast)
model1 = auto.arima(zoo_score)
summary(model1)
## Series: zoo_score 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.9672
## s.e.   0.0216
## 
## sigma^2 estimated as 1907:  log likelihood=-946.48
## AIC=1896.97   AICc=1897.03   BIC=1903.37
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -1.666771 43.43442 28.12934 -1.940551 8.074246 0.8061097
##                    ACF1
## Training set 0.06174204
tsdiag(model1,gof=20)

Box.test(model1$resid, lag = 20, type = "Ljung", fitdf = 2)### P-value=0.005837, there is serial correlation 
## 
##  Box-Ljung test
## 
## data:  model1$resid
## X-squared = 36.686, df = 18, p-value = 0.005757
Box.test(model1$resid^2, lag = 20, type = "Ljung", fitdf = 2)####ARCH EFFECT, p_value = 0.1373, there is arch effect
## 
##  Box-Ljung test
## 
## data:  model1$resid^2
## X-squared = 20.653, df = 18, p-value = 0.2973

3. Use Garch Models: in order to fix serial correlation and arch effect

require(rugarch)
## Loading required package: rugarch
## Loading required package: parallel
## 
## Attaching package: 'rugarch'
## The following object is masked from 'package:stats':
## 
##     sigma
spec7=ugarchspec(variance.model=list(model="fGARCH",submodel="TGARCH",garchOrder=c(1,1)),mean.model=list(armaOrder=c(2,2)),distribution.model="std")
fit7=ugarchfit(zoo_score,spec=spec7)
show(fit7)
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : fGARCH(1,1)
## fGARCH Sub-Model : TGARCH
## Mean Model   : ARFIMA(2,0,2)
## Distribution : std 
## 
## Optimal Parameters
## ------------------------------------
##          Estimate  Std. Error    t value Pr(>|t|)
## mu     410.633802    1.407600  291.72629 0.000000
## ar1     -0.884774    0.001161 -762.33850 0.000000
## ar2     -1.047637    0.001534 -683.11312 0.000000
## ma1      0.887608    0.001256  706.79744 0.000000
## ma2      1.048684    0.001207  869.07614 0.000000
## omega    0.665507    1.162826    0.57232 0.567106
## alpha1   0.088923    0.050669    1.75497 0.079264
## beta1    0.922889    0.034411   26.81986 0.000000
## eta11    1.000000    0.706077    1.41627 0.156695
## shape    2.212202    0.112624   19.64229 0.000000
## 
## Robust Standard Errors:
##          Estimate  Std. Error    t value Pr(>|t|)
## mu     410.633802    2.335907  175.79204 0.000000
## ar1     -0.884774    0.001255 -705.12335 0.000000
## ar2     -1.047637    0.002015 -519.83119 0.000000
## ma1      0.887608    0.001365  650.15252 0.000000
## ma2      1.048684    0.001541  680.57692 0.000000
## omega    0.665507    1.429395    0.46559 0.641511
## alpha1   0.088923    0.044089    2.01692 0.043703
## beta1    0.922889    0.028948   31.88040 0.000000
## eta11    1.000000    0.969282    1.03169 0.302217
## shape    2.212202    0.121191   18.25391 0.000000
## 
## LogLikelihood : -900.9015 
## 
## Information Criteria
## ------------------------------------
##                     
## Akaike        9.9552
## Bayes        10.1306
## Shibata       9.9496
## Hannan-Quinn 10.0263
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                          statistic   p-value
## Lag[1]                      0.1174 7.319e-01
## Lag[2*(p+q)+(p+q)-1][11]    9.7205 7.531e-08
## Lag[4*(p+q)+(p+q)-1][19]   14.8657 3.164e-02
## d.o.f=4
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                   0.004597  0.9459
## Lag[2*(p+q)+(p+q)-1][5]  1.542359  0.7295
## Lag[4*(p+q)+(p+q)-1][9]  3.018147  0.7560
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]    0.1766 0.500 2.000  0.6743
## ARCH Lag[5]    0.9388 1.440 1.667  0.7512
## ARCH Lag[7]    1.9694 2.315 1.543  0.7238
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  5.6777
## Individual Statistics:              
## mu     2.87919
## ar1    0.01632
## ar2    0.01749
## ma1    0.01675
## ma2    0.01722
## omega  0.11235
## alpha1 0.11907
## beta1  0.12769
## eta11  0.09483
## shape  0.20883
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          2.29 2.54 3.05
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value   prob sig
## Sign Bias          0.80665 0.4209    
## Negative Sign Bias 0.33244 0.7399    
## Positive Sign Bias 0.08712 0.9307    
## Joint Effect       0.87565 0.8313    
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     70.22    8.442e-08
## 2    30    102.74    3.551e-10
## 3    40    116.23    1.313e-09
## 4    50    152.25    1.631e-12
## 
## 
## Elapsed time : 1.209719
plot(fit7, which=9)

spec71=ugarchspec(variance.model=list(model="sGARCH", garchOrder=c(1,1)),mean.model=list(armaOrder=c(2,2)),distribution.model="std")
fit71=ugarchfit(zoo_score,spec=spec71)
show(fit71)
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(1,1)
## Mean Model   : ARFIMA(2,0,2)
## Distribution : std 
## 
## Optimal Parameters
## ------------------------------------
##          Estimate  Std. Error     t value Pr(>|t|)
## mu     408.740267    1.559346   262.12284  0.00000
## ar1     -0.891486    0.000413 -2157.65176  0.00000
## ar2     -1.074362    0.000587 -1831.13668  0.00000
## ma1      0.891907    0.000458  1946.36784  0.00000
## ma2      1.074360    0.000362  2969.46551  0.00000
## omega   80.530876  222.971901     0.36117  0.71797
## alpha1   0.041042    0.060678     0.67640  0.49879
## beta1    0.932729    0.110471     8.44321  0.00000
## shape    2.270486    0.153748    14.76761  0.00000
## 
## Robust Standard Errors:
##          Estimate  Std. Error     t value Pr(>|t|)
## mu     408.740267  3.1836e+00  128.389494 0.000000
## ar1     -0.891486  1.6040e-03 -555.885062 0.000000
## ar2     -1.074362  1.4360e-03 -748.331379 0.000000
## ma1      0.891907  1.7710e-03  503.738194 0.000000
## ma2      1.074360  3.0900e-03  347.737518 0.000000
## omega   80.530876  1.0527e+03    0.076497 0.939024
## alpha1   0.041042  5.5095e-01    0.074493 0.940618
## beta1    0.932729  6.1805e-01    1.509159 0.131258
## shape    2.270486  1.1509e+00    1.972721 0.048527
## 
## LogLikelihood : -902.2495 
## 
## Information Criteria
## ------------------------------------
##                     
## Akaike        9.9590
## Bayes        10.1169
## Shibata       9.9545
## Hannan-Quinn 10.0230
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                          statistic   p-value
## Lag[1]                      0.5824 0.4453652
## Lag[2*(p+q)+(p+q)-1][11]    8.3299 0.0002454
## Lag[4*(p+q)+(p+q)-1][19]   13.5660 0.0770598
## d.o.f=4
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                      1.976  0.1598
## Lag[2*(p+q)+(p+q)-1][5]     4.836  0.1667
## Lag[4*(p+q)+(p+q)-1][9]     7.448  0.1645
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]    0.8504 0.500 2.000  0.3564
## ARCH Lag[5]    2.3968 1.440 1.667  0.3900
## ARCH Lag[7]    4.1184 2.315 1.543  0.3298
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  7.4832
## Individual Statistics:              
## mu     4.89015
## ar1    0.01210
## ar2    0.01729
## ma1    0.01235
## ma2    0.01697
## omega  0.29324
## alpha1 0.30551
## beta1  0.31577
## shape  0.34097
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          2.1 2.32 2.82
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value   prob sig
## Sign Bias           1.0794 0.2819    
## Negative Sign Bias  0.7878 0.4319    
## Positive Sign Bias  0.4853 0.6281    
## Joint Effect        5.3764 0.1462    
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     69.79    9.980e-08
## 2    30     91.26    2.325e-08
## 3    40    114.49    2.394e-09
## 4    50    114.54    3.631e-07
## 
## 
## Elapsed time : 0.5911944
plot(fit71, which=9)

spec6=ugarchspec(variance.model=list(model="sGARCH", garchOrder=c(1,1)),mean.model=list(armaOrder=c(2,2)),distribution.model="norm")
fit6=ugarchfit(zoo_score,spec=spec6)
show(fit6)
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(1,1)
## Mean Model   : ARFIMA(2,0,2)
## Distribution : norm 
## 
## Optimal Parameters
## ------------------------------------
##         Estimate  Std. Error  t value Pr(>|t|)
## mu     396.26897    3.218318 123.1292  0.00000
## ar1     -0.73498    0.020936 -35.1067  0.00000
## ar2     -0.96471    0.022666 -42.5620  0.00000
## ma1      0.82531    0.005535 149.1151  0.00000
## ma2      1.00197    0.003753 266.9777  0.00000
## omega    4.44303    5.206873   0.8533  0.39349
## alpha1   0.00000    0.001754   0.0000  1.00000
## beta1    0.99729    0.002293 434.9467  0.00000
## 
## Robust Standard Errors:
##         Estimate  Std. Error   t value Pr(>|t|)
## mu     396.26897    2.974986 133.20026  0.00000
## ar1     -0.73498    0.019726 -37.25982  0.00000
## ar2     -0.96471    0.025239 -38.22274  0.00000
## ma1      0.82531    0.004669 176.77120  0.00000
## ma2      1.00197    0.005941 168.64334  0.00000
## omega    4.44303   15.329113   0.28984  0.77194
## alpha1   0.00000    0.009753   0.00000  1.00000
## beta1    0.99729    0.001166 855.63099  0.00000
## 
## LogLikelihood : -940.8996 
## 
## Information Criteria
## ------------------------------------
##                    
## Akaike       10.370
## Bayes        10.511
## Shibata      10.367
## Hannan-Quinn 10.427
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                          statistic p-value
## Lag[1]                      0.1948  0.6589
## Lag[2*(p+q)+(p+q)-1][11]    5.8724  0.5712
## Lag[4*(p+q)+(p+q)-1][19]   11.2858  0.2754
## d.o.f=4
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                     0.6897  0.4063
## Lag[2*(p+q)+(p+q)-1][5]    2.7498  0.4546
## Lag[4*(p+q)+(p+q)-1][9]    5.8763  0.3127
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]  0.005056 0.500 2.000  0.9433
## ARCH Lag[5]  1.076681 1.440 1.667  0.7103
## ARCH Lag[7]  3.858726 2.315 1.543  0.3674
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  4.0683
## Individual Statistics:              
## mu     0.60591
## ar1    0.15105
## ar2    0.15701
## ma1    0.04591
## ma2    0.19138
## omega  0.09248
## alpha1 0.15130
## beta1  0.09301
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          1.89 2.11 2.59
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value    prob sig
## Sign Bias           0.8436 0.40000    
## Negative Sign Bias  1.3876 0.16698    
## Positive Sign Bias  0.2846 0.77630    
## Joint Effect        7.5310 0.05677   *
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     67.60    2.293e-07
## 2    30     85.69    1.653e-07
## 3    40     97.87    5.764e-07
## 4    50    101.43    1.599e-05
## 
## 
## Elapsed time : 0.2422023
plot(fit6, which=9)

spec61=ugarchspec(variance.model=list(model="iGARCH", garchOrder=c(1,1)),mean.model=list(armaOrder=c(2,2)),distribution.model="std")
fit61=ugarchfit(zoo_score,spec=spec61)
show(fit61)
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : iGARCH(1,1)
## Mean Model   : ARFIMA(2,0,2)
## Distribution : std 
## 
## Optimal Parameters
## ------------------------------------
##          Estimate  Std. Error     t value Pr(>|t|)
## mu     408.784138    1.541884  2.6512e+02  0.00000
## ar1     -0.892122    0.000142 -6.3037e+03  0.00000
## ar2     -1.076037    0.000068 -1.5922e+04  0.00000
## ma1      0.892489    0.000115  7.7448e+03  0.00000
## ma2      1.076033    0.000351  3.0616e+03  0.00000
## omega   36.605691   79.009449  4.6331e-01  0.64314
## alpha1   0.049434    0.049176  1.0053e+00  0.31477
## beta1    0.950566          NA          NA       NA
## shape    2.242432    0.110723  2.0253e+01  0.00000
## 
## Robust Standard Errors:
##          Estimate  Std. Error     t value Pr(>|t|)
## mu     408.784138    5.236565   78.063409 0.000000
## ar1     -0.892122    0.003489 -255.662414 0.000000
## ar2     -1.076037    0.004064 -264.799100 0.000000
## ma1      0.892489    0.003461  257.855045 0.000000
## ma2      1.076033    0.005618  191.537037 0.000000
## omega   36.605691  647.267135    0.056554 0.954900
## alpha1   0.049434    0.432042    0.114420 0.908905
## beta1    0.950566          NA          NA       NA
## shape    2.242432    0.776488    2.887918 0.003878
## 
## LogLikelihood : -902.3592 
## 
## Information Criteria
## ------------------------------------
##                     
## Akaike        9.9493
## Bayes        10.0896
## Shibata       9.9457
## Hannan-Quinn 10.0062
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                          statistic   p-value
## Lag[1]                      0.5603 0.4541595
## Lag[2*(p+q)+(p+q)-1][11]    8.0635 0.0008861
## Lag[4*(p+q)+(p+q)-1][19]   13.1501 0.1000650
## d.o.f=4
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                      2.695  0.1006
## Lag[2*(p+q)+(p+q)-1][5]     5.508  0.1172
## Lag[4*(p+q)+(p+q)-1][9]     7.967  0.1308
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]    0.8565 0.500 2.000  0.3547
## ARCH Lag[5]    2.3686 1.440 1.667  0.3954
## ARCH Lag[7]    3.9329 2.315 1.543  0.3563
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  6.6065
## Individual Statistics:              
## mu     4.79874
## ar1    0.01168
## ar2    0.01685
## ma1    0.01186
## ma2    0.01662
## omega  0.27244
## alpha1 0.29581
## shape  0.32178
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          1.89 2.11 2.59
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value   prob sig
## Sign Bias           1.1365 0.2573    
## Negative Sign Bias  0.7809 0.4359    
## Positive Sign Bias  0.4382 0.6617    
## Joint Effect        5.5373 0.1364    
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     76.13    8.571e-09
## 2    30     95.20    5.656e-09
## 3    40    124.10    8.396e-11
## 4    50    118.37    1.137e-07
## 
## 
## Elapsed time : 0.3677073
plot(fit71,which = 9)

Forecast based on the model with best performance (sGarch on normal distribution)

m71=ugarchfit(zoo_score, spec=spec6, out.sample=10)
forecast71=ugarchforecast(m71,data=NULL,n.ahead=31,n.roll=10, out.sample=200)
forecast71
## 
## *------------------------------------*
## *       GARCH Model Forecast         *
## *------------------------------------*
## Model: sGARCH
## Horizon: 31
## Roll Steps: 10
## Out of Sample: 10
## 
## 0-roll forecast [T0=2018-12-21]:
##      Series Sigma
## T+1   407.9 38.63
## T+2   404.6 38.63
## T+3   381.0 38.63
## T+4   401.8 38.63
## T+5   409.0 38.63
## T+6   383.6 38.63
## T+7   395.5 38.63
## T+8   411.2 38.63
## T+9   387.9 38.63
## T+10  390.2 38.63
## T+11  410.9 38.63
## T+12  393.3 38.63
## T+13  386.5 38.63
## T+14  408.6 38.63
## T+15  398.7 38.63
## T+16  384.7 38.63
## T+17  404.7 38.63
## T+18  403.2 38.63
## T+19  385.0 38.63
## T+20  400.0 38.63
## T+21  406.4 38.63
## T+22  387.2 38.63
## T+23  395.4 38.63
## T+24  407.8 38.63
## T+25  390.6 38.63
## T+26  391.5 38.63
## T+27  407.4 38.63
## T+28  394.7 38.63
## T+29  388.8 38.63
## T+30  405.5 38.63
## T+31  398.7 38.63
plot(forecast71, which=2)

roll=ugarchroll(spec71, data=zoo_score, n.ahead=1, forecast.length=50, refit.every=10, refit.window=c("moving"))
## Warning in arima(data, order = c(modelinc[2], 0, modelinc[3]), include.mean
## = modelinc[1], : possible convergence problem: optim gave code = 1
plot(roll, which=4)