1. Basic statistical analysis for stock scores Jul-Dec 2018
stock = read.csv(file= "/ap5/AUDIT/liqiuy01/Statistical Research/time-series-data.csv", sep = ",", header = TRUE)
year = substr(stock$transactiondate,1,4)
month = substr(stock$transactiondate,5,6)
day = substr(stock$transactiondate,7,8)
stock_date = paste0(year,"/",month,"/",day)
date = as.Date(stock_date)
new_stock = cbind(as.numeric(stock$score),format(date,"%m/%d/%Y"))
new_stock = as.data.frame(new_stock)
colnames(new_stock)=c("score", "date")
means <- aggregate(as.numeric(score) ~ date, new_stock, mean)
head(means)
## date as.numeric(score)
## 1 07/02/2018 430.0724
## 2 07/03/2018 431.8398
## 3 07/04/2018 272.0000
## 4 07/05/2018 423.2860
## 5 07/06/2018 430.4893
## 6 07/07/2018 335.0000
plot(means, ylab = "daily avg score", "stock daily average score")
abline(h = c(200, 380), col = c("blue", "red"))

counts<- aggregate(as.numeric(score) ~ date, new_stock,length)
head(counts)
## date as.numeric(score)
## 1 07/02/2018 2087
## 2 07/03/2018 1860
## 3 07/04/2018 1
## 4 07/05/2018 3318
## 5 07/06/2018 985
## 6 07/07/2018 4
plot(counts, ylab = "daily transaction count", main = "stock daily transaction count")
abline(h = c(200, 4000), col = c("blue", "red"))

2. Time Series Analysis on stock Average Scores from Jul-Dec 2018
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
zoo_score = zoo(means[,2], seq(from = as.Date("2018-07-02"), to = as.Date("2018-12-31"), by = 1))
plot.zoo(zoo_score,main="Time Series Plot of Score", ylab = "Daily Avg Score")

library(forecast)
model1 = auto.arima(zoo_score)
summary(model1)
## Series: zoo_score
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.9672
## s.e. 0.0216
##
## sigma^2 estimated as 1907: log likelihood=-946.48
## AIC=1896.97 AICc=1897.03 BIC=1903.37
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1.666771 43.43442 28.12934 -1.940551 8.074246 0.8061097
## ACF1
## Training set 0.06174204
tsdiag(model1,gof=20)

Box.test(model1$resid, lag = 20, type = "Ljung", fitdf = 2)### P-value=0.005837, there is serial correlation
##
## Box-Ljung test
##
## data: model1$resid
## X-squared = 36.686, df = 18, p-value = 0.005757
Box.test(model1$resid^2, lag = 20, type = "Ljung", fitdf = 2)####ARCH EFFECT, p_value = 0.1373, there is arch effect
##
## Box-Ljung test
##
## data: model1$resid^2
## X-squared = 20.653, df = 18, p-value = 0.2973
3. Use Garch Models: in order to fix serial correlation and arch effect
require(rugarch)
## Loading required package: rugarch
## Loading required package: parallel
##
## Attaching package: 'rugarch'
## The following object is masked from 'package:stats':
##
## sigma
spec7=ugarchspec(variance.model=list(model="fGARCH",submodel="TGARCH",garchOrder=c(1,1)),mean.model=list(armaOrder=c(2,2)),distribution.model="std")
fit7=ugarchfit(zoo_score,spec=spec7)
show(fit7)
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : fGARCH(1,1)
## fGARCH Sub-Model : TGARCH
## Mean Model : ARFIMA(2,0,2)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 410.633802 1.407600 291.72629 0.000000
## ar1 -0.884774 0.001161 -762.33850 0.000000
## ar2 -1.047637 0.001534 -683.11312 0.000000
## ma1 0.887608 0.001256 706.79744 0.000000
## ma2 1.048684 0.001207 869.07614 0.000000
## omega 0.665507 1.162826 0.57232 0.567106
## alpha1 0.088923 0.050669 1.75497 0.079264
## beta1 0.922889 0.034411 26.81986 0.000000
## eta11 1.000000 0.706077 1.41627 0.156695
## shape 2.212202 0.112624 19.64229 0.000000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 410.633802 2.335907 175.79204 0.000000
## ar1 -0.884774 0.001255 -705.12335 0.000000
## ar2 -1.047637 0.002015 -519.83119 0.000000
## ma1 0.887608 0.001365 650.15252 0.000000
## ma2 1.048684 0.001541 680.57692 0.000000
## omega 0.665507 1.429395 0.46559 0.641511
## alpha1 0.088923 0.044089 2.01692 0.043703
## beta1 0.922889 0.028948 31.88040 0.000000
## eta11 1.000000 0.969282 1.03169 0.302217
## shape 2.212202 0.121191 18.25391 0.000000
##
## LogLikelihood : -900.9015
##
## Information Criteria
## ------------------------------------
##
## Akaike 9.9552
## Bayes 10.1306
## Shibata 9.9496
## Hannan-Quinn 10.0263
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.1174 7.319e-01
## Lag[2*(p+q)+(p+q)-1][11] 9.7205 7.531e-08
## Lag[4*(p+q)+(p+q)-1][19] 14.8657 3.164e-02
## d.o.f=4
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.004597 0.9459
## Lag[2*(p+q)+(p+q)-1][5] 1.542359 0.7295
## Lag[4*(p+q)+(p+q)-1][9] 3.018147 0.7560
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.1766 0.500 2.000 0.6743
## ARCH Lag[5] 0.9388 1.440 1.667 0.7512
## ARCH Lag[7] 1.9694 2.315 1.543 0.7238
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 5.6777
## Individual Statistics:
## mu 2.87919
## ar1 0.01632
## ar2 0.01749
## ma1 0.01675
## ma2 0.01722
## omega 0.11235
## alpha1 0.11907
## beta1 0.12769
## eta11 0.09483
## shape 0.20883
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 2.29 2.54 3.05
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 0.80665 0.4209
## Negative Sign Bias 0.33244 0.7399
## Positive Sign Bias 0.08712 0.9307
## Joint Effect 0.87565 0.8313
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 70.22 8.442e-08
## 2 30 102.74 3.551e-10
## 3 40 116.23 1.313e-09
## 4 50 152.25 1.631e-12
##
##
## Elapsed time : 1.209719
plot(fit7, which=9)

spec71=ugarchspec(variance.model=list(model="sGARCH", garchOrder=c(1,1)),mean.model=list(armaOrder=c(2,2)),distribution.model="std")
fit71=ugarchfit(zoo_score,spec=spec71)
show(fit71)
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(2,0,2)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 408.740267 1.559346 262.12284 0.00000
## ar1 -0.891486 0.000413 -2157.65176 0.00000
## ar2 -1.074362 0.000587 -1831.13668 0.00000
## ma1 0.891907 0.000458 1946.36784 0.00000
## ma2 1.074360 0.000362 2969.46551 0.00000
## omega 80.530876 222.971901 0.36117 0.71797
## alpha1 0.041042 0.060678 0.67640 0.49879
## beta1 0.932729 0.110471 8.44321 0.00000
## shape 2.270486 0.153748 14.76761 0.00000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 408.740267 3.1836e+00 128.389494 0.000000
## ar1 -0.891486 1.6040e-03 -555.885062 0.000000
## ar2 -1.074362 1.4360e-03 -748.331379 0.000000
## ma1 0.891907 1.7710e-03 503.738194 0.000000
## ma2 1.074360 3.0900e-03 347.737518 0.000000
## omega 80.530876 1.0527e+03 0.076497 0.939024
## alpha1 0.041042 5.5095e-01 0.074493 0.940618
## beta1 0.932729 6.1805e-01 1.509159 0.131258
## shape 2.270486 1.1509e+00 1.972721 0.048527
##
## LogLikelihood : -902.2495
##
## Information Criteria
## ------------------------------------
##
## Akaike 9.9590
## Bayes 10.1169
## Shibata 9.9545
## Hannan-Quinn 10.0230
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.5824 0.4453652
## Lag[2*(p+q)+(p+q)-1][11] 8.3299 0.0002454
## Lag[4*(p+q)+(p+q)-1][19] 13.5660 0.0770598
## d.o.f=4
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 1.976 0.1598
## Lag[2*(p+q)+(p+q)-1][5] 4.836 0.1667
## Lag[4*(p+q)+(p+q)-1][9] 7.448 0.1645
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.8504 0.500 2.000 0.3564
## ARCH Lag[5] 2.3968 1.440 1.667 0.3900
## ARCH Lag[7] 4.1184 2.315 1.543 0.3298
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 7.4832
## Individual Statistics:
## mu 4.89015
## ar1 0.01210
## ar2 0.01729
## ma1 0.01235
## ma2 0.01697
## omega 0.29324
## alpha1 0.30551
## beta1 0.31577
## shape 0.34097
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 2.1 2.32 2.82
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 1.0794 0.2819
## Negative Sign Bias 0.7878 0.4319
## Positive Sign Bias 0.4853 0.6281
## Joint Effect 5.3764 0.1462
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 69.79 9.980e-08
## 2 30 91.26 2.325e-08
## 3 40 114.49 2.394e-09
## 4 50 114.54 3.631e-07
##
##
## Elapsed time : 0.5911944
plot(fit71, which=9)

spec6=ugarchspec(variance.model=list(model="sGARCH", garchOrder=c(1,1)),mean.model=list(armaOrder=c(2,2)),distribution.model="norm")
fit6=ugarchfit(zoo_score,spec=spec6)
show(fit6)
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(2,0,2)
## Distribution : norm
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 396.26897 3.218318 123.1292 0.00000
## ar1 -0.73498 0.020936 -35.1067 0.00000
## ar2 -0.96471 0.022666 -42.5620 0.00000
## ma1 0.82531 0.005535 149.1151 0.00000
## ma2 1.00197 0.003753 266.9777 0.00000
## omega 4.44303 5.206873 0.8533 0.39349
## alpha1 0.00000 0.001754 0.0000 1.00000
## beta1 0.99729 0.002293 434.9467 0.00000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 396.26897 2.974986 133.20026 0.00000
## ar1 -0.73498 0.019726 -37.25982 0.00000
## ar2 -0.96471 0.025239 -38.22274 0.00000
## ma1 0.82531 0.004669 176.77120 0.00000
## ma2 1.00197 0.005941 168.64334 0.00000
## omega 4.44303 15.329113 0.28984 0.77194
## alpha1 0.00000 0.009753 0.00000 1.00000
## beta1 0.99729 0.001166 855.63099 0.00000
##
## LogLikelihood : -940.8996
##
## Information Criteria
## ------------------------------------
##
## Akaike 10.370
## Bayes 10.511
## Shibata 10.367
## Hannan-Quinn 10.427
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.1948 0.6589
## Lag[2*(p+q)+(p+q)-1][11] 5.8724 0.5712
## Lag[4*(p+q)+(p+q)-1][19] 11.2858 0.2754
## d.o.f=4
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.6897 0.4063
## Lag[2*(p+q)+(p+q)-1][5] 2.7498 0.4546
## Lag[4*(p+q)+(p+q)-1][9] 5.8763 0.3127
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.005056 0.500 2.000 0.9433
## ARCH Lag[5] 1.076681 1.440 1.667 0.7103
## ARCH Lag[7] 3.858726 2.315 1.543 0.3674
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 4.0683
## Individual Statistics:
## mu 0.60591
## ar1 0.15105
## ar2 0.15701
## ma1 0.04591
## ma2 0.19138
## omega 0.09248
## alpha1 0.15130
## beta1 0.09301
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 1.89 2.11 2.59
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 0.8436 0.40000
## Negative Sign Bias 1.3876 0.16698
## Positive Sign Bias 0.2846 0.77630
## Joint Effect 7.5310 0.05677 *
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 67.60 2.293e-07
## 2 30 85.69 1.653e-07
## 3 40 97.87 5.764e-07
## 4 50 101.43 1.599e-05
##
##
## Elapsed time : 0.2422023
plot(fit6, which=9)

spec61=ugarchspec(variance.model=list(model="iGARCH", garchOrder=c(1,1)),mean.model=list(armaOrder=c(2,2)),distribution.model="std")
fit61=ugarchfit(zoo_score,spec=spec61)
show(fit61)
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : iGARCH(1,1)
## Mean Model : ARFIMA(2,0,2)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 408.784138 1.541884 2.6512e+02 0.00000
## ar1 -0.892122 0.000142 -6.3037e+03 0.00000
## ar2 -1.076037 0.000068 -1.5922e+04 0.00000
## ma1 0.892489 0.000115 7.7448e+03 0.00000
## ma2 1.076033 0.000351 3.0616e+03 0.00000
## omega 36.605691 79.009449 4.6331e-01 0.64314
## alpha1 0.049434 0.049176 1.0053e+00 0.31477
## beta1 0.950566 NA NA NA
## shape 2.242432 0.110723 2.0253e+01 0.00000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 408.784138 5.236565 78.063409 0.000000
## ar1 -0.892122 0.003489 -255.662414 0.000000
## ar2 -1.076037 0.004064 -264.799100 0.000000
## ma1 0.892489 0.003461 257.855045 0.000000
## ma2 1.076033 0.005618 191.537037 0.000000
## omega 36.605691 647.267135 0.056554 0.954900
## alpha1 0.049434 0.432042 0.114420 0.908905
## beta1 0.950566 NA NA NA
## shape 2.242432 0.776488 2.887918 0.003878
##
## LogLikelihood : -902.3592
##
## Information Criteria
## ------------------------------------
##
## Akaike 9.9493
## Bayes 10.0896
## Shibata 9.9457
## Hannan-Quinn 10.0062
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.5603 0.4541595
## Lag[2*(p+q)+(p+q)-1][11] 8.0635 0.0008861
## Lag[4*(p+q)+(p+q)-1][19] 13.1501 0.1000650
## d.o.f=4
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 2.695 0.1006
## Lag[2*(p+q)+(p+q)-1][5] 5.508 0.1172
## Lag[4*(p+q)+(p+q)-1][9] 7.967 0.1308
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.8565 0.500 2.000 0.3547
## ARCH Lag[5] 2.3686 1.440 1.667 0.3954
## ARCH Lag[7] 3.9329 2.315 1.543 0.3563
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 6.6065
## Individual Statistics:
## mu 4.79874
## ar1 0.01168
## ar2 0.01685
## ma1 0.01186
## ma2 0.01662
## omega 0.27244
## alpha1 0.29581
## shape 0.32178
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 1.89 2.11 2.59
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 1.1365 0.2573
## Negative Sign Bias 0.7809 0.4359
## Positive Sign Bias 0.4382 0.6617
## Joint Effect 5.5373 0.1364
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 76.13 8.571e-09
## 2 30 95.20 5.656e-09
## 3 40 124.10 8.396e-11
## 4 50 118.37 1.137e-07
##
##
## Elapsed time : 0.3677073
plot(fit71,which = 9)
