week 5

I am working on The “data.csv” file which contains more than 170.000 songs collected from Spotify Web API, you can find it here https://www.kaggle.com/yamaerenay/spotify-dataset-19212020-160k-tracks?select=data_by_year.csv

library(fpp2)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## ── Attaching packages ───────────────────────────────────────────────── fpp2 2.4 ──

## ✓ ggplot2   3.3.2     ✓ fma       2.4  
## ✓ forecast  8.13      ✓ expsmooth 2.3

##

library(forecast)
library(fGarch)

## Loading required package: timeDate

## Loading required package: timeSeries

## Loading required package: fBasics

library(ggplot2)
library(kableExtra)
library(rugarch)

## Loading required package: parallel

## 
## Attaching package: 'rugarch'

## The following object is masked from 'package:stats':
## 
##     sigma

library(tseries)

spotify = data_by_year = read.csv("~/Desktop/data_by_year.csv", stringsAsFactors=TRUE)
spotify.ts=ts(spotify[,4],start=c(1980),end=c(2010),frequency = 12)

autoplot(spotify.ts)+ggtitle("spotify.1921~2020")

#Garch
spotify.garch = garchFit(data = spotify.ts)

## 
## Series Initialization:
##  ARMA Model:                arma
##  Formula Mean:              ~ arma(0, 0)
##  GARCH Model:               garch
##  Formula Variance:          ~ garch(1, 1)
##  ARMA Order:                0 0
##  Max ARMA Order:            0
##  GARCH Order:               1 1
##  Max GARCH Order:           1
##  Maximum Order:             1
##  Conditional Dist:          norm
##  h.start:                   2
##  llh.start:                 1
##  Length of Series:          361
##  Recursion Init:            mci
##  Series Scale:              0.052181
## 
## Parameter Initialization:
##  Initial Parameters:          $params
##  Limits of Transformations:   $U, $V
##  Which Parameters are Fixed?  $includes
##  Parameter Matrix:
##                        U        V   params includes
##     mu     -102.07428938 102.0743 10.20743     TRUE
##     omega     0.00000100 100.0000  0.10000     TRUE
##     alpha1    0.00000001   1.0000  0.10000     TRUE
##     gamma1   -0.99999999   1.0000  0.10000    FALSE
##     beta1     0.00000001   1.0000  0.80000     TRUE
##     delta     0.00000000   2.0000  2.00000    FALSE
##     skew      0.10000000  10.0000  1.00000    FALSE
##     shape     1.00000000  10.0000  4.00000    FALSE
##  Index List of Parameters to be Optimized:
##     mu  omega alpha1  beta1 
##      1      2      3      5 
##  Persistence:                  0.9 
## 
## 
## --- START OF TRACE ---
## Selected Algorithm: nlminb 
## 
## R coded nlminb Solver: 
## 
##   0:     480.88620:  10.2074 0.100000 0.100000 0.800000
##   1:     474.99787:  10.2229 0.0748371 0.102476 0.781159
##   2:     466.06358:  10.3380 0.0531344 0.167143 0.782263
##   3:     460.37532:  10.2820 0.0268716 0.165681 0.766062
##   4:     460.30177:  10.5209 0.0248293 0.186223 0.769311
##   5:     455.88682:  10.3765 0.0189969 0.188338 0.766558
##   6:     454.72649:  10.3928 0.00944637 0.199982 0.762464
##   7:     453.48100:  10.3815 0.0162582 0.213969 0.760782
##   8:     451.99578:  10.3880 0.00961579 0.225447 0.752426
##   9:     450.18515:  10.4049 0.0160470 0.248090 0.731747
##  10:     449.43479:  10.3598 0.00180486 0.303497 0.706342
##  11:     448.86174:  10.4289 0.0189662 0.299531 0.646495
##  12:     448.33798:  10.3518 0.0226531 0.302860 0.649789
##  13:     447.51369:  10.4140 0.0229092 0.308144 0.655028
##  14:     447.05289:  10.3787 0.0217606 0.315924 0.659356
##  15:     446.64462:  10.3983 0.0194995 0.323399 0.654078
##  16:     446.48750:  10.3748 0.0123912 0.329052 0.656254
##  17:     446.00290:  10.4089 0.0126974 0.336197 0.661748
##  18:     445.80562:  10.3836 0.0116569 0.345388 0.662645
##  19:     445.66016:  10.3988 0.0118693 0.353523 0.657738
##  20:     445.59809:  10.3908 0.0123666 0.361137 0.651933
##  21:     445.57336:  10.3929 0.00926619 0.367972 0.657946
##  22:     445.55402:  10.4049 0.0104478 0.375655 0.652401
##  23:     445.48897:  10.3944 0.0109986 0.383302 0.646681
##  24:     445.48218:  10.3933 0.0105928 0.385333 0.644363
##  25:     445.48120:  10.3929 0.0109012 0.385768 0.643532
##  26:     445.48120:  10.3929 0.0109095 0.385893 0.643441
##  27:     445.48120:  10.3929 0.0109100 0.385885 0.643442
## 
## Final Estimate of the Negative LLH:
##  LLH:  -620.5651    norm LLH:  -1.719017 
##           mu        omega       alpha1        beta1 
## 5.423122e-01 2.970629e-05 3.858849e-01 6.434418e-01 
## 
## R-optimhess Difference Approximated Hessian Matrix:
##                  mu       omega        alpha1        beta1
## mu     -284021.7160     1602700      770.7974     3913.021
## omega  1602699.5334 -4007345025 -1120294.8765 -2166551.559
## alpha1     770.7974    -1120295    -1003.6825    -1421.240
## beta1     3913.0209    -2166552    -1421.2403    -2895.458
## attr(,"time")
## Time difference of 0.01768899 secs
## 
## --- END OF TRACE ---
## 
## 
## Time to Estimate Parameters:
##  Time difference of 0.06142902 secs

## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
##   Consider formula(paste(x, collapse = " ")) instead.

summary(spotify.garch)

## 
## Title:
##  GARCH Modelling 
## 
## Call:
##  garchFit(data = spotify.ts) 
## 
## Mean and Variance Equation:
##  data ~ garch(1, 1)
## <environment: 0x7fe3e9300ee0>
##  [data = spotify.ts]
## 
## Conditional Distribution:
##  norm 
## 
## Coefficient(s):
##         mu       omega      alpha1       beta1  
## 5.4231e-01  2.9706e-05  3.8588e-01  6.4344e-01  
## 
## Std. Errors:
##  based on Hessian 
## 
## Error Analysis:
##         Estimate  Std. Error  t value Pr(>|t|)    
## mu     5.423e-01   1.911e-03  283.818  < 2e-16 ***
## omega  2.971e-05   2.054e-05    1.447    0.148    
## alpha1 3.859e-01   5.771e-02    6.686 2.29e-11 ***
## beta1  6.434e-01   3.687e-02   17.453  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log Likelihood:
##  620.5651    normalized:  1.719017 
## 
## Description:
##  Wed Nov 25 18:41:31 2020 by user:  
## 
## 
## Standardised Residuals Tests:
##                                 Statistic p-Value     
##  Jarque-Bera Test   R    Chi^2  3.420818  0.1807918   
##  Shapiro-Wilk Test  R    W      0.9724914 2.355365e-06
##  Ljung-Box Test     R    Q(10)  1072.909  0           
##  Ljung-Box Test     R    Q(15)  1310.187  0           
##  Ljung-Box Test     R    Q(20)  1455.893  0           
##  Ljung-Box Test     R^2  Q(10)  35.02894  0.0001234532
##  Ljung-Box Test     R^2  Q(15)  43.5724   0.0001282542
##  Ljung-Box Test     R^2  Q(20)  57.00455  2.049346e-05
##  LM Arch Test       R    TR^2   49.45792  1.739157e-06
## 
## Information Criterion Statistics:
##       AIC       BIC       SIC      HQIC 
## -3.415873 -3.372783 -3.416115 -3.398741

garch.fc = predict(spotify.garch, plot = TRUE)

#Arima

spotify.arima = arima(spotify.ts)

summary(spotify.arima)

## 
## Call:
## arima(x = spotify.ts)
## 
## Coefficients:
##       intercept
##          0.5326
## s.e.     0.0027
## 
## sigma^2 estimated as 0.002715:  log likelihood = 554.31,  aic = -1104.62
## 
## Training set error measures:
##                        ME       RMSE        MAE       MPE    MAPE    MASE
## Training set 6.207419e-15 0.05210868 0.04219537 -0.962107 8.02855 1.66276
##                   ACF1
## Training set 0.6458336

checkresiduals(spotify.arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,0,0) with non-zero mean
## Q* = 1326, df = 23, p-value < 2.2e-16
## 
## Model df: 1.   Total lags used: 24

#ETS
spotify.ets = ets(spotify.ts, model = "ZZZ")

summary(spotify.ets)

## ETS(M,Ad,N) 
## 
## Call:
##  ets(y = spotify.ts, model = "ZZZ") 
## 
##   Smoothing parameters:
##     alpha = 0.3593 
##     beta  = 0.0307 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 0.3759 
##     b = 0.0787 
## 
##   sigma:  0.0624
## 
##       AIC      AICc       BIC 
## -327.0870 -326.8497 -303.7537 
## 
## Training set error measures:
##                       ME       RMSE        MAE        MPE   MAPE      MASE
## Training set -0.00079283 0.03609215 0.02154861 -0.5020733 4.1767 0.4864542
##                     ACF1
## Training set -0.04495717

autoplot(spotify.ets)

checkresiduals(spotify.ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,Ad,N)
## Q* = 31.432, df = 19, p-value = 0.03618
## 
## Model df: 5.   Total lags used: 24

week 5

Yuanye Gao

11/25/2020