I am working on The “data.csv” file which contains more than 170.000 songs collected from Spotify Web API, you can find it here https://www.kaggle.com/yamaerenay/spotify-dataset-19212020-160k-tracks?select=data_by_year.csv
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ───────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2 3.3.2 ✓ fma 2.4
## ✓ forecast 8.13 ✓ expsmooth 2.3
##
library(forecast)
library(fGarch)
## Loading required package: timeDate
## Loading required package: timeSeries
## Loading required package: fBasics
library(ggplot2)
library(kableExtra)
library(rugarch)
## Loading required package: parallel
##
## Attaching package: 'rugarch'
## The following object is masked from 'package:stats':
##
## sigma
library(tseries)
spotify = data_by_year = read.csv("~/Desktop/data_by_year.csv", stringsAsFactors=TRUE)
spotify.ts=ts(spotify[,4],start=c(1980),end=c(2010),frequency = 12)
autoplot(spotify.ts)+ggtitle("spotify.1921~2020")
#Garch
spotify.garch = garchFit(data = spotify.ts)
##
## Series Initialization:
## ARMA Model: arma
## Formula Mean: ~ arma(0, 0)
## GARCH Model: garch
## Formula Variance: ~ garch(1, 1)
## ARMA Order: 0 0
## Max ARMA Order: 0
## GARCH Order: 1 1
## Max GARCH Order: 1
## Maximum Order: 1
## Conditional Dist: norm
## h.start: 2
## llh.start: 1
## Length of Series: 361
## Recursion Init: mci
## Series Scale: 0.052181
##
## Parameter Initialization:
## Initial Parameters: $params
## Limits of Transformations: $U, $V
## Which Parameters are Fixed? $includes
## Parameter Matrix:
## U V params includes
## mu -102.07428938 102.0743 10.20743 TRUE
## omega 0.00000100 100.0000 0.10000 TRUE
## alpha1 0.00000001 1.0000 0.10000 TRUE
## gamma1 -0.99999999 1.0000 0.10000 FALSE
## beta1 0.00000001 1.0000 0.80000 TRUE
## delta 0.00000000 2.0000 2.00000 FALSE
## skew 0.10000000 10.0000 1.00000 FALSE
## shape 1.00000000 10.0000 4.00000 FALSE
## Index List of Parameters to be Optimized:
## mu omega alpha1 beta1
## 1 2 3 5
## Persistence: 0.9
##
##
## --- START OF TRACE ---
## Selected Algorithm: nlminb
##
## R coded nlminb Solver:
##
## 0: 480.88620: 10.2074 0.100000 0.100000 0.800000
## 1: 474.99787: 10.2229 0.0748371 0.102476 0.781159
## 2: 466.06358: 10.3380 0.0531344 0.167143 0.782263
## 3: 460.37532: 10.2820 0.0268716 0.165681 0.766062
## 4: 460.30177: 10.5209 0.0248293 0.186223 0.769311
## 5: 455.88682: 10.3765 0.0189969 0.188338 0.766558
## 6: 454.72649: 10.3928 0.00944637 0.199982 0.762464
## 7: 453.48100: 10.3815 0.0162582 0.213969 0.760782
## 8: 451.99578: 10.3880 0.00961579 0.225447 0.752426
## 9: 450.18515: 10.4049 0.0160470 0.248090 0.731747
## 10: 449.43479: 10.3598 0.00180486 0.303497 0.706342
## 11: 448.86174: 10.4289 0.0189662 0.299531 0.646495
## 12: 448.33798: 10.3518 0.0226531 0.302860 0.649789
## 13: 447.51369: 10.4140 0.0229092 0.308144 0.655028
## 14: 447.05289: 10.3787 0.0217606 0.315924 0.659356
## 15: 446.64462: 10.3983 0.0194995 0.323399 0.654078
## 16: 446.48750: 10.3748 0.0123912 0.329052 0.656254
## 17: 446.00290: 10.4089 0.0126974 0.336197 0.661748
## 18: 445.80562: 10.3836 0.0116569 0.345388 0.662645
## 19: 445.66016: 10.3988 0.0118693 0.353523 0.657738
## 20: 445.59809: 10.3908 0.0123666 0.361137 0.651933
## 21: 445.57336: 10.3929 0.00926619 0.367972 0.657946
## 22: 445.55402: 10.4049 0.0104478 0.375655 0.652401
## 23: 445.48897: 10.3944 0.0109986 0.383302 0.646681
## 24: 445.48218: 10.3933 0.0105928 0.385333 0.644363
## 25: 445.48120: 10.3929 0.0109012 0.385768 0.643532
## 26: 445.48120: 10.3929 0.0109095 0.385893 0.643441
## 27: 445.48120: 10.3929 0.0109100 0.385885 0.643442
##
## Final Estimate of the Negative LLH:
## LLH: -620.5651 norm LLH: -1.719017
## mu omega alpha1 beta1
## 5.423122e-01 2.970629e-05 3.858849e-01 6.434418e-01
##
## R-optimhess Difference Approximated Hessian Matrix:
## mu omega alpha1 beta1
## mu -284021.7160 1602700 770.7974 3913.021
## omega 1602699.5334 -4007345025 -1120294.8765 -2166551.559
## alpha1 770.7974 -1120295 -1003.6825 -1421.240
## beta1 3913.0209 -2166552 -1421.2403 -2895.458
## attr(,"time")
## Time difference of 0.01768899 secs
##
## --- END OF TRACE ---
##
##
## Time to Estimate Parameters:
## Time difference of 0.06142902 secs
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
summary(spotify.garch)
##
## Title:
## GARCH Modelling
##
## Call:
## garchFit(data = spotify.ts)
##
## Mean and Variance Equation:
## data ~ garch(1, 1)
## <environment: 0x7fe3e9300ee0>
## [data = spotify.ts]
##
## Conditional Distribution:
## norm
##
## Coefficient(s):
## mu omega alpha1 beta1
## 5.4231e-01 2.9706e-05 3.8588e-01 6.4344e-01
##
## Std. Errors:
## based on Hessian
##
## Error Analysis:
## Estimate Std. Error t value Pr(>|t|)
## mu 5.423e-01 1.911e-03 283.818 < 2e-16 ***
## omega 2.971e-05 2.054e-05 1.447 0.148
## alpha1 3.859e-01 5.771e-02 6.686 2.29e-11 ***
## beta1 6.434e-01 3.687e-02 17.453 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log Likelihood:
## 620.5651 normalized: 1.719017
##
## Description:
## Wed Nov 25 18:41:31 2020 by user:
##
##
## Standardised Residuals Tests:
## Statistic p-Value
## Jarque-Bera Test R Chi^2 3.420818 0.1807918
## Shapiro-Wilk Test R W 0.9724914 2.355365e-06
## Ljung-Box Test R Q(10) 1072.909 0
## Ljung-Box Test R Q(15) 1310.187 0
## Ljung-Box Test R Q(20) 1455.893 0
## Ljung-Box Test R^2 Q(10) 35.02894 0.0001234532
## Ljung-Box Test R^2 Q(15) 43.5724 0.0001282542
## Ljung-Box Test R^2 Q(20) 57.00455 2.049346e-05
## LM Arch Test R TR^2 49.45792 1.739157e-06
##
## Information Criterion Statistics:
## AIC BIC SIC HQIC
## -3.415873 -3.372783 -3.416115 -3.398741
garch.fc = predict(spotify.garch, plot = TRUE)
#Arima
spotify.arima = arima(spotify.ts)
summary(spotify.arima)
##
## Call:
## arima(x = spotify.ts)
##
## Coefficients:
## intercept
## 0.5326
## s.e. 0.0027
##
## sigma^2 estimated as 0.002715: log likelihood = 554.31, aic = -1104.62
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 6.207419e-15 0.05210868 0.04219537 -0.962107 8.02855 1.66276
## ACF1
## Training set 0.6458336
checkresiduals(spotify.arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,0) with non-zero mean
## Q* = 1326, df = 23, p-value < 2.2e-16
##
## Model df: 1. Total lags used: 24
#ETS
spotify.ets = ets(spotify.ts, model = "ZZZ")
summary(spotify.ets)
## ETS(M,Ad,N)
##
## Call:
## ets(y = spotify.ts, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 0.3593
## beta = 0.0307
## phi = 0.8
##
## Initial states:
## l = 0.3759
## b = 0.0787
##
## sigma: 0.0624
##
## AIC AICc BIC
## -327.0870 -326.8497 -303.7537
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.00079283 0.03609215 0.02154861 -0.5020733 4.1767 0.4864542
## ACF1
## Training set -0.04495717
autoplot(spotify.ets)
checkresiduals(spotify.ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,N)
## Q* = 31.432, df = 19, p-value = 0.03618
##
## Model df: 5. Total lags used: 24