The goal of this project is to predict Bitcoin prices using time series models. The Bitcoin data was downloaded through the Yahoo Finance API, then explored and processed into a form the models can consume. Four models were fitted and evaluated at a daily frequency: a naive (persistence) forecast, ARIMA, SARIMAX and Prophet. All of them produced reasonable results except the naive forecast; among them, ARIMA had the smallest percentage error.
# Silence warnings
import sys
import warnings
import matplotlib.cbook
if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation)
# Data Manipulation
import pandas as pd
from pandas import Series
from pandas.tseries.offsets import DateOffset
import numpy as np
import scipy
from scipy import stats
import scipy.stats as scs
import itertools
from scipy.special import boxcox, inv_boxcox
# Data Visualization
import matplotlib.pyplot as plt
import matplotlib as m
import seaborn as sns
# Data Source
import yfinance as yf
# Stats models
import statsmodels
import statsmodels.api as sm
import statsmodels.stats as sms
from statsmodels.stats.stattools import jarque_bera
from statsmodels.tsa.arima_model import ARIMA  # deprecated; removed in statsmodels >= 0.13 (newer API: statsmodels.tsa.arima.model.ARIMA)
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels.tsa.api as smt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.graphics import tsaplots
import pmdarima as pm
# Date tools
from datetime import datetime, timedelta, date
# Prophet model
import prophet
from prophet import Prophet
# Imports for model metrics and performance
import math
from math import sqrt
import sklearn
from sklearn.metrics import mean_squared_error
import plotly as py
import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from sklearn.preprocessing import MinMaxScaler
# Graphic params
plt.rcParams["figure.figsize"] = (14,6)
plt.style.use('fivethirtyeight')
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
m.rcParams['axes.labelsize'] = 14
m.rcParams['xtick.labelsize'] = 12
m.rcParams['ytick.labelsize'] = 12
m.rcParams['text.color'] = 'k'
Using the Yahoo Finance API to get the Bitcoin price data.
data = yf.download(tickers = 'BTC-USD', start = '2015-01-01', end = date.today(), interval = '1d')
[*********************100%***********************] 1 of 1 completed
Setting Date as the index.
dataClose = data['Close'].reset_index()
dataCloseSerie = dataClose.set_index('Date')
# Data Visualization
data.head()
| Date | Open | High | Low | Close | Adj Close | Volume |
|---|---|---|---|---|---|---|
| 2015-01-01 | 320.434998 | 320.434998 | 314.002991 | 314.248993 | 314.248993 | 8036550 |
| 2015-01-02 | 314.079010 | 315.838989 | 313.565002 | 315.032013 | 315.032013 | 7860650 |
| 2015-01-03 | 314.846008 | 315.149994 | 281.082001 | 281.082001 | 281.082001 | 33054400 |
| 2015-01-04 | 281.145996 | 287.230011 | 257.612000 | 264.195007 | 264.195007 | 55629100 |
| 2015-01-05 | 265.084015 | 278.341003 | 265.084015 | 274.473999 | 274.473999 | 43962800 |
# Plot the time series
data.Close.plot(figsize = (15, 6))
plt.show()
# Defining a function to test series stationarity
def stationarity_test(series):
    # Calculate rolling statistics
    rolmean = series.rolling(window = 12).mean()
    rolstd = series.rolling(window = 12).std()
    # Plot rolling statistics
    plt.plot(series, color = 'blue', label = 'Original')
    plt.plot(rolmean, color = 'red', label = 'Rolling Mean')
    plt.plot(rolstd, color = 'black', label = 'Rolling Standard Deviation')
    plt.legend(loc = 'best')
    plt.title('Rolling Stats - Mean & Standard Deviation')
    plt.show()
    # Dickey-Fuller test
    print('\nDickey-Fuller Test Result:\n')
    dftest = adfuller(series, autolag = 'AIC')
    dfout = pd.Series(dftest[0:4], index = ['Test Statistic',
                                            'p-value',
                                            'Number of Lags',
                                            'Number of Used Observations'])
    # Append the critical values to the output
    for key, value in dftest[4].items():
        dfout['Critical Value (%s)' % key] = value
    print(dfout)
    # Interpret the p-value
    print('\nConclusion:')
    if dftest[1] > 0.05:
        print('\nThe p-value > 0.05, so we have no evidence to reject H0.')
        print('This series is probably not stationary.')
    else:
        print('\nThe p-value < 0.05, so we have evidence to reject H0.')
        print('This series is probably stationary.')
# Apply function
stationarity_test(data['Close'])
Dickey-Fuller Test Result:

Test Statistic                    -0.529893
p-value                            0.886050
Number of Lags                    27.000000
Number of Used Observations     2433.000000
Critical Value (1%)               -3.433041
Critical Value (5%)               -2.862729
Critical Value (10%)              -2.567403
dtype: float64

Conclusion:

The p-value > 0.05, so we have no evidence to reject H0.
This series is probably not stationary.
The series must be stationary before it is passed to the model. A series is stationary when properties such as its mean and variance remain constant over time. This series will therefore be differenced to make it stationary.
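As a quick aside (a minimal sketch, separate from the pipeline below, which combines a Box-Cox transform with a manual differencing helper), pandas' built-in diff() applies the same first-order differencing idea:

# Sketch: first-order differencing with pandas, then re-running the ADF test
close_diff = data['Close'].diff().dropna()
stationarity_test(close_diff)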
# Decompose - Additive
result_add = seasonal_decompose(data['Close'], model = 'additive', period=365, extrapolate_trend = 'freq')
# Plot
result_add.plot()
plt.show()
# Decompose - Multiplicative
result_mul = seasonal_decompose(data['Close'], model = 'multiplicative', period = 365, extrapolate_trend = 'freq')
# Plot
result_mul.plot()
plt.show()
# Extracting Components
# Observations = (Seasonal * Trend * Resid)
df_reconstructed = pd.concat([result_mul.seasonal, result_mul.trend, result_mul.resid, result_mul.observed], axis = 1)
df_reconstructed.columns = ['Seasonal', 'Trend', 'Resid', 'Observed_Values']
df_reconstructed.head()
| Date | Seasonal | Trend | Resid | Observed_Values |
|---|---|---|---|---|
| 2015-01-01 | 1.419184 | 91.147847 | 2.429343 | 314.248993 |
| 2015-01-02 | 1.407753 | 91.980494 | 2.432946 | 315.032013 |
| 2015-01-03 | 1.383449 | 92.813141 | 2.189074 | 281.082001 |
| 2015-01-04 | 1.376423 | 93.645789 | 2.049673 | 264.195007 |
| 2015-01-05 | 1.421696 | 94.478436 | 2.043440 | 274.473999 |
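As a quick sanity check (a sketch using the components above), the multiplicative identity can be verified directly:

# Verify the multiplicative identity: observed = seasonal * trend * resid
reconstructed = result_mul.seasonal * result_mul.trend * result_mul.resid
print(np.allclose(reconstructed, result_mul.observed))  # True up to floating-point error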
The series shows an increasing trend, especially since 2020. It also has an annual seasonal component and random residuals.
# Plot ACF (Auto Correlation)
acf_plot = plot_acf(data.Close, lags = 100)
The series is strongly autocorrelated.
Applying a Box-Cox transformation to the series because it does not have constant variance.
data['Close_Box'], lmbda = stats.boxcox(data.Close)
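For reference, the Box-Cox transform is y(λ) = (y^λ − 1) / λ for λ ≠ 0 and log(y) for λ = 0; stats.boxcox estimates λ by maximum likelihood. A quick round-trip check (a sketch):

# Sanity check: inv_boxcox should recover the original closing prices
print('lambda =', lmbda)
print(np.allclose(inv_boxcox(data['Close_Box'], lmbda), data['Close']))  # True up to floating-point error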
Defining a function to difference the series so that it becomes stationary, and a function to invert the differencing and recover the original values later.
# Differencing function
def diffFunc(dataset, interval = 1):
    values = np.asarray(dataset)
    diff = list()
    for i in range(interval, len(values)):
        # Difference between the current value and the value `interval` steps back
        diff.append(values[i] - values[i - interval])
    return diff

# Invert the differenced values using the observed history
def inverse_difference(history, yhat):
    history = np.asarray(history)
    yhat = np.asarray(yhat)
    desdiff = list()
    for i in range(1, len(yhat)):
        # Add back the previous observed level
        desdiff.append(yhat[i] + history[i - 1])
    return desdiff
# First-order differencing
cox_diff = diffFunc(data.Close_Box)
plt.plot(cox_diff);
# Testing the inverse-difference function
undif = inverse_difference(data['Close'], cox_diff)
undif
plt.plot(undif);
len(cox_diff)
2460
# Convert to DataFrame
cox_diff = pd.DataFrame(cox_diff)
# Testing whether the differencing was enough
stationarity_test(cox_diff)
Dickey-Fuller Test Result:

Test Statistic                 -1.471611e+01
p-value                         2.798389e-27
Number of Lags                  9.000000e+00
Number of Used Observations     2.450000e+03
Critical Value (1%)            -3.433022e+00
Critical Value (5%)            -2.862720e+00
Critical Value (10%)           -2.567398e+00
dtype: float64

Conclusion:

The p-value < 0.05, so we have evidence to reject H0.
This series is probably stationary.
# Remove NA values generated
cox_diff.dropna(inplace=True)
# Prepend a zero for the first observation lost to differencing
x = pd.Series([0])
cox_diff = pd.concat([x, cox_diff])
data = data.reset_index()
cox_diff = cox_diff.set_index(data['Date'])
data = data.set_index('Date')
data['cox_diff'] = cox_diff
# Model accuracy metrics
def performance(y_true, y_pred):
    mse = ((y_pred - y_true) ** 2).mean()
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    print('MSE of predictions is {}'.format(round(mse, 2)))
    print('RMSE of predictions is {}'.format(round(np.sqrt(mse), 2)))
    print('MAPE of predictions is {}'.format(round(mape, 2)))
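A quick check of the helper with toy arrays (hypothetical values) confirms the metrics:

# Toy example: errors of +10 and -10 on values 100 and 200
performance(np.array([100.0, 200.0]), np.array([110.0, 190.0]))
# MSE of predictions is 100.0
# RMSE of predictions is 10.0
# MAPE of predictions is 7.5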
# Split into train and test sets (80/20)
X = data
train_size = int(len(X) * 0.80)
trainset, testset = X[0:train_size], X[train_size:]
target = trainset['cox_diff']
# Create an array with target var values of trainset
array_count_train = np.asarray(target)
array_count_train
array([ 0. , 0.00392918, -0.17923908, ..., -0.04434739,
-0.09800923, 0.02277476])
# Copy of validation data
df_valid_cp = testset.copy()
df_valid_cp.head()
| Date | Open | High | Low | Close | Adj Close | Volume | Close_Box | cox_diff |
|---|---|---|---|---|---|---|---|---|
| 2020-05-23 | 9185.062500 | 9302.501953 | 9118.108398 | 9209.287109 | 9209.287109 | 27727866812 | 13.403961 | 0.005995 |
| 2020-05-24 | 9212.283203 | 9288.404297 | 8787.250977 | 8790.368164 | 8790.368164 | 32518803300 | 13.308030 | -0.095931 |
| 2020-05-25 | 8786.107422 | 8951.005859 | 8719.667969 | 8906.934570 | 8906.934570 | 31288157264 | 13.335139 | 0.027109 |
| 2020-05-26 | 8909.585938 | 8991.967773 | 8757.293945 | 8835.052734 | 8835.052734 | 29584186947 | 13.318460 | -0.016678 |
| 2020-05-27 | 8837.380859 | 9203.320312 | 8834.157227 | 9181.017578 | 9181.017578 | 32740536902 | 13.397615 | 0.079154 |
# Naive forecast: repeat the last training observation for every test step
df_valid_cp['naive_prediction'] = array_count_train[len(array_count_train) - 1]
# Real and Predicted Value
df_valid_cp[['cox_diff', 'naive_prediction']].head()
| Date | cox_diff | naive_prediction |
|---|---|---|
| 2020-05-23 | 0.005995 | 0.022775 |
| 2020-05-24 | -0.095931 | 0.022775 |
| 2020-05-25 | 0.027109 | 0.022775 |
| 2020-05-26 | -0.016678 | 0.022775 |
| 2020-05-27 | 0.079154 | 0.022775 |
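For comparison with the later models, the same performance helper can score the naive forecast on the transformed scale (a sketch, not a run from the original notebook):

# Score the naive forecast on the differenced Box-Cox scale
performance(df_valid_cp['cox_diff'].values, df_valid_cp['naive_prediction'].values)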
# Plot
plt.title("Prediction Using Naive Bayes")
plt.plot(trainset.index, trainset['cox_diff'], label = 'Train Set')
plt.plot(testset.index, testset['cox_diff'], label = 'Test Set')
plt.plot(df_valid_cp.index, df_valid_cp['naive_prediction'], label = 'Naive Forecast')
plt.legend(loc = 'best')
plt.show()
# Model
model_AR = ARIMA(target, order = (2, 1, 0))
/home/natalia/anaconda3/lib/python3.8/site-packages/statsmodels/tsa/base/tsa_model.py:581: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
# Fitting
model_arima_fit = model_AR.fit()
# Real Data and Predictions
real_data = target
pred = model_arima_fit.fittedvalues
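Note that statsmodels.tsa.arima_model.ARIMA was removed in statsmodels 0.13; an equivalent fit with the newer API would look roughly like this (a sketch, not the run used here):

# Sketch: the same ARIMA(2, 1, 0) fit with the newer statsmodels API
from statsmodels.tsa.arima.model import ARIMA as ARIMAv2
fit_v2 = ARIMAv2(target, order = (2, 1, 0)).fit()
fc_v2 = fit_v2.get_forecast(steps = len(testset))
mean_v2, conf_v2 = fc_v2.predicted_mean, fc_v2.conf_int(alpha = 0.05)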
Plot of the predictions and the original series on the transformed scale.
# Plot
plt.plot(real_data, label = 'Original Serie')
plt.plot(pred, color = 'yellow', label = 'Predictions')
plt.legend(loc = 'best')
plt.show()
# Summary
model_arima_fit.summary2()
| Model: | ARIMA | BIC: | -3928.7627 |
|---|---|---|---|
| Dependent Variable: | D.cox_diff | Log-Likelihood: | 1979.5 |
| Date: | 2021-09-30 13:36 | Scale: | 1.0000 |
| No. Observations: | 1967 | Method: | css-mle |
| Df Model: | 3 | Sample: | 1 |
| Df Residuals: | 1964 | | 8 |
| Converged: | 1.0000 | S.D. of innovations: | 0.088 |
| No. Iterations: | 15.0000 | HQIC: | -3942.891 |
| AIC: | -3951.0997 | | |
| | Coef. | Std.Err. | t | P>\|t\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| const | -0.0000 | 0.0010 | -0.0005 | 0.9996 | -0.0019 | 0.0019 |
| ar.L1.D.cox_diff | -0.6890 | 0.0212 | -32.4444 | 0.0000 | -0.7306 | -0.6474 |
| ar.L2.D.cox_diff | -0.3367 | 0.0212 | -15.8460 | 0.0000 | -0.3783 | -0.2950 |
| | Real | Imaginary | Modulus | Frequency |
|---|---|---|---|---|
| AR.1 | -1.0233 | -1.3868 | 1.7235 | -0.3512 |
| AR.2 | -1.0233 | 1.3868 | 1.7235 | 0.3512 |
# Forecast - alpha = 0.05 corresponds to a 95% confidence interval
fc, se, conf = model_arima_fit.forecast(len(testset), alpha = 0.05)
# Dataframe to plot
fc_series = pd.Series(fc, index = testset.index)
lower_limit = pd.Series(conf[:, 0], index = testset.index)
upper_limit = pd.Series(conf[:, 1], index = testset.index)
Transforming the forecasts back to the original scale to plot them against the observed prices.
# Getting real values to forecast
forecast = inverse_difference(testset['Close_Box'], fc_series)
forecast = inv_boxcox(forecast, lmbda)
forecast = pd.DataFrame(forecast)
forecast = pd.concat([x, forecast])
forecast.reset_index()
forecast = forecast.set_index(testset.index)
# Getting real values to lower limit
lower = inverse_difference(testset['Close_Box'], lower_limit)
lower = inv_boxcox(lower, lmbda)
# Getting real values to upper limit
upper = inverse_difference(testset['Close_Box'], upper_limit)
upper = inv_boxcox(upper, lmbda)
# Plot
plt.plot(trainset['Close'], label = 'Train Data')
plt.plot(testset['Close'], label = 'Test Data')
plt.plot(forecast[1:], label = 'Predictions')
plt.fill_between(lower_limit.index[1:], lower, upper, color = 'k', alpha = .15)
plt.title('Predictions with ARIMA Model')
plt.legend(loc = 'upper left', fontsize = 12)
plt.show()
# Performance
arima_results = performance(testset['Close'].values, forecast.values)
arima_results
MSE of predictions is 597212517.3
RMSE of predictions is 24437.93
MAPE of predictions is 98.06
Grid search to find the best parameters for the model.
# Grid Search to find the best model
model_sarima = pm.auto_arima(trainset['cox_diff'],
seasonal = True,
m = 12,
d = 0,
D = 1,
max_p = 2,
max_q = 2,
trace = True,
error_action = 'ignore',
suppress_warnings = True)
Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,1,1)[12] intercept   : AIC=inf, Time=27.45 sec
 ARIMA(0,0,0)(0,1,0)[12] intercept   : AIC=-3133.761, Time=0.63 sec
 ARIMA(1,0,0)(1,1,0)[12] intercept   : AIC=-3674.918, Time=4.93 sec
 ARIMA(0,0,1)(0,1,1)[12] intercept   : AIC=inf, Time=11.08 sec
 ARIMA(0,0,0)(0,1,0)[12]             : AIC=-3135.756, Time=0.25 sec
 ARIMA(1,0,0)(0,1,0)[12] intercept   : AIC=-3132.029, Time=0.65 sec
 ARIMA(1,0,0)(2,1,0)[12] intercept   : AIC=-3887.992, Time=16.38 sec
 ARIMA(1,0,0)(2,1,1)[12] intercept   : AIC=inf, Time=40.10 sec
 ARIMA(1,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=13.29 sec
 ARIMA(0,0,0)(2,1,0)[12] intercept   : AIC=-3889.471, Time=14.39 sec
 ARIMA(0,0,0)(1,1,0)[12] intercept   : AIC=-3676.873, Time=3.55 sec
 ARIMA(0,0,0)(2,1,1)[12] intercept   : AIC=inf, Time=40.99 sec
 ARIMA(0,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=14.93 sec
 ARIMA(0,0,1)(2,1,0)[12] intercept   : AIC=-3887.981, Time=16.08 sec
 ARIMA(1,0,1)(2,1,0)[12] intercept   : AIC=-3886.111, Time=22.10 sec
 ARIMA(0,0,0)(2,1,0)[12]             : AIC=-3891.434, Time=3.43 sec
 ARIMA(0,0,0)(1,1,0)[12]             : AIC=-3678.858, Time=0.93 sec
 ARIMA(0,0,0)(2,1,1)[12]             : AIC=inf, Time=21.48 sec
 ARIMA(0,0,0)(1,1,1)[12]             : AIC=inf, Time=7.31 sec
 ARIMA(1,0,0)(2,1,0)[12]             : AIC=-3889.954, Time=5.33 sec
 ARIMA(0,0,1)(2,1,0)[12]             : AIC=-3889.943, Time=8.78 sec
 ARIMA(1,0,1)(2,1,0)[12]             : AIC=-3888.073, Time=5.19 sec

Best model: ARIMA(0,0,0)(2,1,0)[12]
Total fit time: 279.280 seconds
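auto_arima automates the stepwise AIC search shown above. An equivalent manual search (a sketch using the already-imported itertools, over an assumed small parameter grid) would be:

# Sketch: exhaustive AIC grid search over small non-seasonal and seasonal orders
best_aic, best_cfg = np.inf, None
for p, q, P, Q in itertools.product(range(2), range(2), range(2), range(2)):
    try:
        res = sm.tsa.statespace.SARIMAX(trainset['cox_diff'],
                                        order = (p, 0, q),
                                        seasonal_order = (P, 1, Q, 12)).fit(disp = False)
        if res.aic < best_aic:
            best_aic, best_cfg = res.aic, (p, q, P, Q)
    except Exception:
        continue
print('Best (p, q, P, Q):', best_cfg, 'with AIC:', best_aic)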
# Summary
print(model_sarima.summary())
SARIMAX Results
================================================================================
Dep. Variable: y No. Observations: 1968
Model: SARIMAX(2, 1, 0, 12) Log Likelihood 1948.717
Date: Thu, 30 Sep 2021 AIC -3891.434
Time: 13:41:14 BIC -3874.698
Sample: 0 HQIC -3885.282
- 1968
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.S.L12 -0.6556 0.012 -54.425 0.000 -0.679 -0.632
ar.S.L24 -0.3272 0.014 -24.199 0.000 -0.354 -0.301
sigma2 0.0080 0.000 74.438 0.000 0.008 0.008
===================================================================================
Ljung-Box (L1) (Q): 0.53 Jarque-Bera (JB): 7852.83
Prob(Q): 0.47 Prob(JB): 0.00
Heteroskedasticity (H): 2.61 Skew: -0.74
Prob(H) (two-sided): 0.00 Kurtosis: 12.70
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Create the model
model_sarima_v1 = sm.tsa.statespace.SARIMAX(data['cox_diff'],
order = (0, 0, 0),
seasonal_order = (2, 1, 0, 12),
enforce_stationarity = False,
enforce_invertibility = False)
/home/natalia/anaconda3/lib/python3.8/site-packages/statsmodels/tsa/base/tsa_model.py:581: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
# Train the Model
model_sarima_v1_fit = model_sarima_v1.fit()
/home/natalia/anaconda3/lib/python3.8/site-packages/statsmodels/base/model.py:566: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
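The ConvergenceWarning can often be resolved by raising the optimizer's iteration limit (a sketch; the maxiter value is an assumption to tune):

# Retry the fit with a higher iteration cap
model_sarima_v1_fit = model_sarima_v1.fit(maxiter = 200, disp = False)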
# Summary
print(model_sarima_v1_fit.summary())
SARIMAX Results
================================================================================
Dep. Variable: cox_diff No. Observations: 2461
Model: SARIMAX(2, 1, 0, 12) Log Likelihood 2368.938
Date: Thu, 30 Sep 2021 AIC -4731.877
Time: 13:41:16 BIC -4714.496
Sample: 0 HQIC -4725.557
- 2461
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.S.L12 -0.6606 0.012 -56.983 0.000 -0.683 -0.638
ar.S.L24 -0.3013 0.013 -22.956 0.000 -0.327 -0.276
sigma2 0.0083 0.000 76.247 0.000 0.008 0.009
===================================================================================
Ljung-Box (L1) (Q): 2.53 Jarque-Bera (JB): 6342.67
Prob(Q): 0.11 Prob(JB): 0.00
Heteroskedasticity (H): 3.37 Skew: -0.57
Prob(H) (two-sided): 0.00 Kurtosis: 10.84
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Model stats
model_sarima_v1_fit.plot_diagnostics(figsize = (16, 8))
plt.show()
# Predictions
sarima_predict_1 = model_sarima_v1_fit.get_prediction(start = pd.to_datetime('2020-05-20'),
end = pd.to_datetime('2021-09-25'),
dynamic = False)
# Confidence Interval
sarima_predict_conf_1 = sarima_predict_1.conf_int()
sarima_predict_conf_1
| Date | lower cox_diff | upper cox_diff |
|---|---|---|
| 2020-05-20 | -0.186857 | 0.170240 |
| 2020-05-21 | -0.185851 | 0.171246 |
| 2020-05-22 | -0.259475 | 0.097622 |
| 2020-05-23 | -0.060873 | 0.296224 |
| 2020-05-24 | -0.162407 | 0.194690 |
| ... | ... | ... |
| 2021-09-21 | -0.191933 | 0.165164 |
| 2021-09-22 | -0.226468 | 0.130630 |
| 2021-09-23 | -0.202585 | 0.154513 |
| 2021-09-24 | -0.131933 | 0.225164 |
| 2021-09-25 | -0.129460 | 0.227637 |
491 rows × 2 columns
# Plot of observed values
ax = data.cox_diff.plot(label = 'Observed Values', color = '#2574BF')
# Plot of predicted values
sarima_predict_1.predicted_mean.plot(ax = ax,
label = 'SARIMA (0, 0, 0)x(2, 1, 0, 12)',
alpha = 0.7,
color = 'red')
# Confidence Interval
ax.fill_between(sarima_predict_conf_1.index,
# lower limit
sarima_predict_conf_1.iloc[:, 0],
# upper limit
sarima_predict_conf_1.iloc[:, 1], color = 'k', alpha = 0.1)
# Title and legend
plt.title('Bitcoin Price Predictions with Seasonal ARIMA Model')
plt.xlabel('Date')
plt.ylabel('Bitcoin Prices')
plt.legend()
plt.show()
Transforming back to the original values using the functions defined earlier: first we invert the differencing, then the Box-Cox transformation.
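The inversion chain can be checked in isolation (a minimal sketch): if z_t = boxcox(y_t, λ) and d_t = z_t − z_{t−1}, then y_t = inv_boxcox(d_t + z_{t−1}, λ).

# Round-trip sketch: undo the differencing, then undo the Box-Cox transform
z = data['Close_Box'].values                           # Box-Cox levels z_t
d = np.diff(z)                                         # d_t = z_t - z_{t-1}
y_back = inv_boxcox(d + z[:-1], lmbda)                 # recover y_t for t >= 1
print(np.allclose(y_back, data['Close'].values[1:]))   # True up to floating-point error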
# Getting real values back
sarima_pred = inverse_difference(testset['Close_Box'], sarima_predict_1.predicted_mean)
sarima_pred = inv_boxcox(sarima_pred, lmbda)
sarima_pred = pd.DataFrame(sarima_pred)
sarima_pred = pd.concat([x, sarima_pred])
sarima_pred.reset_index()
sarima_pred = sarima_pred.set_index(testset[:-2].index)
# Getting real values to lower limit
lower = inverse_difference(testset['Close_Box'], sarima_predict_conf_1.iloc[:, 0])
lower = inv_boxcox(lower, lmbda)
# Getting real values to upper limit
upper = inverse_difference(testset['Close_Box'], sarima_predict_conf_1.iloc[:, 1])
upper = inv_boxcox(upper, lmbda)
# Plot real values
# Plot of observed values
ax = data.Close.plot(label = 'Observed Values', color = '#2574BF')
# Plot of predicted values
sarima_pred[1:].plot(ax = ax, label = 'SARIMA', alpha = 0.7,
color = 'orange')
# Confidence Interval
ax.fill_between(sarima_predict_conf_1.index[1:],
# lower limit
lower,
# upper limit
upper, color = 'k', alpha = 0.1)
# Title and legend
plt.title('Bitcoin Price Predictions with Seasonal ARIMA Model')
plt.xlabel('Date')
plt.ylabel('Bitcoin Prices')
plt.legend()
plt.show()
# Performance
arima_results = performance(testset['Close'].values, sarima_pred.values)
arima_results
MSE of predictions is 609001505.24
RMSE of predictions is 24677.96
MAPE of predictions is 99.7
The SARIMA MAPE (99.7) was greater than the ARIMA MAPE (98.06), indicating that ARIMA has the smaller error.
# Create model with annual seasonality
model_prophet = Prophet(yearly_seasonality = True)
# Add Built-in Country Holiday
model_prophet.add_country_holidays(country_name = 'US')
<prophet.forecaster.Prophet at 0x7f28d8423160>
# Format the target dataset for Prophet (columns ds and y)
t = data.cox_diff.reset_index()
t = t.rename(columns = {'Date': 'ds', 'cox_diff': 'y'})
# Train model
model_prophet.fit(t)
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
<prophet.forecaster.Prophet at 0x7f28d8423160>
# Dataset for predictions
dataset_pred = model_prophet.make_future_dataframe(periods = 365, freq = 'd')
dataset_pred.count()
ds    2826
dtype: int64
# Forecast
forecast_model_prophet = model_prophet.predict(dataset_pred)
# Predictions
forecast_model_prophet[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
| | ds | yhat | yhat_lower | yhat_upper |
|---|---|---|---|---|
| 2821 | 2022-09-26 | 0.005081 | -0.097506 | 0.104147 |
| 2822 | 2022-09-27 | -0.003191 | -0.105380 | 0.097730 |
| 2823 | 2022-09-28 | 0.001684 | -0.095455 | 0.107370 |
| 2824 | 2022-09-29 | 0.001515 | -0.102826 | 0.100399 |
| 2825 | 2022-09-30 | 0.006634 | -0.086259 | 0.109035 |
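Beyond a single train/test split, Prophet also ships a rolling-origin cross-validation utility; a sketch (the window sizes here are assumptions):

# Sketch: rolling-origin evaluation with Prophet's diagnostics module
from prophet.diagnostics import cross_validation, performance_metrics
df_cv = cross_validation(model_prophet, initial = '1460 days',
                         period = '180 days', horizon = '365 days')
print(performance_metrics(df_cv).head())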
# Plot Predictions
fig = model_prophet.plot(forecast_model_prophet)
# Test set in red
plt.plot(testset['cox_diff'], label = 'Test', color = 'red', linewidth = 2)
plt.show()
# Predictions
forecast_model_prophet.head()
| | ds | trend | yhat_lower | yhat_upper | trend_lower | trend_upper | Christmas Day | Christmas Day_lower | Christmas Day_upper | Christmas Day (Observed) | Christmas Day (Observed)_lower | Christmas Day (Observed)_upper | Columbus Day | Columbus Day_lower | Columbus Day_upper | Independence Day | Independence Day_lower | Independence Day_upper | Independence Day (Observed) | Independence Day (Observed)_lower | Independence Day (Observed)_upper | Juneteenth National Independence Day | Juneteenth National Independence Day_lower | Juneteenth National Independence Day_upper | Labor Day | Labor Day_lower | Labor Day_upper | Martin Luther King Jr. Day | Martin Luther King Jr. Day_lower | Martin Luther King Jr. Day_upper | Memorial Day | Memorial Day_lower | Memorial Day_upper | New Year's Day | New Year's Day_lower | New Year's Day_upper | New Year's Day (Observed) | New Year's Day (Observed)_lower | New Year's Day (Observed)_upper | Thanksgiving | Thanksgiving_lower | Thanksgiving_upper | Veterans Day | Veterans Day_lower | Veterans Day_upper | Veterans Day (Observed) | Veterans Day (Observed)_lower | Veterans Day (Observed)_upper | Washington's Birthday | Washington's Birthday_lower | Washington's Birthday_upper | additive_terms | additive_terms_lower | additive_terms_upper | holidays | holidays_lower | holidays_upper | weekly | weekly_lower | weekly_upper | yearly | yearly_lower | yearly_upper | multiplicative_terms | multiplicative_terms_lower | multiplicative_terms_upper | yhat |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2015-01-01 | 0.001641 | -0.089281 | 0.112805 | 0.001641 | 0.001641 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.005946 | 0.005946 | 0.005946 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.006156 | 0.006156 | 0.006156 | 0.005946 | 0.005946 | 0.005946 | -0.000979 | -0.000979 | -0.000979 | 0.001189 | 0.001189 | 0.001189 | 0.0 | 0.0 | 0.0 | 0.007797 |
| 1 | 2015-01-02 | 0.001646 | -0.091687 | 0.110663 | 0.001646 | 0.001646 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.003357 | 0.003357 | 0.003357 | 0.000000 | 0.000000 | 0.000000 | 0.003254 | 0.003254 | 0.003254 | 0.000103 | 0.000103 | 0.000103 | 0.0 | 0.0 | 0.0 | 0.005004 |
| 2 | 2015-01-03 | 0.001651 | -0.103322 | 0.102166 | 0.001651 | 0.001651 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000870 | 0.000870 | 0.000870 | 0.000000 | 0.000000 | 0.000000 | 0.001898 | 0.001898 | 0.001898 | -0.001028 | -0.001028 | -0.001028 | 0.0 | 0.0 | 0.0 | 0.002521 |
| 3 | 2015-01-04 | 0.001656 | -0.101245 | 0.095351 | 0.001656 | 0.001656 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -0.007162 | -0.007162 | -0.007162 | 0.000000 | 0.000000 | 0.000000 | -0.004962 | -0.004962 | -0.004962 | -0.002200 | -0.002200 | -0.002200 | 0.0 | 0.0 | 0.0 | -0.005506 |
| 4 | 2015-01-05 | 0.001661 | -0.099529 | 0.096545 | 0.001661 | 0.001661 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.001464 | 0.001464 | 0.001464 | 0.000000 | 0.000000 | 0.000000 | 0.004871 | 0.004871 | 0.004871 | -0.003407 | -0.003407 | -0.003407 | 0.0 | 0.0 | 0.0 | 0.003125 |
# Drop the all-zero multiplicative-term columns
forecast_model_prophet.drop(columns = ['multiplicative_terms',
'multiplicative_terms_lower',
'multiplicative_terms_upper'], inplace = True)
Plot on the transformed scale.
# Plot
# Original series
plt.plot(data.cox_diff.index,
data.cox_diff.values,
label = 'Observed Values',
color = '#2574BF')
# Index where the test period begins in the forecast frame
i1 = len(forecast_model_prophet) - len(testset)
# Predictions
plt.plot(testset.cox_diff.index,
forecast_model_prophet[i1:]['yhat'].values,
label = 'Predictions with Prophet',
alpha = 0.7,
color = 'red')
# Confidence Limits
plt.fill_between(testset.cox_diff.index,
forecast_model_prophet[i1:]['yhat_lower'].values,
forecast_model_prophet[i1:]['yhat_upper'].values,
color = 'k',
alpha = 0.1)
plt.title('Forecasting with Prophet Model')
plt.xlabel('Date')
plt.ylabel('Prices')
plt.legend()
plt.show()
forecast_model_prophet.head()
| | ds | trend | yhat_lower | yhat_upper | trend_lower | trend_upper | Christmas Day | Christmas Day_lower | Christmas Day_upper | Christmas Day (Observed) | Christmas Day (Observed)_lower | Christmas Day (Observed)_upper | Columbus Day | Columbus Day_lower | Columbus Day_upper | Independence Day | Independence Day_lower | Independence Day_upper | Independence Day (Observed) | Independence Day (Observed)_lower | Independence Day (Observed)_upper | Juneteenth National Independence Day | Juneteenth National Independence Day_lower | Juneteenth National Independence Day_upper | Labor Day | Labor Day_lower | Labor Day_upper | Martin Luther King Jr. Day | Martin Luther King Jr. Day_lower | Martin Luther King Jr. Day_upper | Memorial Day | Memorial Day_lower | Memorial Day_upper | New Year's Day | New Year's Day_lower | New Year's Day_upper | New Year's Day (Observed) | New Year's Day (Observed)_lower | New Year's Day (Observed)_upper | Thanksgiving | Thanksgiving_lower | Thanksgiving_upper | Veterans Day | Veterans Day_lower | Veterans Day_upper | Veterans Day (Observed) | Veterans Day (Observed)_lower | Veterans Day (Observed)_upper | Washington's Birthday | Washington's Birthday_lower | Washington's Birthday_upper | additive_terms | additive_terms_lower | additive_terms_upper | holidays | holidays_lower | holidays_upper | weekly | weekly_lower | weekly_upper | yearly | yearly_lower | yearly_upper | yhat |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2015-01-01 | 0.001641 | -0.089281 | 0.112805 | 0.001641 | 0.001641 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.005946 | 0.005946 | 0.005946 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.006156 | 0.006156 | 0.006156 | 0.005946 | 0.005946 | 0.005946 | -0.000979 | -0.000979 | -0.000979 | 0.001189 | 0.001189 | 0.001189 | 0.007797 |
| 1 | 2015-01-02 | 0.001646 | -0.091687 | 0.110663 | 0.001646 | 0.001646 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.003357 | 0.003357 | 0.003357 | 0.000000 | 0.000000 | 0.000000 | 0.003254 | 0.003254 | 0.003254 | 0.000103 | 0.000103 | 0.000103 | 0.005004 |
| 2 | 2015-01-03 | 0.001651 | -0.103322 | 0.102166 | 0.001651 | 0.001651 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000870 | 0.000870 | 0.000870 | 0.000000 | 0.000000 | 0.000000 | 0.001898 | 0.001898 | 0.001898 | -0.001028 | -0.001028 | -0.001028 | 0.002521 |
| 3 | 2015-01-04 | 0.001656 | -0.101245 | 0.095351 | 0.001656 | 0.001656 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -0.007162 | -0.007162 | -0.007162 | 0.000000 | 0.000000 | 0.000000 | -0.004962 | -0.004962 | -0.004962 | -0.002200 | -0.002200 | -0.002200 | -0.005506 |
| 4 | 2015-01-05 | 0.001661 | -0.099529 | 0.096545 | 0.001661 | 0.001661 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.001464 | 0.001464 | 0.001464 | 0.000000 | 0.000000 | 0.000000 | 0.004871 | 0.004871 | 0.004871 | -0.003407 | -0.003407 | -0.003407 | 0.003125 |
# Performance
prophet_results = performance(testset.cox_diff.values, forecast_model_prophet[i1:]['yhat'])
prophet_results
MSE of predictions is 0.01
RMSE of predictions is 0.09
MAPE of predictions is 184.44
The Prophet MAPE is also greater than the ARIMA and SARIMA MAPEs, indicating that ARIMA is the model with the smallest percentage error (note that the Prophet MAPE was computed on the transformed series, while ARIMA and SARIMA were scored on prices, so the comparison is indicative rather than exact).
# Dataset for predictions - daily frequency
dataset_pred = model_prophet.make_future_dataframe(periods = 365, freq = 'd')
# Forecast
forecast_model_prophet_60 = model_prophet.predict(dataset_pred)
Plot of the train set, the test set (in red), and the forecast over the future horizon.
# Plot Predictions
fig = model_prophet.plot(forecast_model_prophet_60)
# Test set in red
plt.plot(testset['cox_diff'], label = 'Test', color = 'red', linewidth = 2)
plt.show()
Plotting on the original price scale.
# Getting real values back
i2 = len(data['Close_Box'])
prophet_pred = inverse_difference(data['Close_Box'], forecast_model_prophet[:i2].yhat)
prophet_pred = inv_boxcox(prophet_pred, lmbda)
prophet_pred = pd.DataFrame(prophet_pred)
prophet_pred = pd.concat([x, prophet_pred])
prophet_pred = prophet_pred.reset_index()
prophet_pred = prophet_pred.set_index(data.index)
# Getting real values to lower limit
lower = inverse_difference(data['Close_Box'], forecast_model_prophet[:i2].yhat_lower)
lower = inv_boxcox(lower, lmbda)
# Getting real values to upper limit
upper = inverse_difference(data['Close_Box'], forecast_model_prophet[:i2].yhat_upper)
upper = inv_boxcox(upper, lmbda)
# Plot real values
# Plot of observed values
ax = data.Close.plot(label = 'Observed Values', color = '#2574BF')
# Plot of predicted values
prophet_pred[i1:][0].plot(ax = ax, label = 'Prophet', alpha = 0.7,
color = 'red')
# Confidence Interval
ax.fill_between(prophet_pred.index[1:],
# lower limit
lower,
# upper limit
upper, color = 'k', alpha = 0.1)
# Title and legend
plt.title('Bitcoin Price Predictions with Prophet Model')
plt.xlabel('Date')
plt.ylabel('Bitcoin Prices')
plt.legend()
plt.show()