Source of bitcoin data is https://blockchain.info/stats
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.4.2
## corrplot 0.84 loaded
library(ggplot2)
library(ggfortify)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.4.2
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(car)
## Warning: package 'car' was built under R version 3.4.4
## Loading required package: carData
## Warning: package 'carData' was built under R version 3.4.4
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(reshape2) # for melt
library(data.table)
## Warning: package 'data.table' was built under R version 3.4.2
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday,
## week, yday, year
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(animation)
library(feather)
Read the dataset
# Load the bitcoin statistics; the first row of the CSV holds the column names.
bitcoin <- read.csv(file = "bitcoin_data.csv", header = TRUE)
# Inspect the column types before any conversion.
str(bitcoin)
## 'data.frame': 1711 obs. of 13 variables:
## $ Date : Factor w/ 1711 levels "1/1/11","1/1/14",..: 110 130 140 150 10 20 30 40 50 65 ...
## $ Day : int 3 5 7 9 11 13 15 17 19 21 ...
## $ Week : int 1 2 2 2 3 3 3 3 4 4 ...
## $ Price : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Transaction_fees..BTC. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ No_of_transactions : int 1 0 0 14 106 116 136 109 120 115 ...
## $ Output_value..BTC. : num 0 0 0 0 329 ...
## $ estimated_transaction_value: num 0 0 0 0 0 0 0 0 0 0 ...
## $ Miners_revenue : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Cost_per_transaction : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Difficulty : num 1 0 0 1 1 1 1 1 1 1 ...
## $ Hash_rate : num 4.97e-08 0.00 0.00 6.96e-07 5.27e-06 5.72e-06 6.31e-06 5.37e-06 5.92e-06 5.62e-06 ...
## $ Trade_volume : num 0 0 0 0 0 0 0 0 0 0 ...
Convert the Date column from text (month/day/two-digit year) to Date class and preview the first rows
# Parse the month/day/two-digit-year values into Date class.
bitcoin[["Date"]] <- as.Date(bitcoin[["Date"]], format = "%m/%d/%y")
## Warning in strptime(x, format, tz = "GMT"): unknown timezone 'zone/tz/
## 2018c.1.0/zoneinfo/America/Los_Angeles'
# Preview the first rows of the data frame.
head(bitcoin)
## Date Day Week Price Transaction_fees..BTC. No_of_transactions
## 1 2009-01-03 3 1 0 0 1
## 2 2009-01-05 5 2 0 0 0
## 3 2009-01-07 7 2 0 0 0
## 4 2009-01-09 9 2 0 0 14
## 5 2009-01-11 11 3 0 0 106
## 6 2009-01-13 13 3 0 0 116
## Output_value..BTC. estimated_transaction_value Miners_revenue
## 1 0.0000 0 0
## 2 0.0000 0 0
## 3 0.0000 0 0
## 4 0.0000 0 0
## 5 328.5714 0 0
## 6 1743.0000 0 0
## Cost_per_transaction Difficulty Hash_rate Trade_volume
## 1 0 1 4.97e-08 0
## 2 0 0 0.00e+00 0
## 3 0 0 0.00e+00 0
## 4 0 1 6.96e-07 0
## 5 0 1 5.27e-06 0
## 6 0 1 5.72e-06 0
# TRUE if any column of the data frame contains a missing value.
anyNA(bitcoin)
## [1] FALSE
Get summary of the columns in bitcoin dataset
# Per-column summary statistics (min, quartiles, mean, max).
summary(bitcoin)
## Date Day Week Price
## Min. :2009-01-03 Min. : 1.0 Min. : 1.0 Min. : 0.000
## 1st Qu.:2011-05-08 1st Qu.: 86.0 1st Qu.:13.0 1st Qu.: 2.528
## Median :2013-09-09 Median :176.0 Median :26.0 Median : 128.100
## Mean :2013-09-09 Mean :178.7 Mean :26.4 Mean : 986.565
## 3rd Qu.:2016-01-12 3rd Qu.:271.0 3rd Qu.:40.0 3rd Qu.: 586.330
## Max. :2018-05-16 Max. :366.0 Max. :53.0 Max. :19289.785
## Transaction_fees..BTC. No_of_transactions Output_value..BTC.
## Min. : 0.000 Min. : 0 Min. : 0
## 1st Qu.: 3.252 1st Qu.: 4380 1st Qu.: 280139
## Median : 16.172 Median : 55371 Median : 850113
## Mean : 52.955 Mean : 92739 Mean : 1171801
## 3rd Qu.: 44.526 3rd Qu.:176783 3rd Qu.: 1658677
## Max. :1128.762 Max. :425008 Max. :21158969
## estimated_transaction_value Miners_revenue Cost_per_transaction
## Min. :0.000e+00 Min. : 0 Min. : 0.000
## 1st Qu.:2.704e+05 1st Qu.: 15842 1st Qu.: 2.293
## Median :2.264e+07 Median : 607245 Median : 7.191
## Mean :1.782e+08 Mean : 2387408 Mean : 15.239
## 3rd Qu.:1.111e+08 3rd Qu.: 1800091 3rd Qu.: 14.148
## Max. :3.987e+09 Max. :53191582 Max. :146.595
## Difficulty Hash_rate Trade_volume
## Min. :0.000e+00 Min. : 0 Min. :0.000e+00
## 1st Qu.:1.097e+05 1st Qu.: 1 1st Qu.:5.952e+04
## Median :8.693e+07 Median : 834 Median :6.144e+06
## Mean :2.436e+11 Mean : 1857248 Mean :8.856e+07
## 3rd Qu.:1.085e+11 3rd Qu.: 830330 3rd Qu.:2.620e+07
## Max. :4.140e+12 Max. :36872804 Max. :5.352e+09
There are no missing values in data
# Scatter plot of the bitcoin price over time.
# Columns are referenced by bare name inside aes() so that ggplot2 resolves
# them against the `bitcoin` data argument instead of an external vector.
ggplot(bitcoin, aes(x = Date, y = Price)) +
  geom_point(color = "dark blue") +
  ggtitle("Bitcoin values in USD trend") +
  labs(x = "Date", y = "Bitcoin price in USD") +
  theme_minimal()
# Pairwise correlations of columns 2 to 13 (every column except Date).
# NOTE: the matrix is stored under the name `cor`, the same name as the
# function that produces it.
cor <- cor(bitcoin[2:13])
corrplot(
  cor,
  method = "square",
  type = "upper",
  tl.srt = 70,
  tl.col = "black",
  tl.cex = 0.5,
  title = "Correlation of Variables"
)
correlation <- as.data.frame(cor)
# Keep the variables whose correlation with Price is at least 0.75.
highly_correlated <- correlation[correlation[["Price"]] >= 0.75, ]
rownames(highly_correlated)
## [1] "Price" "estimated_transaction_value"
## [3] "Miners_revenue" "Cost_per_transaction"
## [5] "Difficulty" "Hash_rate"
## [7] "Trade_volume"
These are the highly correlated variables to bitcoin market price
Additive model with all variables
# Baseline additive model: Price regressed on every other column of `bitcoin`.
fit.lm1 <- lm(formula = Price ~ ., data = bitcoin)
summary(fit.lm1)
##
## Call:
## lm(formula = Price ~ ., data = bitcoin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1373.49 -38.98 5.00 49.31 2708.72
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.356e+02 2.202e+02 3.795 0.000153 ***
## Date -5.308e-02 1.463e-02 -3.628 0.000294 ***
## Day 1.044e+01 1.807e+00 5.778 8.97e-09 ***
## Week -7.184e+01 1.263e+01 -5.687 1.52e-08 ***
## Transaction_fees..BTC. -1.007e+00 9.109e-02 -11.052 < 2e-16 ***
## No_of_transactions 3.111e-04 1.521e-04 2.046 0.040954 *
## Output_value..BTC. -8.394e-06 3.394e-06 -2.474 0.013474 *
## estimated_transaction_value 8.710e-07 5.009e-08 17.388 < 2e-16 ***
## Miners_revenue 3.120e-04 4.919e-06 63.417 < 2e-16 ***
## Cost_per_transaction -3.503e+00 5.714e-01 -6.132 1.08e-09 ***
## Difficulty 2.437e-09 7.233e-11 33.685 < 2e-16 ***
## Hash_rate -1.991e-04 9.975e-06 -19.960 < 2e-16 ***
## Trade_volume -7.904e-08 2.943e-08 -2.686 0.007305 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 194 on 1698 degrees of freedom
## Multiple R-squared: 0.9944, Adjusted R-squared: 0.9943
## F-statistic: 2.499e+04 on 12 and 1698 DF, p-value: < 2.2e-16
R^2 is 99%, which might indicate overfitting. All predictors are significant at the 5% level; No_of_transactions, Output_value..BTC. and Trade_volume have the largest p-values and are therefore the weakest contributors to the model.
# Diagnostic plots for fit.lm1 with point labels switched off.
autoplot(fit.lm1, label = FALSE, colour = "goldenrod1") +
  theme_minimal()
Residuals indicate heteroscedasticity and the data deviates from normal.
Running step function to get a model with the lowest AIC
# Refit the all-predictor model (same formula as fit.lm1) and run AIC-based
# stepwise selection starting from it; the selected model is printed, not stored.
fit_full <- lm(formula = Price ~ ., data = bitcoin)
step(object = fit_full)
## Start: AIC=18038.74
## Price ~ Date + Day + Week + Transaction_fees..BTC. + No_of_transactions +
## Output_value..BTC. + estimated_transaction_value + Miners_revenue +
## Cost_per_transaction + Difficulty + Hash_rate + Trade_volume
##
## Df Sum of Sq RSS AIC
## <none> 63875221 18039
## - No_of_transactions 1 157404 64032624 18041
## - Output_value..BTC. 1 230168 64105388 18043
## - Trade_volume 1 271371 64146592 18044
## - Date 1 495113 64370334 18050
## - Week 1 1216459 65091680 18069
## - Day 1 1255952 65131173 18070
## - Cost_per_transaction 1 1414393 65289614 18074
## - Transaction_fees..BTC. 1 4594609 68469830 18156
## - estimated_transaction_value 1 11373251 75248471 18317
## - Hash_rate 1 14987001 78862221 18397
## - Difficulty 1 42684308 106559529 18912
## - Miners_revenue 1 151287467 215162688 20115
##
## Call:
## lm(formula = Price ~ Date + Day + Week + Transaction_fees..BTC. +
## No_of_transactions + Output_value..BTC. + estimated_transaction_value +
## Miners_revenue + Cost_per_transaction + Difficulty + Hash_rate +
## Trade_volume, data = bitcoin)
##
## Coefficients:
## (Intercept) Date
## 8.356e+02 -5.308e-02
## Day Week
## 1.044e+01 -7.184e+01
## Transaction_fees..BTC. No_of_transactions
## -1.007e+00 3.111e-04
## Output_value..BTC. estimated_transaction_value
## -8.394e-06 8.710e-07
## Miners_revenue Cost_per_transaction
## 3.120e-04 -3.503e+00
## Difficulty Hash_rate
## 2.437e-09 -1.991e-04
## Trade_volume
## -7.904e-08
The step function retained every predictor (removing any of them raises the AIC). Fit a reduced model without Trade_volume and check the residuals
# Reduced additive model: every predictor of the full model except
# Trade_volume. The stray empty argument of the original call is removed.
fit.lm2 <- lm(
  formula = Price ~ Date + Day + Week + Transaction_fees..BTC. +
    No_of_transactions + Output_value..BTC. + estimated_transaction_value +
    Miners_revenue + Cost_per_transaction + Difficulty + Hash_rate,
  data = bitcoin
)
summary(fit.lm2)
##
## Call:
## lm(formula = Price ~ Date + Day + Week + Transaction_fees..BTC. +
## No_of_transactions + Output_value..BTC. + estimated_transaction_value +
## Miners_revenue + Cost_per_transaction + Difficulty + Hash_rate,
## data = bitcoin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1364.96 -38.73 5.20 48.33 2666.94
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.430e+02 2.206e+02 3.821 0.000137 ***
## Date -5.362e-02 1.466e-02 -3.659 0.000261 ***
## Day 1.030e+01 1.809e+00 5.690 1.50e-08 ***
## Week -7.084e+01 1.265e+01 -5.600 2.50e-08 ***
## Transaction_fees..BTC. -1.022e+00 9.109e-02 -11.217 < 2e-16 ***
## No_of_transactions 3.492e-04 1.517e-04 2.302 0.021468 *
## Output_value..BTC. -8.377e-06 3.400e-06 -2.464 0.013839 *
## estimated_transaction_value 8.489e-07 4.951e-08 17.149 < 2e-16 ***
## Miners_revenue 3.100e-04 4.875e-06 63.598 < 2e-16 ***
## Cost_per_transaction -3.464e+00 5.722e-01 -6.053 1.75e-09 ***
## Difficulty 2.401e-09 7.126e-11 33.697 < 2e-16 ***
## Hash_rate -1.949e-04 9.871e-06 -19.747 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 194.3 on 1699 degrees of freedom
## Multiple R-squared: 0.9943, Adjusted R-squared: 0.9943
## F-statistic: 2.716e+04 on 11 and 1699 DF, p-value: < 2.2e-16
All the variables are significant in the model
# Diagnostic plots for fit.lm2 with point labels switched off.
autoplot(fit.lm2, label = FALSE, colour = "goldenrod1") +
  theme_minimal()
Residuals indicate heteroscedasticity and the data deviates from normal.
A polynomial transformation will not make much of an impact on the data.
Useful link on heteroscedasticity of time series data http://statisticsbyjim.com/regression/heteroscedasticity-regression/
Another method that can be useful in solving heteroscedasticity is weighted regression, https://newonlinecourses.science.psu.edu/stat501/node/431/
# Weighted least squares. The weights are the inverse squared fitted values
# obtained by regressing the absolute residuals of fit.lm2 on its fitted values.
wts <- local({
  abs_resid_fit <- lm(abs(residuals(fit.lm2)) ~ fitted(fit.lm2))
  1 / fitted(abs_resid_fit)^2
})

# Same predictors as fit.lm2, now fitted with the weights above.
fit.lm3 <- lm(
  formula = Price ~ Date + Day + Week + Transaction_fees..BTC. +
    No_of_transactions + Output_value..BTC. + estimated_transaction_value +
    Miners_revenue + Cost_per_transaction + Difficulty + Hash_rate,
  data = bitcoin,
  weights = wts
)

# Diagnostic plots for the weighted fit with point labels switched off.
autoplot(fit.lm3, label = FALSE, colour = "goldenrod1") +
  theme_minimal()
Applied weighted regression did not help much.
For time series data, these forms of transformation help in making the data stationary: https://datascienceplus.com/time-series-analysis-in-r-part-2-time-series-transformations/
Transforming the variables to % changes
# Percentage change of each element of `x` relative to the previous element.
#
# x: numeric vector ordered in time.
# Returns a numeric vector of the same length as `x`. The first element (which
# has no predecessor) and any other non-finite result, such as a division by a
# previous value of zero, are set to 0.
pct_change <- function(x) {
  # dplyr::lag is named explicitly so the result does not depend on which
  # attached package currently masks `lag`.
  previous <- dplyr::lag(x)
  change <- (x - previous) / previous * 100
  change[!is.finite(change)] <- 0
  change
}

# Keep only the observations with a positive price.
bitcoin_final <- subset(bitcoin, Price > 0)

# Add one percentage-change column per variable. Column names and their order
# match what the later models expect. Only these derived columns are
# zero-filled; the raw columns are left untouched.
bitcoin_final <- mutate(
  bitcoin_final,
  pChange = pct_change(Price),
  Transaction_fees..BTC._pChange = pct_change(Transaction_fees..BTC.),
  Output_value..BTC._pChange = pct_change(Output_value..BTC.),
  estimated_transaction_value_pChange = pct_change(estimated_transaction_value),
  Miners_revenue_pChange = pct_change(Miners_revenue),
  No_of_transactions_pChange = pct_change(No_of_transactions),
  Cost_per_transaction_pChange = pct_change(Cost_per_transaction),
  Difficulty_pChange = pct_change(Difficulty),
  Hash_rate_pChange = pct_change(Hash_rate)
)
summary(bitcoin_final)
## Date Day Week Price
## Min. :2010-08-18 Min. : 1.0 Min. : 1 Min. : 0.061
## 1st Qu.:2012-07-25 1st Qu.: 89.0 1st Qu.:14 1st Qu.: 11.805
## Median :2014-07-02 Median :183.0 Median :27 Median : 270.680
## Mean :2014-07-02 Mean :183.1 Mean :27 Mean : 1192.942
## 3rd Qu.:2016-06-08 3rd Qu.:277.0 3rd Qu.:40 3rd Qu.: 645.682
## Max. :2018-05-16 Max. :366.0 Max. :53 Max. :19289.785
## Transaction_fees..BTC. No_of_transactions Output_value..BTC.
## Min. : 0.00 Min. : 271 Min. : 23342
## 1st Qu.: 11.83 1st Qu.: 28590 1st Qu.: 580806
## Median : 25.05 Median : 69721 Median : 1070697
## Mean : 64.02 Mean :112102 Mean : 1413487
## 3rd Qu.: 54.43 3rd Qu.:203423 3rd Qu.: 1800706
## Max. :1128.76 Max. :425008 Max. :21158969
## estimated_transaction_value Miners_revenue Cost_per_transaction
## Min. :5.300e+02 Min. : 371 Min. : 0.1349
## 1st Qu.:1.916e+06 1st Qu.: 78499 1st Qu.: 5.1827
## Median :4.252e+07 Median : 1007436 Median : 8.3437
## Mean :2.155e+08 Mean : 2886823 Mean : 18.4272
## 3rd Qu.:1.386e+08 3rd Qu.: 2091327 3rd Qu.: 18.7835
## Max. :3.987e+09 Max. :53191582 Max. :146.5951
## Difficulty Hash_rate Trade_volume
## Min. :5.120e+02 Min. : 0 Min. :4.200e+01
## 1st Qu.:1.889e+06 1st Qu.: 15 1st Qu.:4.750e+05
## Median :1.682e+10 Median : 124571 Median :1.295e+07
## Mean :2.946e+11 Mean : 2245760 Mean :1.071e+08
## 3rd Qu.:1.990e+11 3rd Qu.: 1441005 3rd Qu.:3.445e+07
## Max. :4.140e+12 Max. :36872804 Max. :5.352e+09
## pChange Transaction_fees..BTC._pChange
## Min. :-65.2000 Min. : -100
## 1st Qu.: -2.1220 1st Qu.: -16
## Median : 0.3511 Median : 0
## Mean : 1.2657 Mean : 56554
## 3rd Qu.: 3.3566 3rd Qu.: 18
## Max. :182.2125 Max. :79999900
## Output_value..BTC._pChange estimated_transaction_value_pChange
## Min. :-86.2040 Min. :-79.5447
## 1st Qu.: -6.9524 1st Qu.: -6.7300
## Median : 0.5872 Median : 0.6419
## Mean : 2.4181 Mean : 3.0586
## 3rd Qu.: 8.3175 3rd Qu.: 8.6480
## Max. :316.1967 Max. :405.0760
## Miners_revenue_pChange No_of_transactions_pChange
## Min. :-63.7608 Min. : -96.7944
## 1st Qu.: -8.5491 1st Qu.: -10.6309
## Median : 0.5814 Median : -0.1394
## Mean : 2.1243 Mean : 6.4857
## 3rd Qu.: 10.8179 3rd Qu.: 11.6223
## Max. :223.5119 Max. :2929.3233
## Cost_per_transaction_pChange Difficulty_pChange Hash_rate_pChange
## Min. : -98.465 Min. :-18.031 Min. :-52.778
## 1st Qu.: -11.916 1st Qu.: 0.000 1st Qu.: -6.122
## Median : 0.917 Median : 0.000 Median : 1.442
## Mean : 8.633 Mean : 1.829 Mean : 2.506
## 3rd Qu.: 15.146 3rd Qu.: 0.000 3rd Qu.: 10.423
## Max. :4135.040 Max. : 78.146 Max. : 75.000
# Regress the price percentage change on every remaining column. The raw Price
# column is excluded by name, so the model does not depend on its position.
fit.lm4 <- lm(
  formula = pChange ~ .,
  data = bitcoin_final[, names(bitcoin_final) != "Price"]
)
summary(fit.lm4)
##
## Call:
## lm(formula = pChange ~ ., data = bitcoin_final[, -4])
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.177 -0.698 -0.274 0.416 48.436
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.090e+00 5.115e+00 -0.409 0.68296
## Date 1.611e-04 3.323e-04 0.485 0.62790
## Day 5.063e-02 2.977e-02 1.701 0.08919
## Week -3.526e-01 2.081e-01 -1.695 0.09038
## Transaction_fees..BTC. -4.364e-03 1.437e-03 -3.037 0.00243
## No_of_transactions 6.095e-07 2.859e-06 0.213 0.83124
## Output_value..BTC. 1.081e-08 5.344e-08 0.202 0.83968
## estimated_transaction_value 1.287e-09 7.804e-10 1.649 0.09936
## Miners_revenue -1.076e-07 7.645e-08 -1.408 0.15940
## Cost_per_transaction 9.504e-03 9.013e-03 1.054 0.29185
## Difficulty -5.706e-12 1.162e-12 -4.911 1.01e-06
## Hash_rate 7.500e-07 1.598e-07 4.694 2.95e-06
## Trade_volume -5.668e-11 4.547e-10 -0.125 0.90082
## Transaction_fees..BTC._pChange 2.564e-08 3.759e-08 0.682 0.49520
## Output_value..BTC._pChange 5.551e-03 4.284e-03 1.296 0.19521
## estimated_transaction_value_pChange -2.153e-03 4.158e-03 -0.518 0.60463
## Miners_revenue_pChange 8.934e-01 7.451e-03 119.905 < 2e-16
## No_of_transactions_pChange -3.125e-03 7.455e-04 -4.192 2.94e-05
## Cost_per_transaction_pChange -3.102e-04 5.266e-04 -0.589 0.55600
## Difficulty_pChange 7.337e-01 1.291e-02 56.848 < 2e-16
## Hash_rate_pChange -8.712e-01 9.724e-03 -89.595 < 2e-16
##
## (Intercept)
## Date
## Day .
## Week .
## Transaction_fees..BTC. **
## No_of_transactions
## Output_value..BTC.
## estimated_transaction_value .
## Miners_revenue
## Cost_per_transaction
## Difficulty ***
## Hash_rate ***
## Trade_volume
## Transaction_fees..BTC._pChange
## Output_value..BTC._pChange
## estimated_transaction_value_pChange
## Miners_revenue_pChange ***
## No_of_transactions_pChange ***
## Cost_per_transaction_pChange
## Difficulty_pChange ***
## Hash_rate_pChange ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.993 on 1394 degrees of freedom
## Multiple R-squared: 0.914, Adjusted R-squared: 0.9127
## F-statistic: 740.4 on 20 and 1394 DF, p-value: < 2.2e-16
The insignificant variables can be removed one by one; alternatively, the step function can be used to arrive at the model with the lowest AIC value
# AIC-based stepwise selection starting from fit.lm4; the trace of each step
# and the final model are printed, and the result is not stored.
step(fit.lm4)
## Start: AIC=3123.77
## pChange ~ Date + Day + Week + Transaction_fees..BTC. + No_of_transactions +
## Output_value..BTC. + estimated_transaction_value + Miners_revenue +
## Cost_per_transaction + Difficulty + Hash_rate + Trade_volume +
## Transaction_fees..BTC._pChange + Output_value..BTC._pChange +
## estimated_transaction_value_pChange + Miners_revenue_pChange +
## No_of_transactions_pChange + Cost_per_transaction_pChange +
## Difficulty_pChange + Hash_rate_pChange
##
## Df Sum of Sq RSS AIC
## - Trade_volume 1 0 12492 3121.8
## - Output_value..BTC. 1 0 12492 3121.8
## - No_of_transactions 1 0 12492 3121.8
## - Date 1 2 12494 3122.0
## - estimated_transaction_value_pChange 1 2 12494 3122.0
## - Cost_per_transaction_pChange 1 3 12495 3122.1
## - Transaction_fees..BTC._pChange 1 4 12496 3122.2
## - Cost_per_transaction 1 10 12502 3122.9
## - Output_value..BTC._pChange 1 15 12507 3123.5
## <none> 12492 3123.8
## - Miners_revenue 1 18 12509 3123.8
## - estimated_transaction_value 1 24 12516 3124.5
## - Week 1 26 12517 3124.7
## - Day 1 26 12518 3124.7
## - Transaction_fees..BTC. 1 83 12574 3131.1
## - No_of_transactions_pChange 1 157 12649 3139.5
## - Hash_rate 1 197 12689 3144.0
## - Difficulty 1 216 12708 3146.0
## - Difficulty_pChange 1 28959 41451 4819.0
## - Hash_rate_pChange 1 71931 84423 5825.5
## - Miners_revenue_pChange 1 128834 141325 6554.6
##
## Step: AIC=3121.78
## pChange ~ Date + Day + Week + Transaction_fees..BTC. + No_of_transactions +
## Output_value..BTC. + estimated_transaction_value + Miners_revenue +
## Cost_per_transaction + Difficulty + Hash_rate + Transaction_fees..BTC._pChange +
## Output_value..BTC._pChange + estimated_transaction_value_pChange +
## Miners_revenue_pChange + No_of_transactions_pChange + Cost_per_transaction_pChange +
## Difficulty_pChange + Hash_rate_pChange
##
## Df Sum of Sq RSS AIC
## - Output_value..BTC. 1 0 12492 3119.8
## - No_of_transactions 1 0 12492 3119.8
## - Date 1 2 12494 3120.0
## - estimated_transaction_value_pChange 1 2 12494 3120.1
## - Cost_per_transaction_pChange 1 3 12495 3120.1
## - Transaction_fees..BTC._pChange 1 4 12496 3120.3
## - Cost_per_transaction 1 10 12502 3120.9
## - Output_value..BTC._pChange 1 15 12507 3121.5
## <none> 12492 3121.8
## - Miners_revenue 1 19 12510 3121.9
## - estimated_transaction_value 1 24 12516 3122.5
## - Week 1 26 12517 3122.7
## - Day 1 26 12518 3122.7
## - Transaction_fees..BTC. 1 83 12575 3129.2
## - No_of_transactions_pChange 1 158 12649 3137.5
## - Hash_rate 1 204 12696 3142.7
## - Difficulty 1 226 12717 3145.1
## - Difficulty_pChange 1 28965 41457 4817.2
## - Hash_rate_pChange 1 71965 84456 5824.1
## - Miners_revenue_pChange 1 128834 141326 6552.6
##
## Step: AIC=3119.82
## pChange ~ Date + Day + Week + Transaction_fees..BTC. + No_of_transactions +
## estimated_transaction_value + Miners_revenue + Cost_per_transaction +
## Difficulty + Hash_rate + Transaction_fees..BTC._pChange +
## Output_value..BTC._pChange + estimated_transaction_value_pChange +
## Miners_revenue_pChange + No_of_transactions_pChange + Cost_per_transaction_pChange +
## Difficulty_pChange + Hash_rate_pChange
##
## Df Sum of Sq RSS AIC
## - No_of_transactions 1 1 12493 3117.9
## - Date 1 2 12494 3118.1
## - estimated_transaction_value_pChange 1 3 12495 3118.1
## - Cost_per_transaction_pChange 1 3 12495 3118.2
## - Transaction_fees..BTC._pChange 1 4 12496 3118.3
## - Cost_per_transaction 1 10 12502 3118.9
## - Output_value..BTC._pChange 1 16 12508 3119.6
## <none> 12492 3119.8
## - Miners_revenue 1 19 12511 3119.9
## - estimated_transaction_value 1 25 12517 3120.6
## - Week 1 27 12519 3120.8
## - Day 1 27 12519 3120.9
## - Transaction_fees..BTC. 1 84 12576 3127.3
## - No_of_transactions_pChange 1 158 12650 3135.6
## - Hash_rate 1 204 12697 3140.8
## - Difficulty 1 226 12718 3143.2
## - Difficulty_pChange 1 28977 41469 4815.6
## - Hash_rate_pChange 1 71972 84465 5822.2
## - Miners_revenue_pChange 1 128858 141350 6550.8
##
## Step: AIC=3117.88
## pChange ~ Date + Day + Week + Transaction_fees..BTC. + estimated_transaction_value +
## Miners_revenue + Cost_per_transaction + Difficulty + Hash_rate +
## Transaction_fees..BTC._pChange + Output_value..BTC._pChange +
## estimated_transaction_value_pChange + Miners_revenue_pChange +
## No_of_transactions_pChange + Cost_per_transaction_pChange +
## Difficulty_pChange + Hash_rate_pChange
##
## Df Sum of Sq RSS AIC
## - estimated_transaction_value_pChange 1 3 12495 3116.2
## - Cost_per_transaction_pChange 1 3 12496 3116.2
## - Transaction_fees..BTC._pChange 1 4 12497 3116.4
## - Cost_per_transaction 1 10 12503 3117.0
## - Output_value..BTC._pChange 1 16 12509 3117.7
## <none> 12493 3117.9
## - Miners_revenue 1 18 12511 3118.0
## - estimated_transaction_value 1 25 12518 3118.8
## - Week 1 26 12519 3118.8
## - Date 1 26 12519 3118.8
## - Day 1 26 12519 3118.9
## - Transaction_fees..BTC. 1 91 12584 3126.2
## - No_of_transactions_pChange 1 158 12650 3133.6
## - Hash_rate 1 213 12706 3139.8
## - Difficulty 1 235 12728 3142.3
## - Difficulty_pChange 1 29273 41766 4823.7
## - Hash_rate_pChange 1 72139 84631 5823.0
## - Miners_revenue_pChange 1 130387 142879 6564.0
##
## Step: AIC=3116.17
## pChange ~ Date + Day + Week + Transaction_fees..BTC. + estimated_transaction_value +
## Miners_revenue + Cost_per_transaction + Difficulty + Hash_rate +
## Transaction_fees..BTC._pChange + Output_value..BTC._pChange +
## Miners_revenue_pChange + No_of_transactions_pChange + Cost_per_transaction_pChange +
## Difficulty_pChange + Hash_rate_pChange
##
## Df Sum of Sq RSS AIC
## - Cost_per_transaction_pChange 1 3 12498 3114.5
## - Transaction_fees..BTC._pChange 1 4 12499 3114.7
## - Cost_per_transaction 1 10 12505 3115.3
## - Output_value..BTC._pChange 1 15 12510 3115.8
## - Miners_revenue 1 18 12513 3116.2
## <none> 12495 3116.2
## - estimated_transaction_value 1 25 12520 3116.9
## - Week 1 26 12522 3117.2
## - Day 1 27 12522 3117.2
## - Date 1 28 12523 3117.3
## - Transaction_fees..BTC. 1 92 12587 3124.5
## - No_of_transactions_pChange 1 156 12651 3131.7
## - Hash_rate 1 212 12707 3138.0
## - Difficulty 1 235 12730 3140.5
## - Difficulty_pChange 1 29303 41798 4822.8
## - Hash_rate_pChange 1 72139 84634 5821.1
## - Miners_revenue_pChange 1 130385 142880 6562.0
##
## Step: AIC=3114.51
## pChange ~ Date + Day + Week + Transaction_fees..BTC. + estimated_transaction_value +
## Miners_revenue + Cost_per_transaction + Difficulty + Hash_rate +
## Transaction_fees..BTC._pChange + Output_value..BTC._pChange +
## Miners_revenue_pChange + No_of_transactions_pChange + Difficulty_pChange +
## Hash_rate_pChange
##
## Df Sum of Sq RSS AIC
## - Transaction_fees..BTC._pChange 1 4 12502 3113.0
## - Cost_per_transaction 1 10 12508 3113.6
## - Output_value..BTC._pChange 1 14 12512 3114.0
## - Miners_revenue 1 17 12515 3114.5
## <none> 12498 3114.5
## - estimated_transaction_value 1 24 12522 3115.2
## - Week 1 27 12525 3115.5
## - Day 1 27 12525 3115.5
## - Date 1 29 12527 3115.8
## - Transaction_fees..BTC. 1 91 12590 3122.8
## - No_of_transactions_pChange 1 154 12652 3129.8
## - Hash_rate 1 213 12711 3136.4
## - Difficulty 1 236 12734 3139.0
## - Difficulty_pChange 1 29336 41834 4822.0
## - Hash_rate_pChange 1 72593 85091 5826.7
## - Miners_revenue_pChange 1 130647 143146 6562.7
##
## Step: AIC=3112.99
## pChange ~ Date + Day + Week + Transaction_fees..BTC. + estimated_transaction_value +
## Miners_revenue + Cost_per_transaction + Difficulty + Hash_rate +
## Output_value..BTC._pChange + Miners_revenue_pChange + No_of_transactions_pChange +
## Difficulty_pChange + Hash_rate_pChange
##
## Df Sum of Sq RSS AIC
## - Cost_per_transaction 1 10 12512 3112.1
## - Output_value..BTC._pChange 1 13 12516 3112.5
## - Miners_revenue 1 17 12520 3113.0
## <none> 12502 3113.0
## - estimated_transaction_value 1 24 12527 3113.7
## - Week 1 26 12528 3113.9
## - Day 1 26 12529 3113.9
## - Date 1 28 12530 3114.1
## - Transaction_fees..BTC. 1 91 12593 3121.2
## - No_of_transactions_pChange 1 154 12657 3128.4
## - Hash_rate 1 214 12717 3135.0
## - Difficulty 1 237 12739 3137.5
## - Difficulty_pChange 1 29334 41836 4820.1
## - Hash_rate_pChange 1 72713 85215 5826.7
## - Miners_revenue_pChange 1 130660 143162 6560.8
##
## Step: AIC=3112.08
## pChange ~ Date + Day + Week + Transaction_fees..BTC. + estimated_transaction_value +
## Miners_revenue + Difficulty + Hash_rate + Output_value..BTC._pChange +
## Miners_revenue_pChange + No_of_transactions_pChange + Difficulty_pChange +
## Hash_rate_pChange
##
## Df Sum of Sq RSS AIC
## - Miners_revenue 1 8 12520 3111.0
## - Output_value..BTC._pChange 1 12 12524 3111.5
## - estimated_transaction_value 1 18 12530 3112.1
## <none> 12512 3112.1
## - Week 1 27 12539 3113.1
## - Day 1 27 12539 3113.2
## - Date 1 30 12542 3113.4
## - Transaction_fees..BTC. 1 113 12625 3122.8
## - No_of_transactions_pChange 1 159 12671 3127.9
## - Hash_rate 1 205 12717 3133.1
## - Difficulty 1 227 12739 3135.5
## - Difficulty_pChange 1 29464 41976 4822.8
## - Hash_rate_pChange 1 73094 85606 5831.2
## - Miners_revenue_pChange 1 130812 143325 6560.4
##
## Step: AIC=3111
## pChange ~ Date + Day + Week + Transaction_fees..BTC. + estimated_transaction_value +
## Difficulty + Hash_rate + Output_value..BTC._pChange + Miners_revenue_pChange +
## No_of_transactions_pChange + Difficulty_pChange + Hash_rate_pChange
##
## Df Sum of Sq RSS AIC
## - estimated_transaction_value 1 12 12532 3110.4
## - Output_value..BTC._pChange 1 13 12533 3110.5
## <none> 12520 3111.0
## - Week 1 28 12548 3112.1
## - Day 1 28 12548 3112.1
## - Date 1 32 12553 3112.7
## - Transaction_fees..BTC. 1 107 12627 3121.0
## - No_of_transactions_pChange 1 159 12679 3126.8
## - Hash_rate 1 200 12720 3131.5
## - Difficulty 1 220 12740 3133.6
## - Difficulty_pChange 1 29469 41989 4821.2
## - Hash_rate_pChange 1 73091 85612 5829.3
## - Miners_revenue_pChange 1 131181 143701 6562.2
##
## Step: AIC=3110.37
## pChange ~ Date + Day + Week + Transaction_fees..BTC. + Difficulty +
## Hash_rate + Output_value..BTC._pChange + Miners_revenue_pChange +
## No_of_transactions_pChange + Difficulty_pChange + Hash_rate_pChange
##
## Df Sum of Sq RSS AIC
## - Output_value..BTC._pChange 1 13 12545 3109.9
## <none> 12532 3110.4
## - Week 1 25 12557 3111.2
## - Day 1 25 12558 3111.2
## - Date 1 28 12560 3111.5
## - Transaction_fees..BTC. 1 137 12669 3123.7
## - No_of_transactions_pChange 1 162 12695 3126.6
## - Difficulty 1 246 12778 3135.8
## - Hash_rate 1 251 12783 3136.4
## - Difficulty_pChange 1 29528 42060 4821.6
## - Hash_rate_pChange 1 73451 85983 5833.4
## - Miners_revenue_pChange 1 131201 143733 6560.5
##
## Step: AIC=3109.85
## pChange ~ Date + Day + Week + Transaction_fees..BTC. + Difficulty +
## Hash_rate + Miners_revenue_pChange + No_of_transactions_pChange +
## Difficulty_pChange + Hash_rate_pChange
##
## Df Sum of Sq RSS AIC
## <none> 12545 3109.9
## - Week 1 25 12570 3110.6
## - Day 1 25 12570 3110.7
## - Date 1 26 12571 3110.7
## - Transaction_fees..BTC. 1 137 12682 3123.2
## - No_of_transactions_pChange 1 170 12715 3126.9
## - Difficulty 1 246 12791 3135.3
## - Hash_rate 1 252 12797 3136.0
## - Difficulty_pChange 1 29556 42101 4821.0
## - Hash_rate_pChange 1 73464 86009 5831.9
## - Miners_revenue_pChange 1 131319 143864 6559.8
##
## Call:
## lm(formula = pChange ~ Date + Day + Week + Transaction_fees..BTC. +
## Difficulty + Hash_rate + Miners_revenue_pChange + No_of_transactions_pChange +
## Difficulty_pChange + Hash_rate_pChange, data = bitcoin_final[,
## -4])
##
## Coefficients:
## (Intercept) Date
## -3.061e+00 2.273e-04
## Day Week
## 4.855e-02 -3.374e-01
## Transaction_fees..BTC. Difficulty
## -3.303e-03 -5.541e-12
## Hash_rate Miners_revenue_pChange
## 7.396e-07 8.926e-01
## No_of_transactions_pChange Difficulty_pChange
## -3.203e-03 7.347e-01
## Hash_rate_pChange
## -8.716e-01
Fitting a reduced model derived from the lowest-AIC model and checking the residuals…
The time effect (Date, Day, Week) is removed, and Difficulty, Hash_rate and No_of_transactions are used as raw values rather than as percentage changes
# Reduced model for the price percentage change: no time variables, and
# Miners_revenue_pChange as the only percentage-change predictor.
fit.lm5 <- lm(
  formula = pChange ~ Transaction_fees..BTC. + Difficulty + Hash_rate +
    Miners_revenue_pChange + No_of_transactions,
  data = bitcoin_final[, -4]
)
summary(fit.lm5)
##
## Call:
## lm(formula = pChange ~ Transaction_fees..BTC. + Difficulty +
## Hash_rate + Miners_revenue_pChange + No_of_transactions,
## data = bitcoin_final[, -4])
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.243 -4.124 -0.310 3.579 101.126
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.434e-01 3.230e-01 1.992 0.0465 *
## Transaction_fees..BTC. 3.186e-04 2.500e-03 0.127 0.8986
## Difficulty 1.326e-11 2.772e-12 4.784 1.90e-06 ***
## Hash_rate -1.780e-06 3.668e-07 -4.854 1.34e-06 ***
## Miners_revenue_pChange 3.599e-01 1.245e-02 28.908 < 2e-16 ***
## No_of_transactions -6.256e-07 2.975e-06 -0.210 0.8335
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.036 on 1409 degrees of freedom
## Multiple R-squared: 0.3733, Adjusted R-squared: 0.371
## F-statistic: 167.8 on 5 and 1409 DF, p-value: < 2.2e-16
Difficulty, Hash_rate and Miners_revenue_pChange are significant, while Transaction_fees..BTC. and No_of_transactions are not; R^2 drops to 37.33%
# Diagnostic plots for fit.lm5 with point labels switched off.
autoplot(fit.lm5, label = FALSE, colour = "goldenrod1") +
  theme_minimal()
The residuals plot looks much better than before; let's run a test for homogeneous variance to check our hypothesis of constant variance
# lmtest is called through its namespace, so it does not need to be attached.
lmtest::bptest(fit.lm5) # Breusch-Pagan test
##
## studentized Breusch-Pagan test
##
## data: fit.lm5
## BP = 206.1, df = 5, p-value < 2.2e-16
car::ncvTest(fit.lm5) # Non-constant variance score test against the fitted values
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 2633.586 Df = 1 p = 0
The test for homogeneous variance has a p-value < 0.05; therefore we cannot assume constant variance.
After trying various transformations and other regression methods, the data assumption of constant variance was not satisfied.
The next step is to explore the trend in the data that is causing the non-constant variance
# Calendar components of each observation, used to compare the years.
bitcoin_final$Year <- format(bitcoin_final$Date, "%Y")
bitcoin_final$Month <- format(bitcoin_final$Date, "%b")
bitcoin_final$Day <- format(bitcoin_final$Date, "%d")
# Re-date every observation into the leap year 2000 by month and day, so the
# same calendar date lines up across years. Mapping by day-of-year ("%j")
# would shift every date after 28 Feb by one day in non-leap years.
bitcoin_final$CommonDate <- as.Date(
  paste0("2000-", format(bitcoin_final$Date, "%m-%d"))
)
# One panel per year. Year is mapped to colour only: the default shape palette
# offers 6 shapes, so mapping 9 years to shape drops the points of the rest.
ggplot(
  data = bitcoin_final,
  mapping = aes(x = CommonDate, y = Price, colour = Year)
) +
  geom_point() +
  geom_line() +
  facet_grid(Year ~ .) +
  scale_x_date(labels = function(x) format(x, "%d-%b"))
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 9.
## Consider specifying shapes manually if you must have them.
## Warning: Removed 434 rows containing missing values (geom_point).
# Render the most recently drawn ggplot as an interactive plotly figure.
ggplotly()
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 9.
## Consider specifying shapes manually if you must have them.
We can see that most of the bitcoin price fluctuations started around June/July 2017, and that the price before then was only in the hundreds of dollars, which may be causing the high variance in the data.
Useful links on time series ggplots: http://www.sthda.com/english/articles/32-r-graphics-essentials/128-plot-time-series-data-using-ggplot/ http://neondataskills.org/R/time-series-plot-ggplot/ https://plot.ly/ggplot2/time-series/
Use the bitcoin_final dataset going forward, as it also contains the transformed variables in case a transformation is needed.
# Keep the observations dated 1 June 2016 or later and store Day as a number.
bitcoin_new <- subset(bitcoin_final, Date >= as.Date("2016-06-01"))
bitcoin_new$Day <- as.numeric(bitcoin_new$Day)
head(bitcoin_new)
## Date Day Week Price Transaction_fees..BTC.
## 1058 2016-06-01 1 23 539.4700 56.18498
## 1059 2016-06-03 3 23 568.0000 54.69657
## 1060 2016-06-05 5 24 574.0200 44.36412
## 1061 2016-06-07 7 24 577.5400 56.51048
## 1062 2016-06-09 9 24 575.2941 53.21551
## 1063 2016-06-11 11 24 594.4400 46.24529
## No_of_transactions Output_value..BTC. estimated_transaction_value
## 1058 234385 2703528 203235833
## 1059 234669 2272984 186207347
## 1060 190528 1848155 161858533
## 1061 248576 1703861 142680383
## 1062 236353 1651160 143061530
## 1063 213159 1788936 162636056
## Miners_revenue Cost_per_transaction Difficulty Hash_rate Trade_volume
## 1058 1972576 8.415964 1.99e+11 1426731 59032596
## 1059 2047468 8.724917 1.99e+11 1406916 26922629
## 1060 2479401 13.013318 1.99e+11 1694243 45168190
## 1061 2169535 8.727854 1.99e+11 1406916 34509545
## 1062 2410789 10.199949 1.96e+11 1608134 28300189
## 1063 2364669 11.093452 1.96e+11 1520418 24377615
## pChange Transaction_fees..BTC._pChange Output_value..BTC._pChange
## 1058 2.7268400 -2.881464 -5.237267
## 1059 5.2885239 -2.649121 -15.925274
## 1060 1.0598592 -18.890496 -18.690341
## 1061 0.6132191 27.378798 -7.807462
## 1062 -0.3888670 -5.830722 -3.093061
## 1063 3.3280106 -13.098093 8.344216
## estimated_transaction_value_pChange Miners_revenue_pChange
## 1058 10.3068451 1.986003
## 1059 -8.3786832 3.796654
## 1060 -13.0761829 21.095998
## 1061 -11.8487114 -12.497627
## 1062 0.2671331 11.120056
## 1063 13.6825928 -1.913045
## No_of_transactions_pChange Cost_per_transaction_pChange
## 1058 10.8890140 -8.028758
## 1059 0.1211682 3.671038
## 1060 -18.8098982 49.151184
## 1061 30.4669130 -32.931368
## 1062 -4.9172084 16.866631
## 1063 -9.8132878 8.759875
## Difficulty_pChange Hash_rate_pChange Year Month CommonDate
## 1058 0.000000 -0.6896552 2016 Jun 2000-06-01
## 1059 0.000000 -1.3888889 2016 Jun 2000-06-03
## 1060 0.000000 20.4225352 2016 Jun 2000-06-05
## 1061 0.000000 -16.9590643 2016 Jun 2000-06-07
## 1062 -1.507538 14.3020864 2016 Jun 2000-06-09
## 1063 0.000000 -5.4545454 2016 Jun 2000-06-11
# Price trend over the selected period. Columns are referenced by bare name
# inside aes() so that ggplot2 looks them up in `bitcoin_new`.
ggplot(bitcoin_new, aes(x = Date, y = Price)) +
  geom_line() +
  geom_point(color = "dark blue") +
  ggtitle("Bitcoin values in USD trend") +
  labs(x = "Date", y = "Bitcoin price in USD") +
  theme_minimal()
# Pairwise correlations of columns 2 to 12, followed by the variables whose
# correlation with Price is at least 0.75.
cor <- cor(bitcoin_new[, 2:12])
corrplot(
  cor,
  method = "square", type = "upper",
  tl.srt = 50, tl.col = "black", tl.cex = 0.6,
  title = "Correlation of Variables"
)
correlation <- as.data.frame(cor)
highly_correlated <- correlation[correlation$Price >= 0.75, ]
rownames(highly_correlated)
## [1] "Price" "estimated_transaction_value"
## [3] "Miners_revenue" "Cost_per_transaction"
## [5] "Difficulty" "Hash_rate"
These are the highly correlated variables to bitcoin market price
Additive model with all variables
# Additive model of Price on all other columns among the first twelve.
bitcoin_new_set <- bitcoin_new[, 1:12]
fit.lm1_1 <- lm(Price ~ ., data = bitcoin_new_set)
summary(fit.lm1_1)
##
## Call:
## lm(formula = Price ~ ., data = bitcoin_new_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -953.53 -143.20 -39.01 142.09 1831.33
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.529e+04 3.592e+03 -12.609 < 2e-16 ***
## Date 2.611e+00 2.138e-01 12.211 < 2e-16 ***
## Day 2.484e-01 1.728e+00 0.144 0.88579
## Week 3.527e+00 1.140e+00 3.093 0.00214 **
## Transaction_fees..BTC. -2.492e+00 1.635e-01 -15.242 < 2e-16 ***
## No_of_transactions 3.078e-03 5.485e-04 5.611 4.12e-08 ***
## Output_value..BTC. 3.756e-05 3.122e-05 1.203 0.22971
## estimated_transaction_value 1.555e-07 8.502e-08 1.829 0.06825 .
## Miners_revenue 3.120e-04 9.882e-06 31.578 < 2e-16 ***
## Cost_per_transaction 2.097e+01 2.411e+00 8.698 < 2e-16 ***
## Difficulty 2.409e-09 1.095e-10 22.011 < 2e-16 ***
## Hash_rate -3.110e-04 1.579e-05 -19.696 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 280.3 on 346 degrees of freedom
## Multiple R-squared: 0.996, Adjusted R-squared: 0.9959
## F-statistic: 7894 on 11 and 346 DF, p-value: < 2.2e-16
R^2 is almost 100%, which might indicate overfitting; some variables such as Day, output value and estimated transaction value are insignificant in the model.
# Regression diagnostic plots for the full additive model fit.lm1_1.
autoplot(fit.lm1_1, label = 0, col = "goldenrod1") +
theme_minimal()
The residuals indicate heteroscedasticity and deviate from normality.
Running step function to get a model with the lowest AIC
# Stepwise search by AIC starting from the full additive model; each step
# lists the AIC obtained by dropping one term.
step(fit.lm1_1)
## Start: AIC=4047.12
## Price ~ Date + Day + Week + Transaction_fees..BTC. + No_of_transactions +
## Output_value..BTC. + estimated_transaction_value + Miners_revenue +
## Cost_per_transaction + Difficulty + Hash_rate
##
## Df Sum of Sq RSS AIC
## - Day 1 1623 27189967 4045.1
## - Output_value..BTC. 1 113763 27302106 4046.6
## <none> 27188343 4047.1
## - estimated_transaction_value 1 262887 27451230 4048.6
## - Week 1 751974 27940317 4054.9
## - No_of_transactions 1 2474173 29662516 4076.3
## - Cost_per_transaction 1 5944403 33132746 4115.9
## - Date 1 11717661 38906004 4173.4
## - Transaction_fees..BTC. 1 18255624 45443967 4229.0
## - Hash_rate 1 30482339 57670682 4314.3
## - Difficulty 1 38069298 65257641 4358.6
## - Miners_revenue 1 78355210 105543553 4530.7
##
## Step: AIC=4045.14
## Price ~ Date + Week + Transaction_fees..BTC. + No_of_transactions +
## Output_value..BTC. + estimated_transaction_value + Miners_revenue +
## Cost_per_transaction + Difficulty + Hash_rate
##
## Df Sum of Sq RSS AIC
## - Output_value..BTC. 1 113124 27303091 4044.6
## <none> 27189967 4045.1
## - estimated_transaction_value 1 261372 27451339 4046.6
## - Week 1 782817 27972784 4053.3
## - No_of_transactions 1 2496253 29686219 4074.6
## - Cost_per_transaction 1 6036872 33226839 4114.9
## - Date 1 11718173 38908140 4171.4
## - Transaction_fees..BTC. 1 18517284 45707250 4229.1
## - Hash_rate 1 30488404 57678370 4312.4
## - Difficulty 1 38089590 65279557 4356.7
## - Miners_revenue 1 78774086 105964052 4530.1
##
## Step: AIC=4044.63
## Price ~ Date + Week + Transaction_fees..BTC. + No_of_transactions +
## estimated_transaction_value + Miners_revenue + Cost_per_transaction +
## Difficulty + Hash_rate
##
## Df Sum of Sq RSS AIC
## <none> 27303091 4044.6
## - estimated_transaction_value 1 409702 27712793 4048.0
## - Week 1 747011 28050102 4052.3
## - No_of_transactions 1 2405751 29708842 4072.9
## - Cost_per_transaction 1 5932360 33235451 4113.0
## - Date 1 12404640 39707731 4176.7
## - Transaction_fees..BTC. 1 18460738 45763829 4227.5
## - Hash_rate 1 30476236 57779327 4311.0
## - Difficulty 1 38725343 66028434 4358.8
## - Miners_revenue 1 78865192 106168283 4528.8
##
## Call:
## lm(formula = Price ~ Date + Week + Transaction_fees..BTC. + No_of_transactions +
## estimated_transaction_value + Miners_revenue + Cost_per_transaction +
## Difficulty + Hash_rate, data = bitcoin_new_set)
##
## Coefficients:
## (Intercept) Date
## -4.588e+04 2.652e+00
## Week Transaction_fees..BTC.
## 3.463e+00 -2.463e+00
## No_of_transactions estimated_transaction_value
## 3.006e-03 1.851e-07
## Miners_revenue Cost_per_transaction
## 3.109e-04 2.051e+01
## Difficulty Hash_rate
## 2.384e-09 -3.088e-04
Fit the model with lowest AIC and check for residuals
# Model retained by the stepwise search (Day and Output_value..BTC. dropped).
fit.lm2_2 <- lm(
  formula = Price ~ Date + Week + Transaction_fees..BTC. +
    estimated_transaction_value + No_of_transactions + Miners_revenue +
    Cost_per_transaction + Difficulty + Hash_rate,
  data = bitcoin_new_set
)
summary(fit.lm2_2)
##
## Call:
## lm(formula = Price ~ Date + Week + Transaction_fees..BTC. + estimated_transaction_value +
## No_of_transactions + Miners_revenue + Cost_per_transaction +
## Difficulty + Hash_rate, data = bitcoin_new_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -954.12 -148.53 -33.14 142.41 1859.23
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.588e+04 3.552e+03 -12.917 < 2e-16 ***
## Date 2.652e+00 2.109e-01 12.574 < 2e-16 ***
## Week 3.463e+00 1.122e+00 3.086 0.00219 **
## Transaction_fees..BTC. -2.463e+00 1.606e-01 -15.339 < 2e-16 ***
## estimated_transaction_value 1.851e-07 8.098e-08 2.285 0.02290 *
## No_of_transactions 3.006e-03 5.428e-04 5.537 6.06e-08 ***
## Miners_revenue 3.109e-04 9.806e-06 31.705 < 2e-16 ***
## Cost_per_transaction 2.051e+01 2.358e+00 8.696 < 2e-16 ***
## Difficulty 2.385e-09 1.073e-10 22.217 < 2e-16 ***
## Hash_rate -3.088e-04 1.567e-05 -19.709 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 280.1 on 348 degrees of freedom
## Multiple R-squared: 0.996, Adjusted R-squared: 0.9959
## F-statistic: 9663 on 9 and 348 DF, p-value: < 2.2e-16
All the variables are significant in the model
# Regression diagnostic plots for the stepwise-selected model fit.lm2_2.
autoplot(fit.lm2_2, label = 0, col = "goldenrod1") +
theme_minimal()
The residuals indicate heteroscedasticity and deviate from normality.
Using weighted regression
# Weighted least squares. The spread of the residuals of fit.lm2_2 is modelled
# as a linear function of its fitted values, and each observation is weighted
# by the inverse square of that estimated spread.
abs_resid <- abs(residuals(fit.lm2_2))
fitted_price <- fitted(fit.lm2_2)
wts <- 1 / fitted(lm(abs_resid ~ fitted_price))^2
fit.lm3_3 <- lm(
  formula = Price ~ Transaction_fees..BTC. + No_of_transactions +
    Miners_revenue + Cost_per_transaction + Difficulty + Hash_rate,
  data = bitcoin_new_set,
  weights = wts
)
# Diagnostic plots for the weighted fit.
autoplot(fit.lm3_3, label = 0, col = "goldenrod1") +
  theme_minimal()
Applying weighted regression did not help much.
# Residual plots for each predictor and the fitted values; the printed table
# gives a test statistic per term plus Tukey's test.
residualPlots(fit.lm3_3)
## Test stat Pr(>|Test stat|)
## Transaction_fees..BTC. -5.9524 6.421e-09 ***
## No_of_transactions 4.9525 1.142e-06 ***
## Miners_revenue -2.2503 0.025051 *
## Cost_per_transaction -0.3181 0.750577
## Difficulty -10.0800 < 2.2e-16 ***
## Hash_rate -7.9906 1.965e-14 ***
## Tukey test -2.6430 0.008217 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Weighted regression does not add any value, so fit an ordinary (unweighted) regression
Using the transformed variables to % changes
- Transaction_fees..BTC. - Keep it as is
- No_of_transactions - Keep it as is
- Miners_revenue - Use pChange
- Cost_per_transaction - Keep it as is
- Difficulty - As this is the difficulty level, keep it as is
- Hash_rate - Already a rate, keep it as is
# Percentage change in price modelled on the raw metrics, with miners' revenue
# entered as a percentage change.
fit.lm4_4 <- lm(
  formula = pChange ~ Transaction_fees..BTC. + No_of_transactions +
    Miners_revenue_pChange + Cost_per_transaction + Difficulty + Hash_rate,
  data = bitcoin_new
)
summary(fit.lm4_4)
##
## Call:
## lm(formula = pChange ~ Transaction_fees..BTC. + No_of_transactions +
## Miners_revenue_pChange + Cost_per_transaction + Difficulty +
## Hash_rate, data = bitcoin_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.8903 -3.1624 -0.4798 2.9476 27.2666
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.177e+00 1.997e+00 -2.092 0.037200 *
## Transaction_fees..BTC. -1.126e-02 2.963e-03 -3.801 0.000170 ***
## No_of_transactions 2.607e-05 8.228e-06 3.169 0.001664 **
## Miners_revenue_pChange 2.059e-01 2.176e-02 9.461 < 2e-16 ***
## Cost_per_transaction 8.200e-02 2.126e-02 3.856 0.000137 ***
## Difficulty 1.056e-11 2.255e-12 4.683 4.04e-06 ***
## Hash_rate -1.714e-06 3.234e-07 -5.300 2.05e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.855 on 351 degrees of freedom
## Multiple R-squared: 0.2395, Adjusted R-squared: 0.2265
## F-statistic: 18.42 on 6 and 351 DF, p-value: < 2.2e-16
# Residual plots for each predictor and the fitted values; the printed table
# gives a test statistic per term plus Tukey's test.
residualPlots(fit.lm4_4)
## Test stat Pr(>|Test stat|)
## Transaction_fees..BTC. -3.7714 0.0001905 ***
## No_of_transactions -0.7250 0.4689640
## Miners_revenue_pChange -3.9642 8.930e-05 ***
## Cost_per_transaction -1.6858 0.0927195 .
## Difficulty -1.1861 0.2363864
## Hash_rate -0.6745 0.5004553
## Tukey test -4.0364 5.428e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Apply a polynomial (poly) transformation to Transaction_fees..BTC. and Miners_revenue_pChange
# Same predictors as fit.lm4_4, with second-degree polynomial terms for
# transaction fees and the miners' revenue percentage change.
fit.lm5_5 <- lm(
  formula = pChange ~ poly(Transaction_fees..BTC., 2) + No_of_transactions +
    poly(Miners_revenue_pChange, 2) + Cost_per_transaction + Difficulty +
    poly(Hash_rate, 1),
  data = bitcoin_new
)
summary(fit.lm5_5)
##
## Call:
## lm(formula = pChange ~ poly(Transaction_fees..BTC., 2) + No_of_transactions +
## poly(Miners_revenue_pChange, 2) + Cost_per_transaction +
## Difficulty + poly(Hash_rate, 1), data = bitcoin_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.3129 -3.1599 -0.2331 3.1219 25.1745
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.189e+01 3.832e+00 -5.713 2.38e-08
## poly(Transaction_fees..BTC., 2)1 -2.968e+01 9.729e+00 -3.051 0.00245
## poly(Transaction_fees..BTC., 2)2 -2.605e+01 6.304e+00 -4.133 4.49e-05
## No_of_transactions 1.378e-05 8.203e-06 1.679 0.09397
## poly(Miners_revenue_pChange, 2)1 6.587e+01 6.157e+00 10.699 < 2e-16
## poly(Miners_revenue_pChange, 2)2 -2.576e+01 5.976e+00 -4.310 2.12e-05
## Cost_per_transaction 9.835e-02 2.087e-02 4.713 3.54e-06
## Difficulty 1.441e-11 2.265e-12 6.365 6.15e-10
## poly(Hash_rate, 1) -3.606e+02 5.162e+01 -6.985 1.45e-11
##
## (Intercept) ***
## poly(Transaction_fees..BTC., 2)1 **
## poly(Transaction_fees..BTC., 2)2 ***
## No_of_transactions .
## poly(Miners_revenue_pChange, 2)1 ***
## poly(Miners_revenue_pChange, 2)2 ***
## Cost_per_transaction ***
## Difficulty ***
## poly(Hash_rate, 1) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.608 on 349 degrees of freedom
## Multiple R-squared: 0.3061, Adjusted R-squared: 0.2902
## F-statistic: 19.25 on 8 and 349 DF, p-value: < 2.2e-16
# Regression diagnostic plots for the polynomial model fit.lm5_5.
autoplot(fit.lm5_5, label = 0, col = "goldenrod1") +
theme_minimal()
The residuals now look random; let's test for homogeneous variance
lmtest::bptest(fit.lm5_5) # studentized Breusch-Pagan test for heteroscedasticity
##
## studentized Breusch-Pagan test
##
## data: fit.lm5_5
## BP = 14.566, df = 8, p-value = 0.06815
car::ncvTest(fit.lm5_5) # score test for non-constant variance against the fitted values
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.2503768 Df = 1 p = 0.6168099
p-value > 0.05, we can assume the data has constant variance
# Shapiro-Wilk normality test on the residuals of fit.lm5_5.
shapiro.test(residuals(fit.lm5_5))
##
## Shapiro-Wilk normality test
##
## data: residuals(fit.lm5_5)
## W = 0.97965, p-value = 6.041e-05
# Residual plots for each predictor and the fitted values; the printed table
# gives a test statistic for the non-polynomial terms plus Tukey's test.
residualPlots(fit.lm5_5)
## Test stat Pr(>|Test stat|)
## poly(Transaction_fees..BTC., 2)
## No_of_transactions -0.8456 0.3984
## poly(Miners_revenue_pChange, 2)
## Cost_per_transaction -0.8905 0.3738
## Difficulty 0.0755 0.9399
## poly(Hash_rate, 1)
## Tukey test 0.6896 0.4904
The residuals vs. predictors plots look good!
bitcoin_new <- as.data.table(bitcoin_new)
# Stack observed and fitted values in long format. Both tables carry the same
# column names and their own `type` label, so rbindlist() matches columns by
# name and the labels do not depend on row counts.
observed <- bitcoin_new[, .(pChange, Date, type = "Real")]
fitted_vals <- data.table(
  pChange = fit.lm5_5$fitted.values,
  Date = bitcoin_new[, Date],
  type = "Fitted"
)
datas <- rbindlist(list(observed, fitted_vals), use.names = TRUE)
# Observed vs fitted percentage change over time, one line per series.
ggplot(data = datas, aes(Date, pChange, group = type, colour = type)) +
  geom_line(size = 0.8) +
  theme_bw() +
  labs(
    x = "Date", y = "Bitcoin price change %",
    title = "Fit from Multiple Linear Regression"
  )
# Reproducible random split: 20% of the rows (rounded down) form the test set
# and the remaining rows form the training set.
set.seed(123)
n_obs <- nrow(bitcoin_new)
indexes <- sample(seq_len(n_obs), size = floor(0.2 * n_obs))
test <- bitcoin_new[indexes, ]
train <- bitcoin_new[-indexes, ]
Use model fit.lm5_5 on train set
# Refit the fit.lm5_5 specification on the training rows only, then predict
# the held-out test rows.
final_model <- lm(
  formula = pChange ~ poly(Transaction_fees..BTC., 2) + No_of_transactions +
    poly(Miners_revenue_pChange, 2) + Cost_per_transaction + Difficulty +
    poly(Hash_rate, 1),
  data = train
)
test$pred <- predict(final_model, newdata = test)
Get the error in prediction
# Mean absolute error of the predictions on the test set.
abs_err <- abs(test$pred - test$pChange)
error <- mean(abs_err)
error
## [1] 4.743434