FINAL PROJECT

READING THE DATA AND SUMMARY

# Print the working directory; the relative path passed to read.csv() below is
# resolved against it.
getwd()
## [1] "C:/Users/TANAY/Downloads"
# Load the bitcoin data set into a data frame.
bcdata <- read.csv("bitcoindata.csv")

### Summarising the data
# Per-column min / quartiles / mean / max.
summary(bcdata)
##    Timestamp              Open              High              Low         
##  Min.   :1.499e+09   Min.   : 209010   Min.   : 209010   Min.   : 208000  
##  1st Qu.:1.503e+09   1st Qu.: 418916   1st Qu.: 419228   1st Qu.: 418701  
##  Median :1.507e+09   Median : 525979   Median : 526170   Median : 525270  
##  Mean   :1.507e+09   Mean   : 795237   Mean   : 795972   Mean   : 794452  
##  3rd Qu.:1.511e+09   3rd Qu.: 918450   3rd Qu.: 918890   3rd Qu.: 917893  
##  Max.   :1.515e+09   Max.   :2257221   Max.   :2257221   Max.   :2255061  
##      Close           Volume_BTC      Volume_Currency     Weighted_Price   
##  Min.   : 208000   Min.   :  0.001   Min.   :      263   Min.   : 208298  
##  1st Qu.: 419180   1st Qu.:  2.875   1st Qu.:  1459349   1st Qu.: 419028  
##  Median : 525446   Median :  7.166   Median :  4352586   Median : 525516  
##  Mean   : 795269   Mean   : 12.997   Mean   : 10840806   Mean   : 795213  
##  3rd Qu.: 918562   3rd Qu.: 15.413   3rd Qu.: 12600004   3rd Qu.: 918169  
##  Max.   :2256147   Max.   :233.771   Max.   :175122694   Max.   :2256431  
##      Ratio          
##  Min.   :4.432e-07  
##  1st Qu.:1.089e-06  
##  Median :1.903e-06  
##  Mean   :1.851e-06  
##  3rd Qu.:2.386e-06  
##  Max.   :4.801e-06
# psych supplies describe() (used here) and corr.test() (used in the
# correlation section further down).
library(psych)
# Extended descriptive statistics per column: n, mean, sd, median, trimmed
# mean, mad, min, max, range, skew, kurtosis and standard error.
describe(bcdata)
##                 vars    n         mean         sd       median
## Timestamp          1 1050 1507362416.7  4718808.2 1.507358e+09
## Open               2 1050     795237.2   558914.5 5.259785e+05
## High               3 1050     795971.9   559620.4 5.261700e+05
## Low                4 1050     794452.3   558153.1 5.252700e+05
## Close              5 1050     795269.3   558971.7 5.254455e+05
## Volume_BTC         6 1050         13.0       17.6 7.170000e+00
## Volume_Currency    7 1050   10840805.6 17024522.6 4.352586e+06
## Weighted_Price     8 1050     795212.6   558902.6 5.255162e+05
## Ratio              9 1050          0.0        0.0 0.000000e+00
##                      trimmed        mad          min          max
## Timestamp       1507398656.9 6118927.42 1.499155e+09 1.515367e+09
## Open                710018.6  318478.79 2.090100e+05 2.257221e+06
## High                710660.4  318739.73 2.090100e+05 2.257221e+06
## Low                 709335.0  317606.28 2.080000e+05 2.255061e+06
## Close               710030.3  317862.77 2.080000e+05 2.256147e+06
## Volume_BTC               9.5       7.85 0.000000e+00 2.337700e+02
## Volume_Currency    7037991.3 5088954.25 2.628800e+02 1.751227e+08
## Weighted_Price      709994.3  317879.10 2.082981e+05 2.256430e+06
## Ratio                    0.0       0.00 0.000000e+00 0.000000e+00
##                        range  skew kurtosis        se
## Timestamp        16211820.00 -0.04    -1.19 145625.58
## Open              2048211.00  1.20     0.06  17248.48
## High              2048211.00  1.20     0.06  17270.26
## Low               2047061.00  1.20     0.06  17224.98
## Close             2048147.00  1.20     0.06  17250.24
## Volume_BTC            233.77  4.11    31.22      0.54
## Volume_Currency 175122431.52  3.62    20.51 525388.17
## Weighted_Price    2048132.35  1.20     0.06  17248.11
## Ratio                   0.00  0.45    -0.54      0.00
# Number of rows and columns in the data frame.
dim(bcdata)
## [1] 1050    9
# Column names, storage types and the first few values of each column.
str(bcdata)
## 'data.frame':    1050 obs. of  9 variables:
##  $ Timestamp      : int  1504017540 1504388460 1502613420 1512550620 1506538440 1508046480 1506057180 1503119100 1506509580 1507674660 ...
##  $ Open           : int  488440 507501 463431 1402190 454001 646990 417800 453500 444968 537815 ...
##  $ High           : int  488500 508020 463890 1403648 454001 647499 417900 453558 446470 538020 ...
##  $ Low            : int  488255 507500 462540 1401308 453390 646968 417507 452622 444968 537593 ...
##  $ Close          : int  488256 507909 463090 1401810 453796 647499 417900 452651 446470 538020 ...
##  $ Volume_BTC     : num  1.81 23.83 8.18 14.17 6.27 ...
##  $ Volume_Currency: num  882993 12095510 3787479 19871391 2846473 ...
##  $ Weighted_Price : num  488355 507601 463022 1402333 453833 ...
##  $ Ratio          : num  2.05e-06 1.97e-06 2.16e-06 7.13e-07 2.20e-06 ...

PLOTS/VISUALISATIONS

library(lattice)
# Distribution of each price / volume variable: a lattice histogram followed
# by a base-graphics horizontal boxplot of the same column.
#
# The title argument is `main` (lower case). R argument names are
# case-sensitive, so `Main` is not recognised: histogram() silently ignores it
# and boxplot() warns that it is not a graphical parameter, leaving every plot
# without its title.
histogram(bcdata$High, main = "Range of High Value", xlab = "High value")

boxplot(bcdata$High, main = "Range of High Value", xlab = "High value", horizontal = TRUE)

histogram(bcdata$Low, main = "Range of Low Value", xlab = "Low value")

boxplot(bcdata$Low, main = "Range of Low Value", xlab = "Low value", horizontal = TRUE)

histogram(bcdata$Volume_BTC, main = "Volume of bitcoin", xlab = "Bitcoin volume")

boxplot(bcdata$Volume_BTC, main = "Volume of bitcoin", xlab = "Bitcoin volume", horizontal = TRUE)

# NOTE(review): in the str() sample, Volume_Currency is roughly
# Volume_BTC * price, so it looks like the traded value in the quote currency
# rather than a "cryptocurrency" volume -- confirm and relabel if so.
histogram(bcdata$Volume_Currency, main = "Volume of cryptocurrency", xlab = "Cryptocurrency volume")

boxplot(bcdata$Volume_Currency, main = "Volume of cryptocurrency", xlab = "Cryptocurrency volume", horizontal = TRUE)

histogram(bcdata$Weighted_Price, main = "Weighted price", xlab = "Weighted price")

boxplot(bcdata$Weighted_Price, main = "Weighted price", xlab = "Weighted price", horizontal = TRUE)

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Pairwise scatterplots (car::scatterplot) of the price variables against the
# volume variables, the ratio, and each other. Columns are looked up in
# `bcdata` through the formula interface; titles and axis labels are given
# explicitly for every plot.
scatterplot(Weighted_Price ~ Ratio, data = bcdata,
            main = "Weighted price Vs Ratio",
            xlab = "Ratio", ylab = "Weighted price")

scatterplot(Weighted_Price ~ Volume_BTC, data = bcdata,
            main = "Weighted price Vs Volume of bitcoin",
            xlab = "Bitcoin Volume", ylab = "Weighted price")

scatterplot(Weighted_Price ~ Volume_Currency, data = bcdata,
            main = "Weighted price Vs Volume of currency",
            xlab = "Volume of currency", ylab = "Weighted price")

scatterplot(High ~ Volume_BTC, data = bcdata,
            main = "High value Vs Bitcoin volume",
            xlab = "Bitcoin volume", ylab = "High value")

scatterplot(High ~ Volume_Currency, data = bcdata,
            main = "High value Vs Currency volume",
            xlab = "Currency volume", ylab = "High value")

scatterplot(Low ~ Volume_BTC, data = bcdata,
            main = "Low value Vs Bitcoin volume",
            xlab = "Bitcoin volume", ylab = "Low value")

scatterplot(Low ~ Volume_Currency, data = bcdata,
            main = "Low value Vs Currency volume",
            xlab = "Currency volume", ylab = "Low value")

scatterplot(High ~ Ratio, data = bcdata,
            main = "High value Vs Ratio",
            xlab = "Ratio", ylab = "High value")

scatterplot(Low ~ Ratio, data = bcdata,
            main = "Low value Vs Ratio",
            xlab = "Ratio", ylab = "Low value")

scatterplot(High ~ Weighted_Price, data = bcdata,
            main = "High value Vs Weighted price",
            xlab = "Weighted price", ylab = "High value")

CORRELATION/CORRGRAM

library("corrplot")
## corrplot 0.84 loaded
# Correlation matrix of columns 2-9 (every variable except Timestamp), drawn
# with ellipses.
corrplot(corr=cor(bcdata[ ,c(2:9)],use="complete.obs"), method="ellipse")

library("corrgram")
# Corrgram of the whole data frame (Timestamp included, unlike the calls that
# subset columns 2:9), with pie charts in the upper panel.
corrgram(bcdata,upper.panel=panel.pie, main="Corrgram of Bitcoin Data variables")

# psych::corr.test on columns 2-9: prints the correlation matrix, the sample
# size and the p-values (entries above the diagonal are adjusted for multiple
# tests, per the printed output).
corr.test(bcdata[ ,c(2:9)],use="complete")
## Call:corr.test(x = bcdata[, c(2:9)], use = "complete")
## Correlation matrix 
##                  Open  High   Low Close Volume_BTC Volume_Currency
## Open             1.00  1.00  1.00  1.00       0.05            0.47
## High             1.00  1.00  1.00  1.00       0.05            0.47
## Low              1.00  1.00  1.00  1.00       0.05            0.47
## Close            1.00  1.00  1.00  1.00       0.05            0.47
## Volume_BTC       0.05  0.05  0.05  0.05       1.00            0.77
## Volume_Currency  0.47  0.47  0.47  0.47       0.77            1.00
## Weighted_Price   1.00  1.00  1.00  1.00       0.05            0.47
## Ratio           -0.85 -0.85 -0.85 -0.85      -0.08           -0.42
##                 Weighted_Price Ratio
## Open                      1.00 -0.85
## High                      1.00 -0.85
## Low                       1.00 -0.85
## Close                     1.00 -0.85
## Volume_BTC                0.05 -0.08
## Volume_Currency           0.47 -0.42
## Weighted_Price            1.00 -0.85
## Ratio                    -0.85  1.00
## Sample Size 
## [1] 1050
## Probability values (Entries above the diagonal are adjusted for multiple tests.) 
##                 Open High Low Close Volume_BTC Volume_Currency
## Open             0.0 0.00 0.0   0.0       0.46               0
## High             0.0 0.00 0.0   0.0       0.46               0
## Low              0.0 0.00 0.0   0.0       0.46               0
## Close            0.0 0.00 0.0   0.0       0.46               0
## Volume_BTC       0.1 0.09 0.1   0.1       0.00               0
## Volume_Currency  0.0 0.00 0.0   0.0       0.00               0
## Weighted_Price   0.0 0.00 0.0   0.0       0.10               0
## Ratio            0.0 0.00 0.0   0.0       0.01               0
##                 Weighted_Price Ratio
## Open                      0.00  0.00
## High                      0.00  0.00
## Low                       0.00  0.00
## Close                     0.00  0.00
## Volume_BTC                0.46  0.07
## Volume_Currency           0.00  0.00
## Weighted_Price            0.00  0.00
## Ratio                     0.00  0.00
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option
# Pearson correlation test: High vs. Volume_BTC.
cor.test(bcdata$High,bcdata$Volume_BTC)
## 
##  Pearson's product-moment correlation
## 
## data:  bcdata$High and bcdata$Volume_BTC
## t = 1.6875, df = 1048, p-value = 0.09181
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.008469569  0.112200601
## sample estimates:
##        cor 
## 0.05205553
# Pearson correlation test: High vs. Volume_Currency.
cor.test(bcdata$High,bcdata$Volume_Currency)
## 
##  Pearson's product-moment correlation
## 
## data:  bcdata$High and bcdata$Volume_Currency
## t = 17.361, df = 1048, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4242520 0.5182987
## sample estimates:
##       cor 
## 0.4726199
# Pearson correlation test: Weighted_Price vs. Volume_BTC.
cor.test(bcdata$Weighted_Price,bcdata$Volume_BTC)
## 
##  Pearson's product-moment correlation
## 
## data:  bcdata$Weighted_Price and bcdata$Volume_BTC
## t = 1.6661, df = 1048, p-value = 0.096
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.009129958  0.111548427
## sample estimates:
##        cor 
## 0.05139686
# Pearson correlation test: Weighted_Price vs. Volume_Currency.
cor.test(bcdata$Weighted_Price,bcdata$Volume_Currency)
## 
##  Pearson's product-moment correlation
## 
## data:  bcdata$Weighted_Price and bcdata$Volume_Currency
## t = 17.321, df = 1048, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4233559 0.5174994
## sample estimates:
##       cor 
## 0.4717712
# Pearson correlation test: Weighted_Price vs. Ratio.
cor.test(bcdata$Weighted_Price,bcdata$Ratio)
## 
##  Pearson's product-moment correlation
## 
## data:  bcdata$Weighted_Price and bcdata$Ratio
## t = -51.323, df = 1048, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8621819 -0.8276533
## sample estimates:
##       cor 
## -0.845801
# Pearson correlation test: High vs. Ratio.
cor.test(bcdata$High,bcdata$Ratio)
## 
##  Pearson's product-moment correlation
## 
## data:  bcdata$High and bcdata$Ratio
## t = -51.31, df = 1048, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8621260 -0.8275847
## sample estimates:
##       cor 
## -0.845739
# Pearson correlation test: Low vs. Ratio.
cor.test(bcdata$Low,bcdata$Ratio)
## 
##  Pearson's product-moment correlation
## 
## data:  bcdata$Low and bcdata$Ratio
## t = -51.339, df = 1048, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8622486 -0.8277352
## sample estimates:
##       cor 
## -0.845875
# Pearson correlation test: High vs. Weighted_Price.
cor.test(bcdata$High,bcdata$Weighted_Price)
## 
##  Pearson's product-moment correlation
## 
## data:  bcdata$High and bcdata$Weighted_Price
## t = 17530, df = 1048, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9999981 0.9999985
## sample estimates:
##       cor 
## 0.9999983

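Correlation test results: Weighted price is strongly and negatively correlated with the Ratio (Volume of bitcoin / Volume of currency), r ≈ -0.85. Weighted price is moderately correlated with Volume of currency (r ≈ 0.47), and so is High value (r ≈ 0.47). High value and Weighted price are almost perfectly correlated (r ≈ 1.00). Neither High value nor Weighted price is significantly correlated with Volume of bitcoin (r ≈ 0.05, p > 0.05).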

REGRESSION MODELS

# Model 1: linear regression of Weighted_Price on Ratio, Volume_Currency and
# Volume_BTC; summary() prints coefficients, residual spread and R-squared.
model1 <- lm(Weighted_Price~ Ratio + Volume_Currency + Volume_BTC ,data=bcdata)
summary(model1)
## 
## Call:
## lm(formula = Weighted_Price ~ Ratio + Volume_Currency + Volume_BTC, 
##     data = bcdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -972811 -193287 -126277  138195 1021502 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.478e+06  2.193e+04   67.40   <2e-16 ***
## Ratio           -3.781e+11  9.889e+09  -38.23   <2e-16 ***
## Volume_Currency  1.572e-02  9.139e-04   17.20   <2e-16 ***
## Volume_BTC      -1.182e+04  8.065e+02  -14.65   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 263500 on 1046 degrees of freedom
## Multiple R-squared:  0.7783, Adjusted R-squared:  0.7777 
## F-statistic:  1224 on 3 and 1046 DF,  p-value: < 2.2e-16
# Model 2: linear regression of High on Weighted_Price, Ratio, Volume_Currency
# and Volume_BTC.
# NOTE(review): High and Weighted_Price correlate at ~1 in the correlation
# tests, so Weighted_Price alone presumably accounts for nearly all of the fit
# (R-squared is reported as 1) -- confirm the model is meant to include it.
model2 <- lm(High~ Weighted_Price + Ratio + Volume_Currency + Volume_BTC ,data=bcdata)
summary(model2)
## 
## Call:
## lm(formula = High ~ Weighted_Price + Ratio + Volume_Currency + 
##     Volume_BTC, data = bcdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3102.8  -233.9  -100.6   149.4  8388.4 
## 
## Coefficients:
##                   Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)     -4.288e+02  1.661e+02   -2.582  0.00995 ** 
## Weighted_Price   1.001e+00  1.013e-04 9879.164  < 2e-16 ***
## Ratio            1.351e+08  5.016e+07    2.694  0.00718 ** 
## Volume_Currency  5.419e-05  3.391e-06   15.980  < 2e-16 ***
## Volume_BTC      -1.815e+01  2.901e+00   -6.257  5.7e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 863.4 on 1045 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.102e+08 on 4 and 1045 DF,  p-value: < 2.2e-16
# Model 3: linear regression of Volume_Currency on the four price columns
# (High, Low, Open, Close).
# NOTE(review): these four predictors are mutually correlated at 1.00 in the
# corr.test matrix, so the individual coefficients and their signs are likely
# unstable (multicollinearity) -- consider a single price predictor.
model3 <- lm(Volume_Currency~ High + Low + Open + Close ,data=bcdata)
summary(model3)
## 
## Call:
## lm(formula = Volume_Currency ~ High + Low + Open + Close, data = bcdata)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -69563673  -4499920  -2451092   1112275 154228879 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1755812.6   684609.3   2.565   0.0105 *  
## High           5529.2      449.5  12.301  < 2e-16 ***
## Low           -2984.3      401.0  -7.443 2.06e-13 ***
## Open          -1732.9      441.8  -3.923 9.33e-05 ***
## Close          -808.6      431.2  -1.875   0.0610 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12570000 on 1045 degrees of freedom
## Multiple R-squared:  0.457,  Adjusted R-squared:  0.455 
## F-statistic: 219.9 on 4 and 1045 DF,  p-value: < 2.2e-16
# Model 4: linear regression of Volume_BTC on the four price columns
# (High, Low, Open, Close).
# NOTE(review): these four predictors are mutually correlated at 1.00 in the
# corr.test matrix, so the individual coefficients and their signs are likely
# unstable (multicollinearity) -- consider a single price predictor.
model4 <- lm(Volume_BTC~ High + Low + Open + Close ,data=bcdata)
summary(model4)
## 
## Call:
## lm(formula = Volume_BTC ~ High + Low + Open + Close, data = bcdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -56.830  -8.508  -4.452   2.162 211.855 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.5514582  0.8931642  15.172  < 2e-16 ***
## High         0.0040051  0.0005864   6.830 1.44e-11 ***
## Low         -0.0026086  0.0005231  -4.987 7.19e-07 ***
## Open        -0.0010414  0.0005763  -1.807   0.0711 .  
## Close       -0.0003621  0.0005626  -0.644   0.5199    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.4 on 1045 degrees of freedom
## Multiple R-squared:  0.1351, Adjusted R-squared:  0.1318 
## F-statistic: 40.82 on 4 and 1045 DF,  p-value: < 2.2e-16