library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(wikipediatrend)
#Download daily Wikipedia page-view counts for the "Citigroup" article
views<-wp_trend(page = "Citigroup", from = "2010-01-01", to = "2014-12-31", lang = "en")
## http://stats.grok.se/json/en/201001/Citigroup
## http://stats.grok.se/json/en/201002/Citigroup
## ...
## http://stats.grok.se/json/en/201412/Citigroup
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
#Download daily Citigroup (ticker "C") price data from Yahoo Finance
startDate = as.Date("2010-01-01")
endDate = as.Date("2014-12-31")
getSymbols("C", src = "yahoo", from = startDate, to = endDate)
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## [1] "C"
RSI3<-RSI(Op(C), n= 3)
#Calculate a 3-period relative strength index (RSI) off the open price
EMA5<-EMA(Op(C),n=5)
#Calculate a 5-period exponential moving average (EMA)
EMAcross<- Op(C)-EMA5
#Let’s explore the difference between the open price and our 5-period EMA
DEMA10<-DEMA(Cl(C),n = 10, v = 1, wilder = FALSE)
DEMA10c<-Cl(C) - DEMA10
#Calculate a 10-period double exponential moving average (DEMA) of the close and its gap from the close
MACD<-MACD(Op(C),fast = 12, slow = 26, signal = 9)
#Calculate a MACD with standard parameters
MACDsignal<-MACD[,2]
#Grab just the signal line to use as our indicator.
SMI<-SMI(Op(C),n=13,slow=25,fast=2,signal=9)
#Stochastic Momentum Index (SMI) with standard parameters
SMI<-SMI[,1]
#Grab just the oscillator to use as our indicator
BB<-BBands(Op(C),n=20,sd=2)
BBp<-BB[,4]
#Calculate 20-period Bollinger Bands and keep the %B column as our indicator
CCI20<-CCI(HLC(C),n=20)
#A 20-period Commodity Channel Index calculated from the High/Low/Close of our data
PriceChange<- Cl(C) - Op(C)
#Calculate the difference between the close price and open price
Class<-ifelse(PriceChange>0,"UP","DOWN")
#Create the binary class variable we are trying to predict: "UP" if the close exceeded the open, "DOWN" otherwise
DJIADF<-data.frame(date = index(C), C, row.names=NULL)
#Convert the Citigroup xts object into a data frame keyed by date
CombDF<-merge(views,DJIADF, by.x='date', by.y='date')
#Merge the Wikipedia page views with the price data on matching dates
DataSet<-data.frame(RSI3,EMAcross,MACDsignal,SMI,BBp,CCI20,DEMA10c)
DataSet<-DataSet[-c(1:33),]
#Drop the first 33 rows, which contain NAs from the indicator warm-up periods
Alldata<-cbind(DataSet,CombDF[34:1258,2])
#Append the page-view counts as an eighth predictor
Normalized <-function(x) {(x-min(x))/(max(x)-min(x))}
#Min-max normalization: rescale each column to the [0,1] range
NormalizedData<-as.data.frame(lapply(Alldata,Normalized))
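For intuition, this scaling maps each column's minimum to 0 and its maximum to 1; a quick check with the function defined above:

Normalized(c(2, 4, 10))   # returns 0.00 0.25 1.00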
ClassDF<-data.frame(date = index(Class), Class, row.names=NULL)
AlldataNormalized<-data.frame(NormalizedData,ClassDF[34:1258,2])
#Attach the class labels (aligned to the same rows) to the normalized predictors
colnames(AlldataNormalized)<-c("RSI3","EMAcross","MACDsignal","SMI","BBp","CCI20","DEMA10c","Views","Class")
TrainingSet<-AlldataNormalized[1:1000,]
TestSet<-AlldataNormalized[1001:1225,]
#Use the first 1000 observations for training and the remaining 225 for testing
TrainClass<-TrainingSet[,9]
TrainPred<-TrainingSet[,-9]
TestClass<-TestSet[,9]
TestPred<-TestSet[,-9]
#Separate the class labels (column 9) from the predictors
library(h2o)
## Loading required package: statmod
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
##
## Attaching package: 'h2o'
##
## The following objects are masked from 'package:stats':
##
## sd, var
##
## The following objects are masked from 'package:base':
##
## %*%, apply, as.factor, as.numeric, colnames, colnames<-,
## ifelse, %in%, is.factor, is.numeric, log, trunc
#Start a local H2O cluster on the default port
localH2O <- h2o.init(ip = "localhost", port = 54321, startH2O = TRUE)
##
## H2O is not running yet, starting it now...
##
## Note: In case of errors look at the following log files:
## /tmp/RtmpF4Z1KM/h2o_mitra2_started_from_r.out
## /tmp/RtmpF4Z1KM/h2o_mitra2_started_from_r.err
##
##
## ..Successfully connected to http://localhost:54321/
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 2 seconds 400 milliseconds
## H2O cluster version: 3.6.0.8
## H2O cluster name: H2O_started_from_R_mitra2_tka066
## H2O cluster total nodes: 1
## H2O cluster total memory: 0.66 GB
## H2O cluster total cores: 4
## H2O cluster allowed cores: 2
## H2O cluster healthy: TRUE
##
## Note: As started, H2O is limited to the CRAN default of 2 CPUs.
## Shut down and restart H2O as shown below to use all your CPUs.
## > h2o.shutdown()
## > h2o.init(nthreads = -1)
#Reinitialize H2O with a 2 GB memory limit (the deprecated 'Xmx' argument is replaced by 'max_mem_size')
localH2O = h2o.init(ip = "localhost", port = 54321, startH2O = TRUE,
                    max_mem_size = '2g')
## Successfully connected to http://localhost:54321/
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 2 seconds 595 milliseconds
## H2O cluster version: 3.6.0.8
## H2O cluster name: H2O_started_from_R_mitra2_tka066
## H2O cluster total nodes: 1
## H2O cluster total memory: 0.66 GB
## H2O cluster total cores: 4
## H2O cluster allowed cores: 2
## H2O cluster healthy: TRUE
#Copy the training set into the H2O cluster
TrainH2o<-as.h2o(TrainingSet, destination_frame = "TrainH2o")
head(TrainH2o)
## RSI3 EMAcross MACDsignal SMI BBp CCI20 DEMA10c
## 1 0.9331516 0.1236991 0.09775004 0.3548534 0.6627022 0.14498935 0.1561449
## 2 0.9331516 0.1229697 0.10464865 0.4170325 0.6388570 0.18036729 0.1528981
## 3 0.3934442 0.1209599 0.11062878 0.4554274 0.5330026 0.19056025 0.1552461
## 4 0.5136515 0.1215789 0.11598789 0.4891637 0.5485061 0.17375920 0.1533669
## 5 0.5136515 0.1215562 0.12074544 0.5191934 0.5380094 0.12010509 0.1536277
## 6 0.7088161 0.1221941 0.12521381 0.5534579 0.5664558 0.09677432 0.1533921
## Views Class
## 1 0.2220165 UP
## 2 0.2130447 DOWN
## 3 0.1915409 UP
## 4 0.1693250 DOWN
## 5 0.1706067 UP
## 6 0.1815722 DOWN
#Copy the test-set predictors into the H2O cluster
TestH2o<-as.h2o(TestPred, destination_frame = "TestH2o")
#Model 1: tanh activation with dropout, hidden layers of 50, 50, 50 and 100 units
Deeplearningmodel1 <- h2o.deeplearning(x = 1:8,y = 9,training_frame = TrainH2o, activation = "TanhWithDropout",hidden = c(50,50,50,100),epochs = 100)
#Model 2: rectifier (ReLU) activation, hidden layers of 50, 50, 50 and 200 units
Deeplearningmodel2 <- h2o.deeplearning(x = 1:8,y = 9,training_frame = TrainH2o, activation = "Rectifier",hidden = c(50,50,50,200),epochs = 100)
#Model 3: tanh activation, two hidden layers of 100 units each
Deeplearningmodel3 <- h2o.deeplearning(x = 1:8,y = 9,training_frame = TrainH2o, activation = "Tanh",hidden = c(100,100),epochs = 100)
#Model 4: tanh activation with dropout, hidden layers of 50, 50, 50 and 200 units
Deeplearningmodel4 <- h2o.deeplearning(x = 1:8,y = 9,training_frame = TrainH2o, activation = "TanhWithDropout",hidden = c(50,50,50,200),epochs = 100)
#Generate out-of-sample class predictions from each model and pull them back into R
h2o_yhat_test1 <- h2o.predict(Deeplearningmodel1,TestH2o)
df_yhat_test1 <- as.data.frame(h2o_yhat_test1)
h2o_yhat_test2 <- h2o.predict(Deeplearningmodel2,TestH2o)
df_yhat_test2 <- as.data.frame(h2o_yhat_test2)
h2o_yhat_test3 <- h2o.predict(Deeplearningmodel3,TestH2o)
df_yhat_test3 <- as.data.frame(h2o_yhat_test3)
h2o_yhat_test4 <- h2o.predict(Deeplearningmodel4,TestH2o)
df_yhat_test4 <- as.data.frame(h2o_yhat_test4)
Metrics on the out-of-sample test set
prediction1 <-df_yhat_test1[,1]
table(prediction1,TestClass)
## TestClass
## prediction1 DOWN UP
## DOWN 86 7
## UP 22 110
confusionMatrix(prediction1,TestClass)
## Confusion Matrix and Statistics
##
## Reference
## Prediction DOWN UP
## DOWN 86 7
## UP 22 110
##
## Accuracy : 0.8711
## 95% CI : (0.8202, 0.912)
## No Information Rate : 0.52
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.7404
## Mcnemar's Test P-Value : 0.00933
##
## Sensitivity : 0.7963
## Specificity : 0.9402
## Pos Pred Value : 0.9247
## Neg Pred Value : 0.8333
## Prevalence : 0.4800
## Detection Rate : 0.3822
## Detection Prevalence : 0.4133
## Balanced Accuracy : 0.8682
##
## 'Positive' Class : DOWN
##
prediction2 <-df_yhat_test2[,1]
table(prediction2,TestClass)
## TestClass
## prediction2 DOWN UP
## DOWN 90 19
## UP 18 98
confusionMatrix(prediction2,TestClass)
## Confusion Matrix and Statistics
##
## Reference
## Prediction DOWN UP
## DOWN 90 19
## UP 18 98
##
## Accuracy : 0.8356
## 95% CI : (0.7805, 0.8815)
## No Information Rate : 0.52
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6707
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.8333
## Specificity : 0.8376
## Pos Pred Value : 0.8257
## Neg Pred Value : 0.8448
## Prevalence : 0.4800
## Detection Rate : 0.4000
## Detection Prevalence : 0.4844
## Balanced Accuracy : 0.8355
##
## 'Positive' Class : DOWN
##
prediction3 <-df_yhat_test3[,1]
table(prediction3,TestClass)
## TestClass
## prediction3 DOWN UP
## DOWN 98 27
## UP 10 90
confusionMatrix(prediction3,TestClass)
## Confusion Matrix and Statistics
##
## Reference
## Prediction DOWN UP
## DOWN 98 27
## UP 10 90
##
## Accuracy : 0.8356
## 95% CI : (0.7805, 0.8815)
## No Information Rate : 0.52
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6726
## Mcnemar's Test P-Value : 0.008529
##
## Sensitivity : 0.9074
## Specificity : 0.7692
## Pos Pred Value : 0.7840
## Neg Pred Value : 0.9000
## Prevalence : 0.4800
## Detection Rate : 0.4356
## Detection Prevalence : 0.5556
## Balanced Accuracy : 0.8383
##
## 'Positive' Class : DOWN
##
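Before blending, the three models can be compared side by side by recomputing their out-of-sample accuracies directly from the prediction vectors above; this short sketch should reproduce the accuracies reported in the confusion matrices (0.8711, 0.8356 and 0.8356).

Accuracy1<-mean(as.character(prediction1) == as.character(TestClass))
Accuracy2<-mean(as.character(prediction2) == as.character(TestClass))
Accuracy3<-mean(as.character(prediction3) == as.character(TestClass))
round(c(model1 = Accuracy1, model2 = Accuracy2, model3 = Accuracy3), 4)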
Blending the deep learning models: the first step is to divide the data into three parts.
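As a minimal sketch of one such split, assuming the same 1225-row AlldataNormalized frame built above (the cut points here are illustrative, not necessarily the ones used in what follows):

Part1<-AlldataNormalized[1:700,]     #part 1: train the base deep learning models
Part2<-AlldataNormalized[701:1000,]  #part 2: fit the blender on the base models' predictions
Part3<-AlldataNormalized[1001:1225,] #part 3: held out for the final evaluation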