library(caret)

## Loading required package: lattice
## Loading required package: ggplot2

library(wikipediatrend)
# Download the daily English-Wikipedia page-view counts for the "Citigroup"
# article between 2010-01-01 and 2014-12-31.
# The 'friendly', 'requestFrom' and 'userAgent' options are no longer passed:
# wp_trend() reports all three as deprecated (and as errors in future
# versions), and states that the package is now friendly by default.
views <- wp_trend(
  page = "Citigroup",
  from = "2010-01-01",
  to = "2014-12-31",
  lang = "en"
)

## Option 'requestFrom' is deprecated and will cause errors 
##             in futuere versions of the wp_trend() function. Please read 
##             the package vignette and/or README to learn about the new
##             set of options.
##             
##             Check wp_http_header() to know which information are send to 
##             stats.grok.se (R and package versions)
##             
## Option 'friendly' is deprecated and will cause errors 
##             in futuere versions of the wp_trend() function. Please read 
##             the package vignette and/or README to learn about the new
##             set of options.
##             
##             The package now is friendly by default.
##             
## Option 'userAgent' is deprecated and will cause errors 
##             in futuere versions of the wp_trend() function. Please read 
##             the package vignette and/or README to learn about the new
##             set of options.
##             
##             Check wp_http_header() to know which information are send to 
##             stats.grok.se (R and package versions)
##             
## http://stats.grok.se/json/en/201001/Citigroup
## http://stats.grok.se/json/en/201002/Citigroup
## http://stats.grok.se/json/en/201003/Citigroup
## http://stats.grok.se/json/en/201004/Citigroup
## http://stats.grok.se/json/en/201005/Citigroup
## http://stats.grok.se/json/en/201006/Citigroup
## http://stats.grok.se/json/en/201007/Citigroup
## http://stats.grok.se/json/en/201008/Citigroup
## http://stats.grok.se/json/en/201009/Citigroup
## http://stats.grok.se/json/en/201010/Citigroup
## http://stats.grok.se/json/en/201011/Citigroup
## http://stats.grok.se/json/en/201012/Citigroup
## http://stats.grok.se/json/en/201101/Citigroup
## http://stats.grok.se/json/en/201102/Citigroup
## http://stats.grok.se/json/en/201103/Citigroup
## http://stats.grok.se/json/en/201104/Citigroup
## http://stats.grok.se/json/en/201105/Citigroup
## http://stats.grok.se/json/en/201106/Citigroup
## http://stats.grok.se/json/en/201107/Citigroup
## http://stats.grok.se/json/en/201108/Citigroup
## http://stats.grok.se/json/en/201109/Citigroup
## http://stats.grok.se/json/en/201110/Citigroup
## http://stats.grok.se/json/en/201111/Citigroup
## http://stats.grok.se/json/en/201112/Citigroup
## http://stats.grok.se/json/en/201201/Citigroup
## http://stats.grok.se/json/en/201202/Citigroup
## http://stats.grok.se/json/en/201203/Citigroup
## http://stats.grok.se/json/en/201204/Citigroup
## http://stats.grok.se/json/en/201205/Citigroup
## http://stats.grok.se/json/en/201206/Citigroup
## http://stats.grok.se/json/en/201207/Citigroup
## http://stats.grok.se/json/en/201208/Citigroup
## http://stats.grok.se/json/en/201209/Citigroup
## http://stats.grok.se/json/en/201210/Citigroup
## http://stats.grok.se/json/en/201211/Citigroup
## http://stats.grok.se/json/en/201212/Citigroup
## http://stats.grok.se/json/en/201301/Citigroup
## http://stats.grok.se/json/en/201302/Citigroup
## http://stats.grok.se/json/en/201303/Citigroup
## http://stats.grok.se/json/en/201304/Citigroup
## http://stats.grok.se/json/en/201305/Citigroup
## http://stats.grok.se/json/en/201306/Citigroup
## http://stats.grok.se/json/en/201307/Citigroup
## http://stats.grok.se/json/en/201308/Citigroup
## http://stats.grok.se/json/en/201309/Citigroup
## http://stats.grok.se/json/en/201310/Citigroup
## http://stats.grok.se/json/en/201311/Citigroup
## http://stats.grok.se/json/en/201312/Citigroup
## http://stats.grok.se/json/en/201401/Citigroup
## http://stats.grok.se/json/en/201402/Citigroup
## http://stats.grok.se/json/en/201403/Citigroup
## http://stats.grok.se/json/en/201404/Citigroup
## http://stats.grok.se/json/en/201405/Citigroup
## http://stats.grok.se/json/en/201406/Citigroup
## http://stats.grok.se/json/en/201407/Citigroup
## http://stats.grok.se/json/en/201408/Citigroup
## http://stats.grok.se/json/en/201409/Citigroup
## http://stats.grok.se/json/en/201410/Citigroup
## http://stats.grok.se/json/en/201411/Citigroup
## http://stats.grok.se/json/en/201412/Citigroup

library(quantmod)

## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.

# Date window for the price history (the same span used for the page views).
startDate <- as.Date("2010-01-01")
endDate <- as.Date("2014-12-31")

# Fetch the daily price series from Yahoo. With the default auto.assign
# behaviour the result is stored in the calling environment as `C`.
getSymbols("c", src = "yahoo", from = startDate, to = endDate)

##     As of 0.4-0, 'getSymbols' uses env=parent.frame() and
##  auto.assign=TRUE by default.
## 
##  This  behavior  will be  phased out in 0.5-0  when the call  will
##  default to use auto.assign=FALSE. getOption("getSymbols.env") and 
##  getOptions("getSymbols.auto.assign") are now checked for alternate defaults
## 
##  This message is shown once per session and may be disabled by setting 
##  options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.

## [1] "C"

# Technical indicators used as model features, built from the price series `C`.

# 3-period relative strength index (RSI) of the open price.
RSI3 <- RSI(Op(C), n = 3)

# Gap between the open price and its 5-period exponential moving average.
EMA5 <- EMA(Op(C), n = 5)
EMAcross <- Op(C) - EMA5

# Gap between the close price and its 10-period double EMA.
DEMA10 <- DEMA(Cl(C), n = 10, v = 1, wilder = FALSE)
DEMA10c <- Cl(C) - DEMA10

# MACD with the standard 12/26/9 settings. TTR names these parameters
# nFast/nSlow/nSig; the earlier fast/slow/signal spellings are not parameters
# of MACD() and therefore did not set them. Results are unchanged because the
# requested values equal TTR's documented defaults.
MACD <- MACD(Op(C), nFast = 12, nSlow = 26, nSig = 9)
# Keep only the second column (the signal line) as the indicator.
MACDsignal <- MACD[, 2]

# Stochastic Momentum Index with the standard 13/2/25/9 settings, using the
# real TTR parameter names for the same reason as for MACD above.
SMI <- SMI(Op(C), n = 13, nFast = 2, nSlow = 25, nSig = 9)
# Keep only the first column (the oscillator itself, not its signal line).
SMI <- SMI[, 1]

# Bollinger Bands over 20 periods at 2 standard deviations; the fourth column
# is kept as the feature (presumably TTR's %B -- confirm against BBands docs).
BB <- BBands(Op(C), n = 20, sd = 2)
BBp <- BB[, 4]


# 20-period Commodity Channel Index computed from the High/Low/Close series.
# HLC() picks those three columns by name. The previous C[, 3:5] selected
# columns 3-5 by position, which in the Open/High/Low/Close/Volume/Adjusted
# layout returned by getSymbols() are Low/Close/Volume, not High/Low/Close.
CCI20 <- CCI(HLC(C), n = 20)



# Intraday move for each row of `C`: close price minus open price.
PriceChange <- Cl(C) - Op(C)

# Binary target the models are asked to predict: "UP" when the close is above
# the open, "DOWN" otherwise (an unchanged day counts as "DOWN").
Class <- ifelse(PriceChange > 0, "UP", "DOWN")

# Price history as a plain data frame with an explicit `date` column.
DJIADF <- data.frame(date = index(C), C, row.names = NULL)

# Join the page views to the prices on the shared `date` key.
CombDF <- merge(views, DJIADF, by = "date")

# One column per indicator, then discard the first 33 rows (presumably the
# indicator warm-up period, where the longer look-backs are still NA).
DataSet <- data.frame(RSI3, EMAcross, MACDsignal, SMI, BBp, CCI20, DEMA10c)
DataSet <- DataSet[-(1:33), ]

# Append column 2 of the merged frame (labelled "Views" further down) for
# rows 34..1258 so it lines up with the trimmed indicator rows.
# NOTE(review): this alignment is purely positional and the row range is
# hard-coded; it assumes CombDF has exactly one row per row of `C`.
Alldata <- cbind(DataSet, CombDF[34:1258, 2])


# Min-max scale a numeric vector to the range [0, 1].
#
# x      Numeric vector to rescale.
# na.rm  Ignore NA values when finding the minimum and maximum? With the
#        default TRUE, NA entries stay NA and the remaining values are scaled;
#        with FALSE, any NA makes the whole result NA.
#
# Returns a vector of the same length as `x`. Empty and all-NA inputs are
# returned unchanged, and a constant vector maps to all zeros instead of the
# NaN that a 0/0 division would produce.
Normalized <- function(x, na.rm = TRUE) {
  if (length(x) == 0L || all(is.na(x))) {
    return(x)
  }
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  span <- hi - lo
  # A zero range means every (non-NA) value is identical; avoid dividing by 0.
  if (!is.na(span) && span == 0) {
    return(x - lo)
  }
  (x - lo) / span
}
# Rescale every column of the feature table with Normalized().
NormalizedData <- as.data.frame(lapply(Alldata, Normalized))

# Target labels as a data frame: the dates of `Class` plus the label itself.
ClassDF <- data.frame(date = index(Class), Class, row.names = NULL)

# Attach the labels for rows 34..1258 to the scaled features.
# NOTE(review): positional alignment with a hard-coded row range.
AlldataNormalized <- data.frame(NormalizedData, ClassDF[34:1258, 2])

# Final column names: seven indicators, the page-view count and the target.
colnames(AlldataNormalized) <- c(
  "RSI3", "EMAcross", "MACDsignal", "SMI", "BBp", "CCI20", "DEMA10c",
  "Views", "Class"
)


# Split by row order: rows 1..1000 for training, rows 1001..1225 for testing.
TrainingSet <- AlldataNormalized[seq_len(1000), ]
TestSet <- AlldataNormalized[1001:1225, ]

# Column 9 is the Class label; the remaining eight columns are predictors.
TrainClass <- TrainingSet[[9]]
TrainPred <- TrainingSet[, -9]

TestClass <- TestSet[[9]]
TestPred <- TestSet[, -9]

library(h2o)

## Loading required package: statmod
## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
## 
## ----------------------------------------------------------------------
## 
## 
## Attaching package: 'h2o'
## 
## The following objects are masked from 'package:stats':
## 
##     sd, var
## 
## The following objects are masked from 'package:base':
## 
##     %*%, apply, as.factor, as.numeric, colnames, colnames<-,
##     ifelse, %in%, is.factor, is.numeric, log, trunc

# Connect to an H2O instance at localhost:54321, starting one if none is
# running yet.
localH2O <- h2o.init(
  ip = "localhost",
  port = 54321,
  startH2O = TRUE
)

## 
## H2O is not running yet, starting it now...
## 
## Note:  In case of errors look at the following log files:
##     /tmp/RtmpF4Z1KM/h2o_mitra2_started_from_r.out
##     /tmp/RtmpF4Z1KM/h2o_mitra2_started_from_r.err
## 
## 
## ..Successfully connected to http://localhost:54321/ 
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 seconds 400 milliseconds 
##     H2O cluster version:        3.6.0.8 
##     H2O cluster name:           H2O_started_from_R_mitra2_tka066 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.66 GB 
##     H2O cluster total cores:    4 
##     H2O cluster allowed cores:  2 
##     H2O cluster healthy:        TRUE 
## 
## Note:  As started, H2O is limited to the CRAN default of 2 CPUs.
##        Shut down and restart H2O as shown below to use all your CPUs.
##            > h2o.shutdown()
##            > h2o.init(nthreads = -1)

# Call h2o.init() again, this time requesting a 2 GB memory ceiling.
# `Xmx` is reported by h2o.init() as deprecated in favour of `max_mem_size`
# and `min_mem_size`, so the replacement argument is used here.
# NOTE(review): the cluster summary logged after this call still showed
# 0.66 GB total memory, which suggests the limit is not applied to a cluster
# that is already running -- confirm, and shut H2O down first if 2 GB is needed.
localH2O <- h2o.init(
  ip = "localhost",
  port = 54321,
  startH2O = TRUE,
  max_mem_size = "2g"
)

## Warning in h2o.init(ip = "localhost", port = 54321, startH2O = TRUE, Xmx =
## "2g"): Xmx is a deprecated parameter. Use `max_mem_size` and `min_mem_size`
## to set the memory boundaries. Using `Xmx` to set these.

## Successfully connected to http://localhost:54321/ 
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 seconds 595 milliseconds 
##     H2O cluster version:        3.6.0.8 
##     H2O cluster name:           H2O_started_from_R_mitra2_tka066 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.66 GB 
##     H2O cluster total cores:    4 
##     H2O cluster allowed cores:  2 
##     H2O cluster healthy:        TRUE

# Upload the training rows (predictors plus the Class column) to H2O under
# the frame id "TrainH2o".
TrainH2o <- as.h2o(
  TrainingSet,
  destination_frame = "TrainH2o"
)

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%

# Preview the first rows of the uploaded training frame as a sanity check.
head(TrainH2o)

##        RSI3  EMAcross MACDsignal       SMI       BBp      CCI20   DEMA10c
## 1 0.9331516 0.1236991 0.09775004 0.3548534 0.6627022 0.14498935 0.1561449
## 2 0.9331516 0.1229697 0.10464865 0.4170325 0.6388570 0.18036729 0.1528981
## 3 0.3934442 0.1209599 0.11062878 0.4554274 0.5330026 0.19056025 0.1552461
## 4 0.5136515 0.1215789 0.11598789 0.4891637 0.5485061 0.17375920 0.1533669
## 5 0.5136515 0.1215562 0.12074544 0.5191934 0.5380094 0.12010509 0.1536277
## 6 0.7088161 0.1221941 0.12521381 0.5534579 0.5664558 0.09677432 0.1533921
##       Views Class
## 1 0.2220165    UP
## 2 0.2130447  DOWN
## 3 0.1915409    UP
## 4 0.1693250  DOWN
## 5 0.1706067    UP
## 6 0.1815722  DOWN

# Upload the test predictors (the Class column is excluded) to H2O under the
# frame id "TestH2o".
TestH2o <- as.h2o(
  TestPred,
  destination_frame = "TestH2o"
)

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%

# Model 1: tanh activation with dropout, hidden layers of 50/50/50/100 units,
# 100 epochs. Columns 1-8 are the predictors and column 9 is the response.
Deeplearningmodel1 <- h2o.deeplearning(
  x = 1:8,
  y = 9,
  training_frame = TrainH2o,
  activation = "TanhWithDropout",
  hidden = c(50, 50, 50, 100),
  epochs = 100
)

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |=================================================================| 100%

# Model 2: rectifier activation, hidden layers of 50/50/50/200 units,
# 100 epochs. Columns 1-8 are the predictors and column 9 is the response.
Deeplearningmodel2 <- h2o.deeplearning(
  x = 1:8,
  y = 9,
  training_frame = TrainH2o,
  activation = "Rectifier",
  hidden = c(50, 50, 50, 200),
  epochs = 100
)

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |=================================================================| 100%

# Model 3: plain tanh activation, two hidden layers of 100 units each,
# 100 epochs. Columns 1-8 are the predictors and column 9 is the response.
Deeplearningmodel3 <- h2o.deeplearning(
  x = 1:8,
  y = 9,
  training_frame = TrainH2o,
  activation = "Tanh",
  hidden = c(100, 100),
  epochs = 100
)

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |=================================================================| 100%

# Model 4: tanh activation with dropout, hidden layers of 50/50/50/200 units,
# 100 epochs. Columns 1-8 are the predictors and column 9 is the response.
Deeplearningmodel4 <- h2o.deeplearning(
  x = 1:8,
  y = 9,
  training_frame = TrainH2o,
  activation = "TanhWithDropout",
  hidden = c(50, 50, 50, 200),
  epochs = 100
)

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |=================================================================| 100%

# Score the test predictors with each of the four networks, then pull every
# H2O prediction frame back into a local R data frame.

# Model 1
h2o_yhat_test1 <- h2o.predict(Deeplearningmodel1, TestH2o)
df_yhat_test1 <- as.data.frame(h2o_yhat_test1)

# Model 2
h2o_yhat_test2 <- h2o.predict(Deeplearningmodel2, TestH2o)
df_yhat_test2 <- as.data.frame(h2o_yhat_test2)

# Model 3
h2o_yhat_test3 <- h2o.predict(Deeplearningmodel3, TestH2o)
df_yhat_test3 <- as.data.frame(h2o_yhat_test3)

# Model 4
h2o_yhat_test4 <- h2o.predict(Deeplearningmodel4, TestH2o)
df_yhat_test4 <- as.data.frame(h2o_yhat_test4)

Metrics on the out-of-sample (test) data

# Model 1: the first column of the prediction frame holds the predicted label.
prediction1 <- df_yhat_test1[[1]]

# Cross-tabulate predicted labels against the true test labels.
table(prediction1, TestClass)

##            TestClass
## prediction1 DOWN  UP
##        DOWN   86   7
##        UP     22 110

# Accuracy, kappa and per-class statistics for model 1 on the test set.
confusionMatrix(data = prediction1, reference = TestClass)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction DOWN  UP
##       DOWN   86   7
##       UP     22 110
##                                          
##                Accuracy : 0.8711         
##                  95% CI : (0.8202, 0.912)
##     No Information Rate : 0.52           
##     P-Value [Acc > NIR] : < 2e-16        
##                                          
##                   Kappa : 0.7404         
##  Mcnemar's Test P-Value : 0.00933        
##                                          
##             Sensitivity : 0.7963         
##             Specificity : 0.9402         
##          Pos Pred Value : 0.9247         
##          Neg Pred Value : 0.8333         
##              Prevalence : 0.4800         
##          Detection Rate : 0.3822         
##    Detection Prevalence : 0.4133         
##       Balanced Accuracy : 0.8682         
##                                          
##        'Positive' Class : DOWN           
##

# Model 2: the first column of the prediction frame holds the predicted label.
prediction2 <- df_yhat_test2[[1]]

# Cross-tabulate predicted labels against the true test labels.
table(prediction2, TestClass)

##            TestClass
## prediction2 DOWN UP
##        DOWN   90 19
##        UP     18 98

# Accuracy, kappa and per-class statistics for model 2 on the test set.
confusionMatrix(data = prediction2, reference = TestClass)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction DOWN UP
##       DOWN   90 19
##       UP     18 98
##                                           
##                Accuracy : 0.8356          
##                  95% CI : (0.7805, 0.8815)
##     No Information Rate : 0.52            
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.6707          
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.8333          
##             Specificity : 0.8376          
##          Pos Pred Value : 0.8257          
##          Neg Pred Value : 0.8448          
##              Prevalence : 0.4800          
##          Detection Rate : 0.4000          
##    Detection Prevalence : 0.4844          
##       Balanced Accuracy : 0.8355          
##                                           
##        'Positive' Class : DOWN            
##

# Model 3: the first column of the prediction frame holds the predicted label.
prediction3 <- df_yhat_test3[[1]]

# Cross-tabulate predicted labels against the true test labels.
table(prediction3, TestClass)

##            TestClass
## prediction3 DOWN UP
##        DOWN   98 27
##        UP     10 90

# Accuracy, kappa and per-class statistics for model 3 on the test set.
confusionMatrix(data = prediction3, reference = TestClass)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction DOWN UP
##       DOWN   98 27
##       UP     10 90
##                                           
##                Accuracy : 0.8356          
##                  95% CI : (0.7805, 0.8815)
##     No Information Rate : 0.52            
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6726          
##  Mcnemar's Test P-Value : 0.008529        
##                                           
##             Sensitivity : 0.9074          
##             Specificity : 0.7692          
##          Pos Pred Value : 0.7840          
##          Neg Pred Value : 0.9000          
##              Prevalence : 0.4800          
##          Detection Rate : 0.4356          
##    Detection Prevalence : 0.5556          
##       Balanced Accuracy : 0.8383          
##                                           
##        'Positive' Class : DOWN            
##

Blending the deep learning models: divide the data into three parts

Deepnet with H2O

swarnavamitra

12/18/2015

Metrics on the out-of-sample (test) data

Blending the deep learning models: divide the data into three parts