Without wikipedia variables

library(quantmod)

## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.

library(caret)

## Loading required package: lattice
## Loading required package: ggplot2

startDate = as.Date("2010-01-01")

endDate = as.Date("2015-12-31") 


startDate = as.Date("2010-01-01")

endDate = as.Date("2015-12-31") 

getSymbols("DJIA", src = "yahoo", from = startDate, to = endDate)

##     As of 0.4-0, 'getSymbols' uses env=parent.frame() and
##  auto.assign=TRUE by default.
## 
##  This  behavior  will be  phased out in 0.5-0  when the call  will
##  default to use auto.assign=FALSE. getOption("getSymbols.env") and 
##  getOptions("getSymbols.auto.assign") are now checked for alternate defaults
## 
##  This message is shown once per session and may be disabled by setting 
##  options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.

## [1] "DJIA"

RSI3<-RSI(Op(DJIA), n= 3) 
#Calculate a 3-period relative strength index (RSI) off the open price

EMA5<-EMA(Op(DJIA),n=5) 
#Calculate a 5-period exponential moving average (EMA)
EMAcross<- Op(DJIA)-EMA5 
#Let’s explore the difference between the open price and our 5-period EMA


DEMA10<-DEMA(Cl(DJIA),n = 10, v = 1, wilder = FALSE)
DEMA10c<-Cl(DJIA) - DEMA10

MACD<-MACD(Op(DJIA),fast = 12, slow = 26, signal = 9) 
#Calculate a MACD with standard parameters

MACDsignal<-MACD[,2] 
#Grab just the signal line to use as our indicator.


SMI<-SMI(Op(DJIA),n=13,slow=25,fast=2,signal=9) 
#Stochastic Oscillator with standard parameters
SMI<-SMI[,1] 
#Grab just the oscillator to use as our indicator

BB<-BBands(Op(DJIA),n=20,sd=2)
BBp<-BB[,4]


CCI20<-CCI(DJIA[,3:5],n=20)
#A 20-period Commodity Channel Index calculated of the High/Low/Close of our data



# Return sign creation 

ClosingPrice<-Cl(DJIA)

Trend<-diff(ClosingPrice, lag = 1, differences = 1, arithmetic = TRUE, log = FALSE, na.pad = TRUE)


#Calculate the difference between the close price at T and close  price T-1
Class<-ifelse(Trend>0,"UP","DOWN") 
#Create a binary classification variable, the variable we are trying to predict.

DJIADF<-data.frame(date = index(DJIA),DJIA, row.names=NULL)

DataSet<-data.frame(RSI3,EMAcross,MACDsignal,SMI,BBp,CCI20,DEMA10c) 

DataSet<-DataSet[-c(1:33),]


Alldata<-cbind(DataSet)

Normalized <-function(x) {(x-min(x))/(max(x)-min(x))}
NormalizedData<-as.data.frame(lapply(Alldata,Normalized))

ClassDF<-data.frame(date = index(Class), Class, row.names=NULL)

AlldataNormalized<-data.frame(NormalizedData,ClassDF[33:1509,2])

TrainingSet<-AlldataNormalized[1:1000,] 

TestSet<-AlldataNormalized[1001:1477,]

TrainClass<-TrainingSet[,8] 
TrainPred<-TrainingSet[,-8] 

TestClass<-TestSet[,8] 
TestPred<-TestSet[,-8] 


library(h2o)

## Loading required package: statmod
## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
## 
## ----------------------------------------------------------------------
## 
## 
## Attaching package: 'h2o'
## 
## The following objects are masked from 'package:stats':
## 
##     sd, var
## 
## The following objects are masked from 'package:base':
## 
##     %*%, apply, as.factor, as.numeric, colnames, colnames<-,
##     ifelse, %in%, is.factor, is.numeric, log, trunc

localH2O <- h2o.init(ip = "localhost", port = 54321, startH2O = TRUE)

## 
## H2O is not running yet, starting it now...
## 
## Note:  In case of errors look at the following log files:
##     /tmp/Rtmp3WDRpn/h2o_mitra2_started_from_r.out
##     /tmp/Rtmp3WDRpn/h2o_mitra2_started_from_r.err
## 
## 
## ..Successfully connected to http://localhost:54321/ 
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 seconds 19 milliseconds 
##     H2O cluster version:        3.6.0.8 
##     H2O cluster name:           H2O_started_from_R_mitra2_mxt141 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.66 GB 
##     H2O cluster total cores:    4 
##     H2O cluster allowed cores:  2 
##     H2O cluster healthy:        TRUE 
## 
## Note:  As started, H2O is limited to the CRAN default of 2 CPUs.
##        Shut down and restart H2O as shown below to use all your CPUs.
##            > h2o.shutdown()
##            > h2o.init(nthreads = -1)

localH2O = h2o.init(ip = "localhost", port = 54321, startH2O = TRUE, 
                    Xmx = '2g')

## Warning in h2o.init(ip = "localhost", port = 54321, startH2O = TRUE, Xmx =
## "2g"): Xmx is a deprecated parameter. Use `max_mem_size` and `min_mem_size`
## to set the memory boundaries. Using `Xmx` to set these.

## Successfully connected to http://localhost:54321/ 
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 seconds 153 milliseconds 
##     H2O cluster version:        3.6.0.8 
##     H2O cluster name:           H2O_started_from_R_mitra2_mxt141 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.66 GB 
##     H2O cluster total cores:    4 
##     H2O cluster allowed cores:  2 
##     H2O cluster healthy:        TRUE

TrainH2o<-as.h2o(TrainingSet, destination_frame = "TrainH2o")

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%

TestH2o<-as.h2o(TestPred, destination_frame = "TestH2o")

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%

model <- h2o.deeplearning(x = 1:7,y = 8,training_frame = TrainH2o, activation = "TanhWithDropout",hidden = c(500,500,500),epochs = 200,rate_decay =5e-4, l1=1e-5)

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |                                                                 |   1%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |==                                                               |   4%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=================================================================| 100%

h2o_yhat_test <- h2o.predict(model,TestH2o)
df_yhat_test <- as.data.frame(h2o_yhat_test)

prediction <-df_yhat_test[,1] 

confusionMatrix(prediction,TestClass)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction DOWN  UP
##       DOWN  161  44
##       UP     65 207
##                                           
##                Accuracy : 0.7715          
##                  95% CI : (0.7311, 0.8084)
##     No Information Rate : 0.5262          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.5396          
##  Mcnemar's Test P-Value : 0.05541         
##                                           
##             Sensitivity : 0.7124          
##             Specificity : 0.8247          
##          Pos Pred Value : 0.7854          
##          Neg Pred Value : 0.7610          
##              Prevalence : 0.4738          
##          Detection Rate : 0.3375          
##    Detection Prevalence : 0.4298          
##       Balanced Accuracy : 0.7685          
##                                           
##        'Positive' Class : DOWN            
##

Without wikipedia variables

swarnavamitra

3/3/2016