library(wikipediatrend)


library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(wikipediatrend)


library(caret)
# Download the daily page-view counts of the English Wikipedia article
# "Subprime mortgage crisis" for 2010-01-01 through 2014-12-31.
#
# The options `friendly`, `requestFrom` and `userAgent` are no longer passed:
# wp_trend() reports all three as deprecated and announces that they will
# cause errors in future versions of the function. According to the same
# notice the package is "friendly by default", and the request header
# information can be inspected with wp_http_header().
views <- wp_trend(
  page = "Subprime mortgage crisis",
  from = "2010-01-01",
  to   = "2014-12-31",
  lang = "en"
)
## http://stats.grok.se/json/en/201001/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201002/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201003/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201004/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201005/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201006/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201007/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201008/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201009/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201010/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201011/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201012/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201101/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201102/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201103/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201104/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201105/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201106/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201107/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201108/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201109/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201110/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201111/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201112/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201201/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201202/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201203/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201204/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201205/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201206/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201207/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201208/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201209/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201210/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201211/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201212/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201301/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201302/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201303/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201304/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201305/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201306/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201307/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201308/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201309/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201310/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201311/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201312/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201401/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201402/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201403/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201404/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201405/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201406/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201407/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201408/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201409/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201410/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201411/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201412/Subprime_mortgage%20crisis
# Keep only the first two columns of the page-view table. Further down this
# table is merged on a column called 'date' and its other column is labelled
# "Views", so these are presumably the date and the daily view count --
# TODO confirm against the wp_trend() return value.
Count <- views[, 1:2]

library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
# Date window for the price history (same dates as the page-view download).
startDate <- as.Date("2010-01-01")
endDate <- as.Date("2014-12-31")

# Download the daily ^BVSP quotes from Yahoo.
# auto.assign = FALSE makes getSymbols() return the series so it can be
# assigned explicitly, instead of relying on the implicit creation of `BVSP`
# in the calling environment. The message below announces that this implicit
# default is going to be phased out.
BVSP <- getSymbols(
  "^BVSP",
  src = "yahoo",
  from = startDate,
  to = endDate,
  auto.assign = FALSE
)
##     As of 0.4-0, 'getSymbols' uses env=parent.frame() and
##  auto.assign=TRUE by default.
## 
##  This  behavior  will be  phased out in 0.5-0  when the call  will
##  default to use auto.assign=FALSE. getOption("getSymbols.env") and 
##  getOptions("getSymbols.auto.assign") are now checked for alternate defaults
## 
##  This message is shown once per session and may be disabled by setting 
##  options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
# ---- Technical indicators used as predictors ----
# NOTE: `MACD` and `SMI` below reuse the names of the TTR functions that
# create them. The object names are kept because later code refers to them.

# 3-period relative strength index (RSI) of the open price.
RSI3 <- RSI(Op(BVSP), n = 3)

# 5-period exponential moving average (EMA) of the open price, and the
# distance of the open price from that average.
EMA5 <- EMA(Op(BVSP), n = 5)
EMAcross <- Op(BVSP) - EMA5

# 10-period double EMA of the close price, and the distance of the close
# price from it.
DEMA10 <- DEMA(Cl(BVSP), n = 10, v = 1, wilder = FALSE)
DEMA10c <- Cl(BVSP) - DEMA10

# MACD of the open price with the standard 12/26/9 parameters. Only the
# signal line (second column) is used as an indicator.
MACD <- MACD(Op(BVSP), fast = 12, slow = 26, signal = 9)
MACDsignal <- MACD[, 2]

# Stochastic momentum index (SMI) of the open price. Only the oscillator
# (first column) is used as an indicator.
SMI <- SMI(Op(BVSP), n = 13, slow = 25, fast = 2, signal = 9)
SMI <- SMI[, 1]

# Bollinger Bands of the open price (20 periods, 2 standard deviations).
# The fourth column is kept (documented by TTR as `pctB`).
BB <- BBands(Op(BVSP), n = 20, sd = 2)
BBp <- BB[, 4]

# 20-period Commodity Channel Index calculated from the High/Low/Close of
# the data. HLC() picks those three columns by name. The previous positional
# selection BVSP[, 3:5] does not match High/Low/Close if the columns follow
# the usual Yahoo order Open, High, Low, Close, Volume, Adjusted: it would
# pass Low, Close and Volume instead.
CCI20 <- CCI(HLC(BVSP), n = 20)
library(h2o)
## Loading required package: statmod
## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
## 
## ----------------------------------------------------------------------
## 
## 
## Attaching package: 'h2o'
## 
## The following objects are masked from 'package:stats':
## 
##     sd, var
## 
## The following objects are masked from 'package:base':
## 
##     %*%, apply, as.factor, as.numeric, colnames, colnames<-,
##     ifelse, %in%, is.factor, is.numeric, log, trunc
# Connect this R session to an H2O cluster using the default settings.
h2o.init()
## Successfully connected to http://127.0.0.1:54321/ 
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 hours 2 minutes 
##     H2O cluster version:        3.6.0.8 
##     H2O cluster name:           H2O_started_from_R_mitra2_jos490 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.66 GB 
##     H2O cluster total cores:    4 
##     H2O cluster allowed cores:  2 
##     H2O cluster healthy:        TRUE
## IP Address: 127.0.0.1 
## Port      : 54321 
## Session ID: _sid_aaba38584f3dc9f096e75f14aae74df3 
## Key Count : 0
# Upload the page-view table to the H2O cluster under the key "TrainH2o".
# The same R name and key are reused further down for the classifier's
# training data.
TrainH2o <- as.h2o(
  views,
  destination_frame = "TrainH2o"
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Train a deep-learning autoencoder on column 2 of the uploaded page-view
# frame, with two hidden layers of 10 units each and 5 epochs. No seed is
# set, so results may differ between runs.
model <- h2o.deeplearning(
  x = 2,
  training_frame = TrainH2o,
  autoencoder = TRUE,
  hidden = c(10, 10),
  epochs = 5
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=================================================================| 100%
# Score every row of the page-view frame with the autoencoder. The result is
# used below as a per-row "Anomaly" value.
anomaly <- h2o.anomaly(model, TrainH2o)

# Bring the scores back into R and put the first column of `views` in front
# of them, so they can be merged by date later.
anomalyD <- cbind(views[, 1], as.data.frame(anomaly))

colnames(anomalyD) <- c("date", "Anomaly")
# ---- Target variable: direction of the close-to-close change ----

ClosingPrice <- Cl(BVSP)

# Close price at T minus close price at T-1. The leading value is padded
# with NA (na.pad = TRUE), so the result has as many rows as BVSP.
Trend <- diff(
  ClosingPrice,
  lag = 1,
  differences = 1,
  arithmetic = TRUE,
  log = FALSE,
  na.pad = TRUE
)

# Binary label the classifier is trained to predict: "UP" when the close
# rose versus the previous close, otherwise "DOWN".
# NOTE(review): the label for day T uses day T's close, and some predictors
# (e.g. DEMA10c) are also computed from day T's close -- confirm whether this
# introduces look-ahead into the model.
Class <- ifelse(Trend > 0, "UP", "DOWN")

# Price table with the trading date as an ordinary column. Despite its name,
# DJIADF holds the BVSP series.
DJIADF <- data.frame(date = index(BVSP), BVSP, row.names = NULL)

# Attach the page-view columns and then the anomaly score, matching on the
# shared 'date' column. merge() keeps only dates present in both tables.
CombDF <- merge(DJIADF, Count, by = "date")
CombDF <- merge(CombDF, anomalyD, by = "date")

# All technical indicators side by side, one row per trading day.
DataSet <- data.frame(RSI3, EMAcross, MACDsignal, SMI, BBp, CCI20, DEMA10c)

# Discard the first 33 rows -- presumably the warm-up period in which the
# indicators are still NA; TODO confirm.
DataSet <- DataSet[-seq_len(33), ]

# Append columns 8 and 9 of the merged table (labelled "Views" and "Anomaly"
# further down) for rows 34 to 1251.
# NOTE(review): the hard-coded range assumes CombDF has exactly 1251 rows in
# the same order as BVSP -- verify after the merges.
Alldata <- cbind(DataSet, CombDF[34:1251, 8:9])


#' Min-max scale a numeric vector to the interval [0, 1]
#'
#' Missing values are ignored when the minimum and maximum are determined
#' and stay `NA` in the result, so a single `NA` no longer turns the whole
#' vector into `NA`. A vector whose non-missing values are all equal is
#' mapped to 0 instead of `NaN` (0 / 0).
#'
#' @param x A numeric vector.
#' @return A numeric vector of the same length as `x`; `x` itself when it is
#'   empty or contains only `NA`.
Normalized <- function(x) {
  if (all(is.na(x))) {
    # Nothing to scale (also covers zero-length input).
    return(x)
  }
  rng <- range(x, na.rm = TRUE)
  span <- rng[2] - rng[1]
  if (isTRUE(span == 0)) {
    # Constant input: avoid dividing by zero.
    return(x - rng[1])
  }
  (x - rng[1]) / span
}
# Rescale every predictor column with Normalized().
# Note that each column's minimum and maximum are taken over all rows,
# including the rows that end up in TestSet below.
NormalizedData <- as.data.frame(
  lapply(Alldata, Normalized)
)

# Class labels as a plain data frame; the label is in column 2.
ClassDF <- data.frame(date = index(Class), Class, row.names = NULL)

# Predictors plus the label for rows 34 to 1251, the same row range that
# was used to build Alldata.
AlldataNormalized <- data.frame(NormalizedData, ClassDF[34:1251, 2])

colnames(AlldataNormalized) <- c(
  "RSI3", "EMAcross", "MACDsignal", "SMI", "BBp",
  "CCI20", "DEMA10c", "Views", "Anomaly", "Class"
)

# Split by row position: the first 1000 rows for training, the remaining
# 218 rows (1001 to 1218) for testing.
TrainingSet <- AlldataNormalized[seq_len(1000), ]
TestSet <- AlldataNormalized[1001:1218, ]

# Column 10 is the label ("Class"); columns 1 to 9 are the predictors.
TrainPred <- TrainingSet[, -10]
TrainClass <- TrainingSet[, 10]

TestPred <- TestSet[, -10]
TestClass <- TestSet[, 10]
library(h2o)

# Connect to the H2O instance on localhost:54321 and keep the returned
# connection object. startH2O = TRUE is passed in case no instance is up yet.
localH2O <- h2o.init(
  ip = "localhost",
  port = 54321,
  startH2O = TRUE
)
## Successfully connected to http://localhost:54321/ 
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 hours 2 minutes 
##     H2O cluster version:        3.6.0.8 
##     H2O cluster name:           H2O_started_from_R_mitra2_jos490 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.66 GB 
##     H2O cluster total cores:    4 
##     H2O cluster allowed cores:  2 
##     H2O cluster healthy:        TRUE
# Connect to the H2O instance on localhost:54321, requesting a 2 GB memory
# limit. `max_mem_size` replaces the deprecated `Xmx` argument, which
# h2o.init() warns about and maps onto the memory-size options itself.
# NOTE(review): the output below still reports 0.66 GB for the cluster that
# was already running, so the limit presumably only applies when a new
# instance is started -- confirm.
localH2O <- h2o.init(
  ip = "localhost",
  port = 54321,
  startH2O = TRUE,
  max_mem_size = "2g"
)
## Successfully connected to http://localhost:54321/ 
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 hours 2 minutes 
##     H2O cluster version:        3.6.0.8 
##     H2O cluster name:           H2O_started_from_R_mitra2_jos490 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.66 GB 
##     H2O cluster total cores:    4 
##     H2O cluster allowed cores:  2 
##     H2O cluster healthy:        TRUE
# Upload the training rows (predictors and Class label) to H2O. This
# replaces the page-view frame stored earlier under the same key "TrainH2o".
TrainH2o <- as.h2o(
  TrainingSet,
  destination_frame = "TrainH2o"
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Upload the test predictors (TestSet without the label column) to H2O
# under the key "TestH2o".
TestH2o <- as.h2o(
  TestPred,
  destination_frame = "TestH2o"
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%
# Train the classifier: columns 1 to 9 are the predictors and column 10
# ("Class") is the response. Three hidden layers of 100, 150 and 200 units
# with the "TanhWithDropout" activation, trained for 300 epochs.
# This overwrites the autoencoder previously stored in `model`.
model <- h2o.deeplearning(
  x = 1:9,
  y = 10,
  training_frame = TrainH2o,
  activation = "TanhWithDropout",
  hidden = c(100, 150, 200),
  epochs = 300
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |==                                                               |   2%
  |                                                                       
  |==                                                               |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |======================                                           |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |==========================                                       |  39%
  |                                                                       
  |==========================                                       |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |==================================================               |  76%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |===========================================================      |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |===============================================================  |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================| 100%
# Score the test predictors with the trained classifier and pull the result
# back into an R data frame.
h2o_yhat_test <- h2o.predict(model, TestH2o)
df_yhat_test <- as.data.frame(h2o_yhat_test)

# The first column of the prediction frame is used as the predicted class.
prediction <- df_yhat_test[[1]]

# Compare the predicted classes with the true test labels.
confusionMatrix(data = prediction, reference = TestClass)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction DOWN  UP
##       DOWN  103   4
##       UP     12  99
##                                           
##                Accuracy : 0.9266          
##                  95% CI : (0.8835, 0.9575)
##     No Information Rate : 0.5275          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.8534          
##  Mcnemar's Test P-Value : 0.08012         
##                                           
##             Sensitivity : 0.8957          
##             Specificity : 0.9612          
##          Pos Pred Value : 0.9626          
##          Neg Pred Value : 0.8919          
##              Prevalence : 0.5275          
##          Detection Rate : 0.4725          
##    Detection Prevalence : 0.4908          
##       Balanced Accuracy : 0.9284          
##                                           
##        'Positive' Class : DOWN            
##