library(wikipediatrend)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Daily English-Wikipedia page views for "Subprime mortgage crisis",
# 2010-01-01 through 2014-12-31.
# The 'friendly', 'requestFrom' and 'userAgent' options are no longer passed:
# wp_trend() reports them as deprecated and warns that they will cause errors
# in future versions (the package is "friendly" by default).
views <- wp_trend(
  page = "Subprime mortgage crisis",
  from = "2010-01-01",
  to = "2014-12-31",
  lang = "en"
)
## Option 'requestFrom' is deprecated and will cause errors
## in futuere versions of the wp_trend() function. Please read
## the package vignette and/or README to learn about the new
## set of options.
##
## Check wp_http_header() to know which information are send to
## stats.grok.se (R and package versions)
##
## Option 'friendly' is deprecated and will cause errors
## in futuere versions of the wp_trend() function. Please read
## the package vignette and/or README to learn about the new
## set of options.
##
## The package now is friendly by default.
##
## Option 'userAgent' is deprecated and will cause errors
## in futuere versions of the wp_trend() function. Please read
## the package vignette and/or README to learn about the new
## set of options.
##
## Check wp_http_header() to know which information are send to
## stats.grok.se (R and package versions)
##
## http://stats.grok.se/json/en/201001/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201002/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201003/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201004/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201005/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201006/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201007/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201008/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201009/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201010/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201011/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201012/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201101/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201102/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201103/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201104/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201105/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201106/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201107/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201108/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201109/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201110/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201111/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201112/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201201/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201202/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201203/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201204/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201205/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201206/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201207/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201208/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201209/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201210/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201211/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201212/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201301/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201302/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201303/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201304/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201305/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201306/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201307/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201308/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201309/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201310/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201311/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201312/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201401/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201402/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201403/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201404/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201405/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201406/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201407/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201408/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201409/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201410/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201411/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201412/Subprime_mortgage%20crisis
# Keep only the first two columns of the page-view table; the first one is
# later used as the `date` merge key.
Count <- views[, c(1, 2)]
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
# Date window for the index data (same window as the page-view request).
startDate <- as.Date("2010-01-01")
endDate <- as.Date("2014-12-31")
# Download the "^BVSP" series from Yahoo. With the default auto-assign
# behaviour the data is stored in the object `BVSP`.
getSymbols(Symbols = "^BVSP", src = "yahoo", from = startDate, to = endDate)
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## [1] "BVSP"
# Technical indicators used as model features.

# 3-period relative strength index (RSI) off the open price.
RSI3 <- RSI(Op(BVSP), n = 3)

# 5-period exponential moving average (EMA) of the open, and the distance
# between the open price and that EMA.
EMA5 <- EMA(Op(BVSP), n = 5)
EMAcross <- Op(BVSP) - EMA5

# 10-period double EMA of the close, and the distance of the close from it.
DEMA10 <- DEMA(Cl(BVSP), n = 10, v = 1, wilder = FALSE)
DEMA10c <- Cl(BVSP) - DEMA10

# MACD with standard parameters. The TTR argument names are nFast/nSlow/nSig;
# the former fast/slow/signal names are not formals of MACD() and were
# swallowed by `...`, so the result only matched because these values are
# also the defaults.
MACD <- MACD(Op(BVSP), nFast = 12, nSlow = 26, nSig = 9)
# Grab just the signal line (second column) to use as our indicator.
MACDsignal <- MACD[, 2]

# Stochastic momentum index with standard parameters (same argument-name fix
# as for MACD); keep only the oscillator (first column).
SMI <- SMI(Op(BVSP), n = 13, nFast = 2, nSlow = 25, nSig = 9)
SMI <- SMI[, 1]

# 20-period Bollinger Bands on the open; the fourth column is used as the
# indicator.
BB <- BBands(Op(BVSP), n = 20, sd = 2)
BBp <- BB[, 4]

# 20-period Commodity Channel Index calculated off the High/Low/Close of our
# data. HLC() selects those columns by name; the previous positional
# selection BVSP[, 3:5] picked Low/Close/Volume instead.
CCI20 <- CCI(HLC(BVSP), n = 20)
# Target variable: sign of the day-over-day change in the closing price.
ClosingPrice <- Cl(BVSP)

# Close at T minus close at T-1.
Trend <- diff(
  ClosingPrice,
  lag = 1,
  differences = 1,
  arithmetic = TRUE,
  log = FALSE,
  na.pad = TRUE
)

# Binary classification label we are trying to predict: "UP" when the close
# rose versus the previous bar, "DOWN" otherwise.
# NOTE(review): DEMA10c is built from the same day's close that defines this
# label -- looks like look-ahead leakage; confirm whether the features should
# be lagged by one bar.
Class <- ifelse(Trend > 0, "UP", "DOWN")
# Price data as a plain data frame with an explicit `date` column, joined to
# the page-view counts on that date.
DJIADF <- data.frame(date = index(BVSP), BVSP, row.names = NULL)
CombDF <- merge(Count, DJIADF, by = "date")

# One row per bar of BVSP, one column per indicator.
DataSet <- data.frame(RSI3, EMAcross, MACDsignal, SMI, BBp, CCI20, DEMA10c)

# Drop the first 33 bars (indicator warm-up window).
warmup_rows <- 33
DataSet <- DataSet[-seq_len(warmup_rows), ]

# Attach the page views by date instead of by a fixed row window (34:1251).
# The fixed window is only correct when the merged table has exactly 1251
# rows that line up one-to-one with BVSP; matching on the date gives the same
# result in that case and stays aligned otherwise (days without a view count
# become NA).
feature_dates <- index(BVSP)[-seq_len(warmup_rows)]
stopifnot(length(feature_dates) == nrow(DataSet))
view_rows <- match(feature_dates, CombDF$date)
Alldata <- cbind(DataSet, Views = CombDF[view_rows, 2])
# Min-max rescale a numeric vector to [0, 1].
#
# x      Numeric vector.
# na.rm  Ignore NA values when computing the minimum and maximum? NA entries
#        stay NA in the result. With na.rm = FALSE any NA makes the whole
#        result NA (the previous behaviour).
#
# Returns a vector of the same length as `x`. A vector whose non-missing
# values are all equal maps to 0 instead of NaN (0 / 0); empty or all-NA
# input is returned unchanged.
Normalized <- function(x, na.rm = TRUE) {
  if (length(x) == 0L || all(is.na(x))) {
    return(x)
  }
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  span <- hi - lo
  # Zero range: avoid dividing by zero.
  if (!is.na(span) && span == 0) {
    return(x - lo)
  }
  (x - lo) / span
}
# Rescale every feature column with Normalized().
# NOTE(review): the scaling is computed over the full sample, so test-period
# values influence how the training rows are scaled.
NormalizedData <- as.data.frame(lapply(Alldata, Normalized))

# Class labels with their dates; drop the same 33 warm-up bars that were
# removed from the features (instead of the fixed 34:1251 window).
ClassDF <- data.frame(date = index(Class), Class, row.names = NULL)
warmup_rows <- 33
# Explicit factor: data.frame() only converts strings to factors when
# stringsAsFactors is TRUE, and the classifiers need a factor response.
class_labels <- factor(
  ClassDF[-seq_len(warmup_rows), 2],
  levels = c("DOWN", "UP")
)
stopifnot(length(class_labels) == nrow(NormalizedData))

AlldataNormalized <- data.frame(NormalizedData, class_labels)
colnames(AlldataNormalized) <- c(
  "RSI3", "EMAcross", "MACDsignal", "SMI", "BBp", "CCI20", "DEMA10c",
  "Views", "Class"
)

# Chronological split: first 1000 rows for training, the remainder for
# testing (rows 1001:1218 when the sample has 1218 rows).
n_train <- 1000
stopifnot(nrow(AlldataNormalized) > n_train)
TrainingSet <- AlldataNormalized[seq_len(n_train), ]
TestSet <- AlldataNormalized[(n_train + 1):nrow(AlldataNormalized), ]

# Column 9 is the class label; the other eight columns are predictors.
TrainClass <- TrainingSet[, 9]
TrainPred <- TrainingSet[, -9]
TestClass <- TestSet[, 9]
TestPred <- TestSet[, -9]
library(h2o)
## Loading required package: statmod
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
##
## Attaching package: 'h2o'
##
## The following objects are masked from 'package:stats':
##
## sd, var
##
## The following objects are masked from 'package:base':
##
## %*%, apply, as.factor, as.numeric, colnames, colnames<-,
## ifelse, %in%, is.factor, is.numeric, log, trunc
# Connect to the H2O instance on localhost:54321 (startH2O = TRUE asks the
# package to start one if needed).
localH2O <- h2o.init(
  ip = "localhost",
  port = 54321,
  startH2O = TRUE
)
## Successfully connected to http://localhost:54321/
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 2 hours 32 minutes
## H2O cluster version: 3.6.0.8
## H2O cluster name: H2O_started_from_R_mitra2_uhd310
## H2O cluster total nodes: 1
## H2O cluster total memory: 0.66 GB
## H2O cluster total cores: 4
## H2O cluster allowed cores: 2
## H2O cluster healthy: TRUE
# Connect to H2O requesting a 2 GB memory ceiling. `Xmx` is deprecated in
# h2o.init(); `max_mem_size` is the replacement named by the package warning.
# NOTE(review): in the recorded run this call attached to an already running
# cluster that still reported 0.66 GB, so the limit presumably only applies
# when a new cluster is started -- confirm.
localH2O <- h2o.init(
  ip = "localhost",
  port = 54321,
  startH2O = TRUE,
  max_mem_size = "2g"
)
## Warning in h2o.init(ip = "localhost", port = 54321, startH2O = TRUE, Xmx =
## "2g"): Xmx is a deprecated parameter. Use `max_mem_size` and `min_mem_size`
## to set the memory boundaries. Using `Xmx` to set these.
## Successfully connected to http://localhost:54321/
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 2 hours 32 minutes
## H2O cluster version: 3.6.0.8
## H2O cluster name: H2O_started_from_R_mitra2_uhd310
## H2O cluster total nodes: 1
## H2O cluster total memory: 0.66 GB
## H2O cluster total cores: 4
## H2O cluster allowed cores: 2
## H2O cluster healthy: TRUE
# Upload the training rows (predictors plus Class) to H2O as frame "TrainH2o".
TrainH2o <- as.h2o(
  TrainingSet,
  destination_frame = "TrainH2o"
)
##
|
| | 0%
|
|=================================================================| 100%
# Upload the test predictors (Class column excluded) to H2O as frame "TestH2o".
TestH2o <- as.h2o(
  TestPred,
  destination_frame = "TestH2o"
)
##
|
| | 0%
|
|=================================================================| 100%
# deepnet: deep neural network trained with h2o
# Hyper-parameter grid: three hidden-layer layouts crossed with two L1
# penalties (six models in total).
hidden_opt <- list(
  c(200, 200),
  c(100, 300, 100),
  c(500, 500, 500)
)
l1_opt <- c(1e-5, 1e-7)
hyper_params <- list(hidden = hidden_opt, l1 = l1_opt)

# Grid search over deep-learning models; columns 1-8 of the training frame
# are the predictors and column 9 is the response.
model_grid <- h2o.grid(
  "deeplearning",
  hyper_params = hyper_params,
  x = 1:8,
  y = 9,
  training_frame = TrainH2o,
  distribution = "multinomial",
  activation = "TanhWithDropout"
)
##
|
| | 0%
|
|=========== | 17%
|
|====================== | 33%
|
|================================ | 50%
|
|=========================================== | 67%
|
|====================================================== | 83%
|
|=================================================================| 100%
# Stand-alone deep-learning model: three hidden layers of 50 units each,
# trained for 100 epochs on columns 1-8 (predictors) against column 9.
model <- h2o.deeplearning(
  x = 1:8,
  y = 9,
  training_frame = TrainH2o,
  activation = "TanhWithDropout",
  hidden = c(50, 50, 50),
  epochs = 100
)
##
|
| | 0%
|
|====== | 10%
|
|============= | 20%
|
|==================== | 30%
|
|========================== | 40%
|
|================================ | 50%
|
|======================================= | 60%
|
|============================================== | 70%
|
|==================================================== | 80%
|
|========================================================== | 90%
|
|=================================================================| 100%
# Print the grid-search summary: the hyper-parameters used, the number of
# models trained/failed and the id of each model.
summary(model_grid)
## H2O Grid Details
## ================
##
## Grid ID: Grid_DeepLearning_TrainH2o_model_R_1456225517775_10
## Used hyper parameters:
## - l1
## - hidden
## Number of models: 6
## Number of failed models: 0
##
## Generated models
## ----------------
## l1 hidden status_ok
## 1e-05 [500,500,500] OK
## 1e-07 [200,200] OK
## 1e-05 [100,300,100] OK
## 1e-05 [200,200] OK
## 1e-07 [100,300,100] OK
## 1e-07 [500,500,500] OK
## model_ids
## Grid_DeepLearning_TrainH2o_model_R_1456225517775_10_model_4
## Grid_DeepLearning_TrainH2o_model_R_1456225517775_10_model_1
## Grid_DeepLearning_TrainH2o_model_R_1456225517775_10_model_2
## Grid_DeepLearning_TrainH2o_model_R_1456225517775_10_model_0
## Grid_DeepLearning_TrainH2o_model_R_1456225517775_10_model_3
## Grid_DeepLearning_TrainH2o_model_R_1456225517775_10_model_5
## H2O Grid Summary
## ================
##
## Grid ID: Grid_DeepLearning_TrainH2o_model_R_1456225517775_10
## Used hyper parameters:
## - l1
## - hidden
## Number of models: 6
## - Grid_DeepLearning_TrainH2o_model_R_1456225517775_10_model_4
## - Grid_DeepLearning_TrainH2o_model_R_1456225517775_10_model_1
## - Grid_DeepLearning_TrainH2o_model_R_1456225517775_10_model_2
## - Grid_DeepLearning_TrainH2o_model_R_1456225517775_10_model_0
## - Grid_DeepLearning_TrainH2o_model_R_1456225517775_10_model_3
## - Grid_DeepLearning_TrainH2o_model_R_1456225517775_10_model_5
##
## Number of failed models: 0
# Fetch every model trained by the grid search.
model_ids <- model_grid@model_ids
models <- lapply(model_ids, h2o.getModel)

# Score the held-out predictors. Note that the stand-alone `model` is used
# here; the grid models fetched above are not used for prediction.
h2o_yhat_test <- h2o.predict(model, TestH2o)
df_yhat_test <- as.data.frame(h2o_yhat_test)

# The first column of the prediction frame holds the predicted label.
# confusionMatrix() needs factors that share the same levels, so coerce both
# sides explicitly rather than relying on how the columns were imported.
class_levels <- c("DOWN", "UP")
prediction <- factor(df_yhat_test[, 1], levels = class_levels)
confusionMatrix(prediction, factor(TestClass, levels = class_levels))
## Confusion Matrix and Statistics
##
## Reference
## Prediction DOWN UP
## DOWN 103 5
## UP 12 98
##
## Accuracy : 0.922
## 95% CI : (0.8781, 0.9539)
## No Information Rate : 0.5275
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8441
## Mcnemar's Test P-Value : 0.1456
##
## Sensitivity : 0.8957
## Specificity : 0.9515
## Pos Pred Value : 0.9537
## Neg Pred Value : 0.8909
## Prevalence : 0.5275
## Detection Rate : 0.4725
## Detection Prevalence : 0.4954
## Balanced Accuracy : 0.9236
##
## 'Positive' Class : DOWN
##
# svm with rbf kernel
library(e1071)
# Support vector machine on all eight predictors; "radial" (RBF) is the
# default kernel of svm() and is spelled out here.
svm.model <- svm(
  Class ~ .,
  data = TrainingSet,
  kernel = "radial",
  cost = 10,
  gamma = 1
)

# Predict the held-out rows and compare with the true labels.
svm.pred <- predict(svm.model, newdata = TestSet)
confusionMatrix(svm.pred, TestClass)
## Confusion Matrix and Statistics
##
## Reference
## Prediction DOWN UP
## DOWN 97 4
## UP 18 99
##
## Accuracy : 0.8991
## 95% CI : (0.8512, 0.9357)
## No Information Rate : 0.5275
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.799
## Mcnemar's Test P-Value : 0.005578
##
## Sensitivity : 0.8435
## Specificity : 0.9612
## Pos Pred Value : 0.9604
## Neg Pred Value : 0.8462
## Prevalence : 0.5275
## Detection Rate : 0.4450
## Detection Prevalence : 0.4633
## Balanced Accuracy : 0.9023
##
## 'Positive' Class : DOWN
##
library(nnet)
# Single-hidden-layer network with 2 hidden units, weight decay 5e-4 and at
# most 200 iterations; initial weights are drawn from [-0.1, 0.1].
nn <- nnet(
  Class ~ .,
  data = TrainingSet,
  size = 2,
  rang = 0.1,
  decay = 5e-4,
  maxit = 200
)
## # weights: 21
## initial value 695.275538
## iter 10 value 591.434830
## iter 20 value 164.419634
## iter 30 value 122.711158
## iter 40 value 110.214706
## iter 50 value 107.699342
## iter 60 value 106.912322
## iter 70 value 106.143045
## iter 80 value 105.702423
## iter 90 value 105.574172
## iter 100 value 105.561188
## iter 110 value 105.557991
## iter 120 value 105.549244
## iter 130 value 105.547990
## final value 105.547376
## converged
# Predicted class labels for the held-out rows; with type = "class" the
# prediction comes back as a character vector.
nnPred <- predict(nn, TestSet, type = "class")

# confusionMatrix() needs factors that share the same levels. Fixing the
# levels also keeps the table square if only one class is ever predicted.
class_levels <- c("DOWN", "UP")
confusionMatrix(
  factor(nnPred, levels = class_levels),
  factor(TestClass, levels = class_levels)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction DOWN UP
## DOWN 100 5
## UP 15 98
##
## Accuracy : 0.9083
## 95% CI : (0.8619, 0.9431)
## No Information Rate : 0.5275
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.8169
## Mcnemar's Test P-Value : 0.04417
##
## Sensitivity : 0.8696
## Specificity : 0.9515
## Pos Pred Value : 0.9524
## Neg Pred Value : 0.8673
## Prevalence : 0.5275
## Detection Rate : 0.4587
## Detection Prevalence : 0.4817
## Balanced Accuracy : 0.9105
##
## 'Positive' Class : DOWN
##
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:ggplot2':
##
## margin
# Random forest of 1000 trees on all eight predictors; the forest is kept for
# prediction and variable-importance measures are computed.
model.rf <- randomForest(
  Class ~ .,
  data = TrainingSet,
  ntree = 1000,
  keep.forest = TRUE,
  importance = TRUE
)

# Predict the held-out rows and compare with the true labels.
rf.pred <- predict(model.rf, newdata = TestSet)
confusionMatrix(rf.pred, TestClass)
## Confusion Matrix and Statistics
##
## Reference
## Prediction DOWN UP
## DOWN 102 3
## UP 13 100
##
## Accuracy : 0.9266
## 95% CI : (0.8835, 0.9575)
## No Information Rate : 0.5275
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.8535
## Mcnemar's Test P-Value : 0.02445
##
## Sensitivity : 0.8870
## Specificity : 0.9709
## Pos Pred Value : 0.9714
## Neg Pred Value : 0.8850
## Prevalence : 0.5275
## Detection Rate : 0.4679
## Detection Prevalence : 0.4817
## Balanced Accuracy : 0.9289
##
## 'Positive' Class : DOWN
##