library(wikipediatrend)
library(classyfire)
## Loading required package: snowfall
## Loading required package: snow
## Loading required package: e1071
## Loading required package: boot
## Loading required package: neldermead
## Loading required package: optimbase
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:base':
##
## crossprod, tcrossprod
##
## Loading required package: optimsimplex
##
## Attaching package: 'optimsimplex'
##
## The following object is masked from 'package:boot':
##
## simplex
library(caret)
## Loading required package: lattice
##
## Attaching package: 'lattice'
##
## The following object is masked from 'package:boot':
##
## melanoma
##
## Loading required package: ggplot2
# Download the page-view counts of the English Wikipedia article
# "Subprime mortgage crisis" for 2010-01-01 through 2014-12-31.
# The `friendly`, `requestFrom` and `userAgent` options are no longer passed:
# wp_trend() reports all three as deprecated and announces that they will
# cause errors in future versions (the package is "friendly" by default).
views <- wp_trend(
  page = "Subprime mortgage crisis",
  from = "2010-01-01",
  to = "2014-12-31",
  lang = "en"
)
## Option 'requestFrom' is deprecated and will cause errors
## in futuere versions of the wp_trend() function. Please read
## the package vignette and/or README to learn about the new
## set of options.
##
## Check wp_http_header() to know which information are send to
## stats.grok.se (R and package versions)
##
## Option 'friendly' is deprecated and will cause errors
## in futuere versions of the wp_trend() function. Please read
## the package vignette and/or README to learn about the new
## set of options.
##
## The package now is friendly by default.
##
## Option 'userAgent' is deprecated and will cause errors
## in futuere versions of the wp_trend() function. Please read
## the package vignette and/or README to learn about the new
## set of options.
##
## Check wp_http_header() to know which information are send to
## stats.grok.se (R and package versions)
##
## http://stats.grok.se/json/en/201001/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201002/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201003/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201004/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201005/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201006/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201007/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201008/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201009/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201010/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201011/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201012/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201101/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201102/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201103/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201104/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201105/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201106/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201107/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201108/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201109/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201110/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201111/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201112/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201201/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201202/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201203/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201204/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201205/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201206/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201207/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201208/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201209/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201210/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201211/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201212/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201301/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201302/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201303/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201304/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201305/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201306/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201307/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201308/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201309/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201310/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201311/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201312/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201401/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201402/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201403/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201404/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201405/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201406/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201407/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201408/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201409/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201410/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201411/Subprime_mortgage%20crisis
## http://stats.grok.se/json/en/201412/Subprime_mortgage%20crisis
# Keep only the first two columns of the download; later code joins on a
# `date` column and uses the second column as the view count.
Count <- views[, c(1L, 2L)]
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
# Date window for the FTSE price history (same span as the page-view data).
startDate <- as.Date("2010-01-01")
endDate <- as.Date("2014-12-31")

# Fetch the ^FTSE series from Yahoo. `auto.assign = TRUE` is spelled out
# because getSymbols() announces that its default will switch to FALSE; the
# rest of the script relies on the `FTSE` object being created in place.
getSymbols(
  "^FTSE",
  src = "yahoo",
  from = startDate,
  to = endDate,
  auto.assign = TRUE
)
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## [1] "FTSE"
# ---- Technical indicators derived from the FTSE series ----

# 3-period relative strength index (RSI) of the open price.
RSI3 <- RSI(Op(FTSE), n = 3)

# 5-period exponential moving average (EMA) of the open price, and the
# distance of the open price from that average.
EMA5 <- EMA(Op(FTSE), n = 5)
EMAcross <- Op(FTSE) - EMA5

# 10-period double EMA of the close price, and the close's distance from it.
# NOTE(review): DEMA10c uses the same-day close, and the target `Class` is the
# sign of (close - open) for that same day -- confirm this look-ahead is
# intended.
DEMA10 <- DEMA(Cl(FTSE), n = 10, v = 1, wilder = FALSE)
DEMA10c <- Cl(FTSE) - DEMA10

# MACD of the open price with the standard 12/26/9 periods. TTR names these
# arguments nFast / nSlow / nSig (the earlier fast / slow / signal spellings
# are not MACD() arguments).
MACD <- MACD(Op(FTSE), nFast = 12, nSlow = 26, nSig = 9)
# Second column: the signal line, used as the indicator.
MACDsignal <- MACD[, 2]

# Stochastic Momentum Index (SMI) with the standard 13/2/25/9 periods, again
# using TTR's nFast / nSlow / nSig argument names.
SMI <- SMI(Op(FTSE), n = 13, nFast = 2, nSlow = 25, nSig = 9)
# First column: the oscillator itself, used as the indicator.
SMI <- SMI[, 1]

# 20-period Bollinger Bands at 2 standard deviations; the fourth column is
# kept as the indicator.
BB <- BBands(Op(FTSE), n = 20, sd = 2)
BBp <- BB[, 4]
# 20-period Commodity Channel Index (CCI) calculated off the High/Low/Close of
# our data. HLC() picks those three columns by name; the former positional
# FTSE[, 3:5] selects Low/Close/Volume in the Open/High/Low/Close/Volume/
# Adjusted column order that getSymbols() returns for Yahoo data.
CCI20 <- CCI(HLC(FTSE), n = 20)
# Same-day move: close price minus open price.
PriceChange <- Cl(FTSE) - Op(FTSE)

# Binary target to predict: "UP" when the close is above the open, otherwise
# "DOWN".
Class <- ifelse(PriceChange > 0, yes = "UP", no = "DOWN")
# Flatten the FTSE series into a data frame with an explicit `date` column.
# (The object holds FTSE data; the DJIADF name is kept for compatibility.)
DJIADF <- data.frame(date = index(FTSE), FTSE, row.names = NULL)

# Join the page-view counts to the price rows on their shared date.
CombDF <- merge(Count, DJIADF, by.x = "date", by.y = "date")

# One column per indicator, one row per FTSE observation.
DataSet <- data.frame(RSI3, EMAcross, MACDsignal, SMI, BBp, CCI20, DEMA10c)

# Number of leading rows discarded from every series (formerly the literal
# 1:33). NOTE(review): presumably the indicator warm-up rows that are still
# NA -- confirm.
WarmupRows <- 33
# Index of the last observation, derived from the data instead of the former
# hard-coded 1297 so the script survives a change of date range.
LastRow <- nrow(DataSet)
stopifnot(LastRow > WarmupRows)

# The view counts are attached by row position, so they only line up with the
# indicators when the merge kept exactly one row per FTSE observation.
if (nrow(CombDF) != LastRow) {
  warning(
    "CombDF has ", nrow(CombDF), " rows but DataSet has ", LastRow,
    "; view counts may be misaligned with the indicator rows.",
    call. = FALSE
  )
}

DataSet <- DataSet[-seq_len(WarmupRows), ]
# Column 2 of CombDF is the view count taken from `Count`.
Alldata <- cbind(DataSet, Views = CombDF[(WarmupRows + 1):LastRow, 2])
# Min-max rescale a numeric vector to the range [0, 1].
#
# x      Numeric vector to rescale.
# na.rm  If TRUE, missing values are ignored when finding the minimum and
#        maximum and stay NA in the result. The default (FALSE) keeps the
#        earlier behaviour: any NA in `x` makes the whole result NA.
#
# Returns a vector the same length as `x`. A constant vector maps to zeros
# instead of the NaN values that 0 / 0 would produce.
Normalized <- function(x, na.rm = FALSE) {
  bounds <- range(x, na.rm = na.rm)
  span <- bounds[2] - bounds[1]
  if (!is.na(span) && span == 0) {
    # Every (non-missing) value is identical: avoid dividing by zero.
    return(x - bounds[1])
  }
  (x - bounds[1]) / span
}
# Rescale every column of Alldata with Normalized().
# NOTE(review): the minimum and maximum are taken over all rows, including
# the ones later assigned to TestSet -- confirm this is intended.
NormalizedData <- as.data.frame(lapply(Alldata, Normalized))

# Class labels as a data frame; column 2 holds the "UP"/"DOWN" values.
ClassDF <- data.frame(date = index(Class), Class, row.names = NULL)

# Use the trailing labels so they match the feature rows one-to-one (this
# replaces the hard-coded 34:1297).
FirstLabelRow <- nrow(ClassDF) - nrow(NormalizedData) + 1
stopifnot(FirstLabelRow >= 1)
# Explicit factor: since R 4.0 data.frame() no longer converts strings to
# factors on its own.
Labels <- factor(ClassDF[FirstLabelRow:nrow(ClassDF), 2])

AlldataNormalized <- data.frame(NormalizedData, Labels)
colnames(AlldataNormalized) <- c(
  "RSI3", "EMAcross", "MACDsignal", "SMI", "BBp", "CCI20", "DEMA10c",
  "Views", "Class"
)

# Chronological split: the first TrainRows rows train the model and all
# remaining rows form the test set (formerly the literals 1:1000 and
# 1001:1264).
TrainRows <- 1000
stopifnot(nrow(AlldataNormalized) > TrainRows)
TrainingSet <- AlldataNormalized[seq_len(TrainRows), ]
TestSet <- AlldataNormalized[(TrainRows + 1):nrow(AlldataNormalized), ]
# Separate the label column from the predictor columns for both partitions.
# A stray bare `v` statement that used to precede these lines was removed: no
# object named `v` is defined in the visible script, so evaluating it would
# stop execution.
LabelCol <- match("Class", colnames(TrainingSet))
TrainClass <- TrainingSet[, LabelCol]
TrainPred <- TrainingSet[, -LabelCol]
TestClass <- TestSet[, LabelCol]
TestPred <- TestSet[, -LabelCol]
# Build the classyfire ensemble from the training predictors and labels,
# with bootNum = 6 and ensNum = 6, running in parallel on 4 CPUs over a
# "SOCK" cluster.
ens <- cfBuild(
  inputData = TrainPred,
  inputClass = TrainClass,
  bootNum = 6,
  ensNum = 6,
  parallel = TRUE,
  cpus = 4,
  type = "SOCK"
)
## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/home/mitra2/git/classyfire~+~wikipedia.Rmd',~+~~
## +~encoding~+~
## R Version: R version 3.2.3 (2015-12-10)
## snowfall 1.84-6.1 initialized (using snow 0.4-1): parallel execution on 4 CPUs.
## Library neldermead loaded.
## Library neldermead loaded in cluster.
## Library e1071 loaded.
## Library e1071 loaded in cluster.
## Library boot loaded.
## Library boot loaded in cluster.
## Library snowfall loaded.
## Library snowfall loaded in cluster.
##
##
## Stopping cluster
# Inspect the fitted ensemble: list the components stored in `ens` and its
# class vector.
attributes(ens)
## $names
## [1] "testAcc" "trainAcc" "optGamma" "optCost" "totalTime"
## [6] "runTime" "confMatr" "predClasses" "testClasses" "missNames"
## [11] "accNames" "testIndx" "svmModel"
##
## $class
## [1] "list" "cfBuild"
# Average test and train accuracy as reported by getAvgAcc().
getAvgAcc(ens)$Test
## [1] 91.09
getAvgAcc(ens)$Train
## [1] 93.3
# Individual test and train accuracies (six values each), read directly from
# the `testAcc` / `trainAcc` components.
ens$testAcc
## [1] 90.69 91.89 91.89 90.99 87.69 93.39
ens$trainAcc
## [1] 94.75 94.90 92.50 91.75 92.65 93.25
# Alternatively, getAcc() returns the same accuracy vectors.
getAcc(ens)$Test
## [1] 90.69 91.89 91.89 90.99 87.69 93.39
getAcc(ens)$Train
## [1] 94.75 94.90 92.50 91.75 92.65 93.25
# Predict the held-out rows with the trained ensemble.
Prediction <- cfPredict(ens, TestPred)
# NOTE(review): the first column is taken to be the predicted class -- confirm
# against the cfPredict() documentation.
PredictionClass <- Prediction[, 1]

# Out-of-sample metrics
# (This heading used to be bare text, which R cannot parse.)
# confusionMatrix() expects two factors sharing one set of levels, so both
# vectors are coerced onto the levels of the reference labels.
ClassLevels <- levels(factor(TestClass))
confusionMatrix(
  factor(PredictionClass, levels = ClassLevels),
  factor(TestClass, levels = ClassLevels)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction DOWN UP
## DOWN 115 9
## UP 10 130
##
## Accuracy : 0.928
## 95% CI : (0.8899, 0.9561)
## No Information Rate : 0.5265
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8556
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9200
## Specificity : 0.9353
## Pos Pred Value : 0.9274
## Neg Pred Value : 0.9286
## Prevalence : 0.4735
## Detection Rate : 0.4356
## Detection Prevalence : 0.4697
## Balanced Accuracy : 0.9276
##
## 'Positive' Class : DOWN
##
# ---- Diagnostic plots for the fitted ensemble ----

# Class predictions, stacked, with all entries and text labels shown.
ggClassPred(
  ens,
  position = "stack",
  displayAll = TRUE,
  showText = TRUE
)

# Ensemble trend plot with text labels.
ggEnsTrend(
  ens,
  showText = TRUE
)

# Ensemble histogram with the density, percentile and mean overlays enabled.
ggEnsHist(
  ens,
  density = TRUE,
  percentiles = TRUE,
  mean = TRUE
)