library(wikipediatrend)
library(classyfire)
## Loading required package: snowfall
## Loading required package: snow
## Loading required package: e1071
## Loading required package: boot
## Loading required package: neldermead
## Loading required package: optimbase
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:base':
## 
##     crossprod, tcrossprod
## 
## Loading required package: optimsimplex
## 
## Attaching package: 'optimsimplex'
## 
## The following object is masked from 'package:boot':
## 
##     simplex
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## 
## The following object is masked from 'package:boot':
## 
##     melanoma
## 
## Loading required package: ggplot2
# Download the daily page-view counts of the Wikipedia article for the study
# period (2010-01-01 to 2014-12-31, English Wikipedia).
# The 'friendly', 'requestFrom' and 'userAgent' options are intentionally not
# passed: wp_trend() reports them as deprecated, says they will cause errors in
# future versions, and states that the package is now friendly by default.
views <- wp_trend(
  page = "National debt of the United States",
  from = "2010-01-01",
  to = "2014-12-31",
  lang = "en"
)
## Option 'requestFrom' is deprecated and will cause errors 
##             in futuere versions of the wp_trend() function. Please read 
##             the package vignette and/or README to learn about the new
##             set of options.
##             
##             Check wp_http_header() to know which information are send to 
##             stats.grok.se (R and package versions)
##             
## Option 'friendly' is deprecated and will cause errors 
##             in futuere versions of the wp_trend() function. Please read 
##             the package vignette and/or README to learn about the new
##             set of options.
##             
##             The package now is friendly by default.
##             
## Option 'userAgent' is deprecated and will cause errors 
##             in futuere versions of the wp_trend() function. Please read 
##             the package vignette and/or README to learn about the new
##             set of options.
##             
##             Check wp_http_header() to know which information are send to 
##             stats.grok.se (R and package versions)
##             
## http://stats.grok.se/json/en/201001/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201002/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201003/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201004/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201005/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201006/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201007/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201008/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201009/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201010/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201011/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201012/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201101/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201102/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201103/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201104/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201105/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201106/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201107/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201108/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201109/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201110/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201111/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201112/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201201/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201202/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201203/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201204/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201205/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201206/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201207/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201208/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201209/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201210/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201211/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201212/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201301/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201302/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201303/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201304/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201305/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201306/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201307/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201308/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201309/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201310/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201311/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201312/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201401/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201402/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201403/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201404/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201405/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201406/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201407/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201408/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201409/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201410/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201411/National_debt%20of%20the%20United%20States
## http://stats.grok.se/json/en/201412/National_debt%20of%20the%20United%20States
# Keep only the first two columns of the page-view data. The later merge joins
# this table on its `date` column and labels the second column "Views".
Count <- views[, c(1L, 2L)]

library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
# Study window for the DJIA price history (the same dates requested for the
# page views).
startDate <- as.Date("2010-01-01")

endDate <- as.Date("2014-12-31")

# Download the DJIA series from Yahoo. getSymbols() announces that its
# auto.assign default is going to change to FALSE, and the rest of the script
# relies on an object named `DJIA` being created in this environment, so the
# flag is stated explicitly instead of being left to the default.
getSymbols("DJIA", src = "yahoo", from = startDate, to = endDate,
           auto.assign = TRUE)
##     As of 0.4-0, 'getSymbols' uses env=parent.frame() and
##  auto.assign=TRUE by default.
## 
##  This  behavior  will be  phased out in 0.5-0  when the call  will
##  default to use auto.assign=FALSE. getOption("getSymbols.env") and 
##  getOptions("getSymbols.auto.assign") are now checked for alternate defaults
## 
##  This message is shown once per session and may be disabled by setting 
##  options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## [1] "DJIA"
# ---- Technical indicators (predictors) and the class label ----------------

# 3-period relative strength index (RSI) of the open price.
RSI3 <- RSI(Op(DJIA), n = 3)

# 5-period exponential moving average (EMA) of the open price, and the
# distance of the open price from that average.
EMA5 <- EMA(Op(DJIA), n = 5)
EMAcross <- Op(DJIA) - EMA5

# 10-period double EMA of the close price, and the distance of the close
# price from it.
DEMA10 <- DEMA(Cl(DJIA), n = 10, v = 1, wilder = FALSE)
DEMA10c <- Cl(DJIA) - DEMA10

# MACD with the standard 12/26/9 parameters. TTR names these arguments
# nFast/nSlow/nSig; the previous fast=/slow=/signal= spellings are not
# parameters of MACD() and were silently swallowed by `...`.
# The result keeps the name `MACD` (shadowing the function) so that existing
# references to the object keep working.
MACD <- MACD(Op(DJIA), nFast = 12, nSlow = 26, nSig = 9)

# Only the signal line (second column) is used as an indicator.
MACDsignal <- MACD[, 2]

# Stochastic momentum index with the standard parameters, using TTR's
# argument names (nFast/nSlow/nSig) for the same reason as above.
SMI <- SMI(Op(DJIA), n = 13, nFast = 2, nSlow = 25, nSig = 9)
# Only the oscillator itself (first column) is used as an indicator.
SMI <- SMI[, 1]

# 20-period Bollinger Bands at 2 standard deviations; the fourth column
# (%B in TTR's BBands output) is the indicator.
BB <- BBands(Op(DJIA), n = 20, sd = 2)
BBp <- BB[, 4]

# 20-period Commodity Channel Index on High/Low/Close. HLC() selects those
# columns by name; the previous DJIA[, 3:5] picked Low/Close/Volume in the
# Open/High/Low/Close/Volume/Adjusted column layout.
CCI20 <- CCI(HLC(DJIA), n = 20)

# Intraday move (close minus open) and the binary label to be predicted.
# NOTE(review): DEMA10c and CCI20 are built from the same day's close that
# defines this label - confirm that this look-ahead is intended.
PriceChange <- Cl(DJIA) - Op(DJIA)
Class <- ifelse(PriceChange > 0, "UP", "DOWN")
#Create a binary classification variable, the variable we are trying to predict.

# Flatten the xts price series into a data frame with an explicit date column.
DJIADF <- data.frame(date = index(DJIA), DJIA, row.names = NULL)

# Attach the page views to the trading days (inner join on `date`).
CombDF <- merge(Count, DJIADF, by.x = "date", by.y = "date")

# One row per trading day, one column per indicator.
DataSet <- data.frame(RSI3, EMAcross, MACDsignal, SMI, BBp, CCI20, DEMA10c)

# Number of leading rows to discard while the indicators warm up
# (presumably driven by the MACD signal line: 26 + 9 - 2 = 33 rows).
warmup_rows <- 33L

# The page views are bound to the indicators by row position below, so every
# trading day must have found a page-view record in the merge, and there must
# be rows left once the warm-up period is removed.
stopifnot(
  nrow(CombDF) == nrow(DataSet),
  nrow(DataSet) > warmup_rows
)

# Row range kept for modelling, derived from the data instead of the former
# hard-coded 34:1258.
keep_rows <- seq.int(warmup_rows + 1L, nrow(DataSet))

DataSet <- DataSet[keep_rows, ]

# Column 2 of the merged table is the page-view count.
Alldata <- cbind(DataSet, CombDF[keep_rows, 2])


# Min-max scale a numeric vector to the range [0, 1].
#
# x: numeric vector (e.g. one column of a data frame).
# Returns a vector of the same length as `x`.
#   * Missing values are ignored when the range is computed and stay NA in the
#     result (previously a single NA turned the whole result into NA).
#   * A constant input maps to 0 instead of NaN (0 / 0).
#   * Empty or all-NA input is returned unchanged.
Normalized <- function(x) {
  if (all(is.na(x))) {
    return(x)
  }
  rng <- range(x, na.rm = TRUE)
  span <- rng[2] - rng[1]
  if (isTRUE(span == 0)) {
    # 0 * x keeps the NA positions and yields 0 everywhere else.
    return(0 * x)
  }
  (x - rng[1]) / span
}
# Scale every predictor column to [0, 1].
NormalizedData <- as.data.frame(lapply(Alldata, Normalized))

# Class labels as a data frame: the date, then the UP/DOWN label.
ClassDF <- data.frame(date = index(Class), Class, row.names = NULL)

# Only leading rows were dropped from the predictors, so the matching labels
# are the last nrow(NormalizedData) entries. This replaces the hard-coded
# 34:1258 row range.
n_obs <- nrow(NormalizedData)
stopifnot(nrow(ClassDF) >= n_obs)

# Make the label an explicit factor with fixed levels, so it does not depend
# on the stringsAsFactors default of the running R version.
class_labels <- factor(tail(ClassDF[[2]], n_obs), levels = c("DOWN", "UP"))

AlldataNormalized <- data.frame(NormalizedData, class_labels)


colnames(AlldataNormalized) <- c(
  "RSI3", "EMAcross", "MACDsignal", "SMI", "BBp", "CCI20", "DEMA10c",
  "Views", "Class"
)


# Chronological split: the first n_train rows train the model, the remaining
# rows are held out for testing (previously hard-coded as 1:1000 / 1001:1225).
n_train <- 1000L
stopifnot(n_obs > n_train)

TrainingSet <- AlldataNormalized[seq_len(n_train), ]

TestSet <- AlldataNormalized[seq.int(n_train + 1L, n_obs), ]

# Separate the class label from the predictors in both sets.
# (A stray bare symbol `v` used to sit here; evaluating it raised
# "object 'v' not found" when the script was run.)
# The label is selected by its column name rather than by position 9.
TrainClass <- TrainingSet[, "Class"]
TrainPred <- TrainingSet[, names(TrainingSet) != "Class"]

TestClass <- TestSet[, "Class"]
TestPred <- TestSet[, names(TestSet) != "Class"]
# Build the classifier ensemble on the training predictors and labels:
# 6 bootstrap iterations, 6 ensemble members, run in parallel on 4 CPUs over
# a socket ("SOCK") cluster.
ens <- cfBuild(
  inputData = TrainPred,
  inputClass = TrainClass,
  bootNum = 6,
  ensNum = 6,
  parallel = TRUE,
  cpus = 4,
  type = "SOCK"
)
## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/home/mitra2/git/classyfire~+~wikipedia.Rmd',~+~~
## +~encoding~+~
## R Version:  R version 3.2.3 (2015-12-10)
## snowfall 1.84-6.1 initialized (using snow 0.4-1): parallel execution on 4 CPUs.
## Library neldermead loaded.
## Library neldermead loaded in cluster.
## Library e1071 loaded.
## Library e1071 loaded in cluster.
## Library boot loaded.
## Library boot loaded in cluster.
## Library snowfall loaded.
## Library snowfall loaded in cluster.
## 
## 
## Stopping cluster
# List the components stored in the fitted ensemble object and its class.
attributes(ens)
## $names
##  [1] "testAcc"     "trainAcc"    "optGamma"    "optCost"     "totalTime"  
##  [6] "runTime"     "confMatr"    "predClasses" "testClasses" "missNames"  
## [11] "accNames"    "testIndx"    "svmModel"   
## 
## $class
## [1] "list"    "cfBuild"
# Average test and train accuracy of the ensemble (a single value each).
getAvgAcc(ens)$Test
## [1] 91.99
getAvgAcc(ens)$Train
## [1] 94.9
# Individual test and train accuracies stored on the object
# (six values each in the output below).
ens$testAcc  
## [1] 90.09 92.19 92.49 91.89 91.89 93.39
ens$trainAcc
## [1] 98.50 92.95 94.60 93.85 94.00 95.50
# Alternatively, read the same accuracies through the getAcc() accessor;
# the printed values match ens$testAcc and ens$trainAcc above.

getAcc(ens)$Test
## [1] 90.09 92.19 92.49 91.89 91.89 93.39
getAcc(ens)$Train
## [1] 98.50 92.95 94.60 93.85 94.00 95.50

# Predicting on the unseen (held-out) sample

 # Run the ensemble on the held-out predictors, then keep the first column of
 # the result as the predicted class.
 Prediction <- cfPredict(ens, TestPred)
 PredictionClass <- Prediction[, 1]

# Out-of-sample metrics

 # Compare the predicted classes with the true test labels.
 confusionMatrix(data = PredictionClass, reference = TestClass)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction DOWN  UP
##       DOWN   87  15
##       UP     12 111
##                                           
##                Accuracy : 0.88            
##                  95% CI : (0.8302, 0.9194)
##     No Information Rate : 0.56            
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.7573          
##  Mcnemar's Test P-Value : 0.7003          
##                                           
##             Sensitivity : 0.8788          
##             Specificity : 0.8810          
##          Pos Pred Value : 0.8529          
##          Neg Pred Value : 0.9024          
##              Prevalence : 0.4400          
##          Detection Rate : 0.3867          
##    Detection Prevalence : 0.4533          
##       Balanced Accuracy : 0.8799          
##                                           
##        'Positive' Class : DOWN            
## 
# Diagnostic plots for the fitted ensemble.

# Class predictions, stacked, with all classes shown and text labels on.
ggClassPred(
  ens,
  position = "stack",
  displayAll = TRUE,
  showText = TRUE
)

# Ensemble trend plot with text labels.
ggEnsTrend(
  ens,
  showText = TRUE
)

# Ensemble histogram with density, percentile and mean overlays enabled.
ggEnsHist(
  ens,
  density = TRUE,
  percentiles = TRUE,
  mean = TRUE
)