library(wikipediatrend)
library(classyfire)
## Loading required package: snowfall
## Loading required package: snow
## Loading required package: e1071
## Loading required package: boot
## Loading required package: neldermead
## Loading required package: optimbase
## Loading required package: Matrix
## Loading required package: optimsimplex
##
## Attaching package: 'optimsimplex'
##
## The following object is masked from 'package:boot':
##
## simplex
library(caret)
## Loading required package: lattice
##
## Attaching package: 'lattice'
##
## The following object is masked from 'package:boot':
##
## melanoma
##
## Loading required package: ggplot2
# Request the wikipediatrend series for the English "Citigroup" page over
# 2010-01-01 .. 2014-12-31 (the same window later used for the price data).
views <- wp_trend(
  page = "Citigroup",
  from = "2010-01-01",
  to = "2014-12-31",
  lang = "en",
  friendly = TRUE,
  requestFrom = "wp.trend.tester at wptt.wptt",
  userAgent = TRUE
)
##
## Results written to:
## /home/mitra2/git/wp__Citigroup__en.csv
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
# Date window for the price history (same range as the wp_trend request).
startDate <- as.Date("2010-01-01")
endDate <- as.Date("2014-12-31")

# Download the Citigroup price series from Yahoo.
# The ticker is spelled "C" because every later statement refers to the
# object `C`. auto.assign = TRUE is stated explicitly because the script
# depends on getSymbols() creating `C` in this environment, and the package
# message warns that this default is scheduled to change.
getSymbols(
  "C",
  src = "yahoo",
  from = startDate,
  to = endDate,
  auto.assign = TRUE
)
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## [1] "C"
# ---- Technical indicators used as predictors ----

# 3-period relative strength index (RSI) of the open price.
RSI3 <- RSI(Op(C), n = 3)

# 5-period exponential moving average (EMA) of the open price, and the
# distance of the open from that average.
EMA5 <- EMA(Op(C), n = 5)
EMAcross <- Op(C) - EMA5

# 10-period double EMA of the close, and the distance of the close from it.
DEMA10 <- DEMA(Cl(C), n = 10, v = 1, wilder = FALSE)
DEMA10c <- Cl(C) - DEMA10

# MACD with the standard 12/26/9 parameters. TTR names these arguments
# nFast/nSlow/nSig; `fast`, `slow` and `signal` are not formals of MACD() and
# would only be absorbed by `...`.
MACD <- MACD(Op(C), nFast = 12, nSlow = 26, nSig = 9)
# Keep just the signal line (second column) as the indicator.
MACDsignal <- MACD[, 2]

# Stochastic Momentum Index with the standard 13/25/2/9 parameters
# (same argument-name correction as for MACD above).
SMI <- SMI(Op(C), n = 13, nSlow = 25, nFast = 2, nSig = 9)
# Keep just the oscillator (first column) as the indicator.
SMI <- SMI[, 1]

# 20-period Bollinger Bands of the open price; the fourth column is used as
# the indicator (%B according to the TTR documentation).
BB <- BBands(Op(C), n = 20, sd = 2)
BBp <- BB[, 4]

# 20-period Commodity Channel Index of the High/Low/Close series.
# HLC() selects those columns by name. Positional columns 3:5 of a Yahoo
# OHLCV object are Low/Close/Volume, which is not what CCI expects.
CCI20 <- CCI(HLC(C), n = 20)

# ---- Target variable ----

# Intraday move: close minus open of the same session.
PriceChange <- Cl(C) - Op(C)
# Binary label we are trying to predict: "UP" when the session closed above
# its open, otherwise "DOWN".
# NOTE(review): DEMA10c and CCI20 are computed from the same session's close
# that defines this label, so they may leak the outcome into the predictors.
# Consider lagging them by one period -- confirm the intended setup.
Class <- ifelse(PriceChange > 0, "UP", "DOWN")
#Create a binary classification variable, the variable we are trying to predict.
# Flatten the price series into a data frame keyed by trading date and
# inner-join it with the page-view series on that date.
DJIADF <- data.frame(date = index(C), C, row.names = NULL)
CombDF <- merge(views, DJIADF, by = "date")

# The page views are attached to the indicators by row position below, so
# the merged frame must contain exactly one row per trading day. Fail loudly
# instead of silently pairing indicators with the wrong day's view count.
stopifnot(nrow(CombDF) == nrow(C))

# One row per trading day, one column per indicator.
DataSet <- data.frame(RSI3, EMAcross, MACDsignal, SMI, BBp, CCI20, DEMA10c)

# Drop the first 33 rows, the warm-up period in which the slowest indicators
# are still NA, from both the indicators and the page views.
WarmUpRows <- seq_len(33)
DataSet <- DataSet[-WarmUpRows, ]
# Column 2 of the merged frame is the first non-key column of `views`.
Alldata <- cbind(DataSet, Views = CombDF[-WarmUpRows, 2])
# Min-max rescale a numeric vector to the interval [0, 1].
#
# x: numeric vector.
# Returns a vector of the same length as `x`. NA entries stay NA but no
# longer turn the whole result into NA. A constant vector maps to 0 instead
# of NaN (0 / 0).
Normalized <- function(x) {
  rng <- range(x, na.rm = TRUE)
  span <- rng[2] - rng[1]
  # isTRUE() guards against an NA/NaN span (e.g. all-Inf input).
  if (isTRUE(span == 0)) {
    return(x - rng[1])
  }
  (x - rng[1]) / span
}
# Rescale every predictor column to [0, 1].
# NOTE(review): the min/max are taken over all rows, so values from the test
# period influence how the training rows are scaled -- confirm this is
# acceptable for the evaluation.
NormalizedData <- as.data.frame(lapply(Alldata, Normalized))

# Class labels as a plain data frame (date + label).
ClassDF <- data.frame(date = index(Class), Class, row.names = NULL)

# Attach the labels of the trailing rows, i.e. the rows that survived the
# warm-up trim, without hard-coding the row range. factor() makes the label
# type explicit rather than dependent on the stringsAsFactors default.
AlldataNormalized <- data.frame(
  NormalizedData,
  factor(tail(ClassDF[, 2], nrow(NormalizedData)))
)
colnames(AlldataNormalized) <- c(
  "RSI3", "EMAcross", "MACDsignal", "SMI", "BBp", "CCI20", "DEMA10c",
  "Views", "Class"
)

# Chronological split: the first 1000 rows train the model, the rest are
# held out for testing.
TrainRows <- seq_len(1000)
stopifnot(nrow(AlldataNormalized) > length(TrainRows))
TrainingSet <- AlldataNormalized[TrainRows, ]
TestSet <- AlldataNormalized[-TrainRows, ]

# Separate the label from the predictors, selecting the label by name.
PredictorCols <- setdiff(colnames(AlldataNormalized), "Class")
TrainClass <- TrainingSet[["Class"]]
TrainPred <- TrainingSet[, PredictorCols]
TestClass <- TestSet[["Class"]]
TestPred <- TestSet[, PredictorCols]
# Fit the classyfire ensemble on the training predictors and labels,
# running in parallel on 4 CPUs over a SOCK cluster.
ens <- cfBuild(
  inputData = TrainPred,
  inputClass = TrainClass,
  bootNum = 6,
  ensNum = 6,
  parallel = TRUE,
  cpus = 4,
  type = "SOCK"
)
## Warning in searchCommandline(parallel, cpus = cpus, type = type,
## socketHosts = socketHosts, : Unknown option on commandline:
## rmarkdown::render('/home/mitra2/git/classyfire~+~wikipedia.Rmd',~+~~
## +~encoding~+~
## R Version: R version 3.1.0 (2014-04-10)
## snowfall 1.84-6 initialized (using snow 0.3-13): parallel execution on 4 CPUs.
## Library neldermead loaded.
## Library neldermead loaded in cluster.
## Library e1071 loaded.
## Library e1071 loaded in cluster.
## Library boot loaded.
## Library boot loaded in cluster.
## Library snowfall loaded.
## Library snowfall loaded in cluster.
##
##
## Stopping cluster
# Inspect the fitted ensemble object: its element names and its classes.
attributes(ens)
## $names
## [1] "testAcc" "trainAcc" "optGamma" "optCost" "totalTime"
## [6] "runTime" "confMatr" "predClasses" "testClasses" "missNames"
## [11] "accNames" "testIndx" "svmModel"
##
## $class
## [1] "list" "cfBuild"
# Average test and train accuracy (%) over the ensemble members; the values
# printed below equal the means of the per-member vectors further down.
getAvgAcc(ens)$Test
## [1] 76.25
getAvgAcc(ens)$Train
## [1] 79.61
# Per-member test and train accuracies (%), read directly from the object.
ens$testAcc
## [1] 76.35 73.35 74.55 76.05 75.75 81.44
ens$trainAcc
## [1] 78.38 82.58 77.18 82.28 81.98 75.23
# Alternatively, the getAcc() accessor yields the same per-member vectors.
getAcc(ens)$Test
## [1] 76.35 73.35 74.55 76.05 75.75 81.44
getAcc(ens)$Train
## [1] 78.38 82.58 77.18 82.28 81.98 75.23
# Predicting on unseen samples (the held-out test set)
# Score the held-out predictors with the ensemble; the printed table lists
# the voted class and a confidence score (%) for each test row.
cfPredict(ens, TestPred)
## Voted Class Conf Score(%)
## 1001 UP 100.00000
## 1002 UP 66.66667
## 1003 UP 100.00000
## 1004 DOWN 100.00000
## 1005 DOWN 100.00000
## 1006 DOWN 100.00000
## 1007 DOWN 100.00000
## 1008 DOWN 100.00000
## 1009 UP 100.00000
## 1010 DOWN 100.00000
## 1011 DOWN 100.00000
## 1012 UP 100.00000
## 1013 UP 83.33333
## 1014 DOWN 100.00000
## 1015 UP 100.00000
## 1016 UP 100.00000
## 1017 DOWN 83.33333
## 1018 DOWN 100.00000
## 1019 UP 100.00000
## 1020 DOWN 100.00000
## 1021 DOWN 100.00000
## 1022 DOWN 100.00000
## 1023 DOWN 100.00000
## 1024 UP 100.00000
## 1025 UP 100.00000
## 1026 UP 100.00000
## 1027 UP 100.00000
## 1028 DOWN 100.00000
## 1029 DOWN 100.00000
## 1030 UP 83.33333
## 1031 DOWN 100.00000
## 1032 DOWN 100.00000
## 1033 DOWN 100.00000
## 1034 DOWN 100.00000
## 1035 DOWN 100.00000
## 1036 DOWN 100.00000
## 1037 DOWN 100.00000
## 1038 DOWN 100.00000
## 1039 DOWN 100.00000
## 1040 DOWN 66.66667
## 1041 UP 100.00000
## 1042 DOWN 100.00000
## 1043 DOWN 100.00000
## 1044 UP 100.00000
## 1045 UP 100.00000
## 1046 DOWN 100.00000
## 1047 DOWN 50.00000
## 1048 DOWN 100.00000
## 1049 DOWN 50.00000
## 1050 UP 100.00000
## 1051 DOWN 100.00000
## 1052 DOWN 100.00000
## 1053 DOWN 100.00000
## 1054 UP 100.00000
## 1055 DOWN 100.00000
## 1056 DOWN 100.00000
## 1057 DOWN 100.00000
## 1058 DOWN 100.00000
## 1059 DOWN 100.00000
## 1060 UP 66.66667
## 1061 UP 100.00000
## 1062 DOWN 100.00000
## 1063 DOWN 100.00000
## 1064 DOWN 100.00000
## 1065 DOWN 100.00000
## 1066 DOWN 100.00000
## 1067 DOWN 100.00000
## 1068 UP 100.00000
## 1069 DOWN 100.00000
## 1070 DOWN 100.00000
## 1071 DOWN 50.00000
## 1072 DOWN 100.00000
## 1073 UP 83.33333
## 1074 DOWN 100.00000
## 1075 DOWN 100.00000
## 1076 UP 100.00000
## 1077 DOWN 50.00000
## 1078 UP 100.00000
## 1079 DOWN 100.00000
## 1080 UP 100.00000
## 1081 UP 100.00000
## 1082 UP 100.00000
## 1083 DOWN 100.00000
## 1084 DOWN 100.00000
## 1085 DOWN 100.00000
## 1086 DOWN 100.00000
## 1087 DOWN 100.00000
## 1088 UP 66.66667
## 1089 DOWN 100.00000
## 1090 DOWN 100.00000
## 1091 DOWN 100.00000
## 1092 UP 100.00000
## 1093 DOWN 100.00000
## 1094 DOWN 100.00000
## 1095 DOWN 100.00000
## 1096 DOWN 100.00000
## 1097 DOWN 100.00000
## 1098 DOWN 100.00000
## 1099 UP 100.00000
## 1100 DOWN 100.00000
## 1101 DOWN 100.00000
## 1102 DOWN 100.00000
## 1103 DOWN 100.00000
## 1104 DOWN 100.00000
## 1105 DOWN 100.00000
## 1106 UP 100.00000
## 1107 UP 100.00000
## 1108 UP 100.00000
## 1109 DOWN 100.00000
## 1110 UP 100.00000
## 1111 UP 100.00000
## 1112 DOWN 100.00000
## 1113 UP 100.00000
## 1114 DOWN 100.00000
## 1115 DOWN 83.33333
## 1116 DOWN 100.00000
## 1117 DOWN 100.00000
## 1118 UP 100.00000
## 1119 DOWN 100.00000
## 1120 DOWN 100.00000
## 1121 DOWN 83.33333
## 1122 DOWN 100.00000
## 1123 UP 100.00000
## 1124 DOWN 100.00000
## 1125 UP 100.00000
## 1126 DOWN 100.00000
## 1127 UP 100.00000
## 1128 DOWN 83.33333
## 1129 UP 100.00000
## 1130 DOWN 100.00000
## 1131 UP 100.00000
## 1132 UP 100.00000
## 1133 DOWN 66.66667
## 1134 UP 100.00000
## 1135 DOWN 50.00000
## 1136 UP 100.00000
## 1137 UP 100.00000
## 1138 DOWN 100.00000
## 1139 DOWN 83.33333
## 1140 DOWN 83.33333
## 1141 DOWN 83.33333
## 1142 DOWN 100.00000
## 1143 DOWN 50.00000
## 1144 DOWN 100.00000
## 1145 DOWN 100.00000
## 1146 DOWN 100.00000
## 1147 UP 100.00000
## 1148 UP 100.00000
## 1149 DOWN 66.66667
## 1150 DOWN 100.00000
## 1151 UP 83.33333
## 1152 DOWN 83.33333
## 1153 UP 100.00000
## 1154 DOWN 100.00000
## 1155 DOWN 100.00000
## 1156 UP 100.00000
## 1157 UP 100.00000
## 1158 DOWN 100.00000
## 1159 UP 100.00000
## 1160 UP 66.66667
## 1161 DOWN 100.00000
## 1162 DOWN 100.00000
## 1163 UP 100.00000
## 1164 UP 100.00000
## 1165 DOWN 100.00000
## 1166 DOWN 100.00000
## 1167 UP 100.00000
## 1168 DOWN 100.00000
## 1169 DOWN 100.00000
## 1170 DOWN 100.00000
## 1171 UP 83.33333
## 1172 DOWN 100.00000
## 1173 UP 100.00000
## 1174 DOWN 100.00000
## 1175 UP 100.00000
## 1176 UP 100.00000
## 1177 DOWN 100.00000
## 1178 UP 100.00000
## 1179 UP 100.00000
## 1180 DOWN 100.00000
## 1181 UP 100.00000
## 1182 DOWN 83.33333
## 1183 UP 100.00000
## 1184 UP 66.66667
## 1185 DOWN 100.00000
## 1186 DOWN 100.00000
## 1187 UP 100.00000
## 1188 DOWN 100.00000
## 1189 DOWN 100.00000
## 1190 UP 66.66667
## 1191 DOWN 100.00000
## 1192 DOWN 100.00000
## 1193 DOWN 100.00000
## 1194 DOWN 100.00000
## 1195 UP 100.00000
## 1196 DOWN 66.66667
## 1197 DOWN 100.00000
## 1198 UP 83.33333
## 1199 DOWN 100.00000
## 1200 UP 100.00000
## 1201 UP 66.66667
## 1202 DOWN 100.00000
## 1203 UP 100.00000
## 1204 DOWN 100.00000
## 1205 UP 100.00000
## 1206 UP 100.00000
## 1207 UP 100.00000
## 1208 UP 100.00000
## 1209 UP 100.00000
## 1210 DOWN 66.66667
## 1211 DOWN 100.00000
## 1212 DOWN 100.00000
## 1213 DOWN 100.00000
## 1214 DOWN 100.00000
## 1215 DOWN 100.00000
## 1216 UP 100.00000
## 1217 UP 100.00000
## 1218 UP 100.00000
## 1219 DOWN 83.33333
## 1220 UP 100.00000
## 1221 DOWN 83.33333
## 1222 DOWN 100.00000
## 1223 UP 100.00000
## 1224 UP 100.00000
## 1225 DOWN 100.00000
# Diagnostic plots for the fitted ensemble.

# Class predictions, drawn with stacked bars and text labels.
ggClassPred(
  ens,
  position = "stack",
  displayAll = TRUE,
  showText = TRUE
)

# Ensemble trend plot with text labels.
ggEnsTrend(ens, showText = TRUE)

# Ensemble histogram with density, percentile and mean overlays enabled.
ggEnsHist(
  ens,
  density = TRUE,
  percentiles = TRUE,
  mean = TRUE
)
