# Convert "%m/%d/%Y" date strings into ISO "YYYY-MM-DD" character strings.
to_iso_date <- function(x) {
  format(strptime(as.character(x), "%m/%d/%Y"), "%Y-%m-%d")
}

# Per-stock weekly data: one numeric column, two character columns, then
# 13 numeric columns.
DowJonesData <- read.csv(
  "dow_jones_index.csv",
  colClasses = c("numeric", "character", "character", rep("numeric", 13)),
  header = TRUE,
  sep = ","
)

# Index-only series: one numeric column, one character column, then
# 5 numeric columns.
DJIndexOnly <- read.csv(
  "DJIndexOnly.csv",
  colClasses = c("numeric", "character", rep("numeric", 5)),
  header = TRUE,
  sep = ","
)

# Normalise both date columns to the same ISO text representation.
DowJonesData$date <- to_iso_date(DowJonesData$date)
DJIndexOnly$Date <- to_iso_date(DJIndexOnly$Date)
# Split the combined data into one data frame per ticker. Each result is
# stored as "<TICKER>Stock" (MMMStock, AXPStock, ...), the names the rest of
# the script refers to. Driving this from a single ticker vector replaces 30
# copy-pasted filter() calls, where a ticker could be mistyped on one side of
# an assignment without any error being raised.
stock_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)
for (ticker in stock_tickers) {
  # `!!ticker` injects the loop value into the filter expression, so it can
  # never be shadowed by a data column that happens to be called `ticker`.
  assign(
    paste0(ticker, "Stock"),
    filter(DowJonesData, stock == !!ticker)
  )
}
# Period-over-period returns for the index and for every stock, computed with
# Delt() (quantmod is attached elsewhere in this file). na.omit() drops the
# NA entries, i.e. the leading observation that has no previous value.
# NOTE(review): the price columns are picked by position (6 for the index,
# 7 for the stocks); presumably these are the closing prices -- confirm
# against the CSV headers.
ReturnDJI <- na.omit(Delt(DJIndexOnly[, 6]))

# One "<TICKER>Return" object per "<TICKER>Stock" data frame. The loop
# replaces 30 copy-pasted lines and produces exactly the same objects.
return_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)
for (ticker in return_tickers) {
  stock_prices <- get(paste0(ticker, "Stock"))[, 7]
  assign(paste0(ticker, "Return"), na.omit(Delt(stock_prices)))
}
# Column labels for the combined returns: the index first, followed by the
# 30 stocks in the same order in which their returns are bound below.
DJColNames <- c(
  "DJI", "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX",
  "CSCO", "KO", "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM",
  "JNJ", "JPM", "KRFT", "MCD", "MRK", "MSFT", "PFE", "PG",
  "TRV", "UTX", "VZ", "WMT", "DIS"
)

# Bind the index returns and every stock's returns column-wise into a single
# object, one column per series.
DJStockReturns <- cbind(
  ReturnDJI,
  MMMReturn, AXPReturn, AAReturn, TReturn, BACReturn,
  BAReturn, CATReturn, CVXReturn, CSCOReturn, KOReturn,
  DDReturn, XOMReturn, GEReturn, HPQReturn, HDReturn,
  INTCReturn, IBMReturn, JNJReturn, JPMReturn, KRFTReturn,
  MCDReturn, MRKReturn, MSFTReturn, PFEReturn, PGReturn,
  TRVReturn, UTXReturn, VZReturn, WMTReturn, DISReturn
)

# Label each return column with its ticker.
colnames(DJStockReturns) <- DJColNames
# Visual check of the return distributions: one box per column of
# DJStockReturns (the DJI index plus each individual stock). The index
# represents the market, so its spread is the reference against which the
# individual stocks' spreads can be compared.
par(mfrow = c(2, 1))
boxplot(
  DJStockReturns,
  main = "Expected Return",
  xlab = "Stock Picks",
  ylab = "Return"
)

# CAPM, step 1: estimate each stock's beta as the slope of a simple linear
# regression of the stock's returns on the DJI (market) returns. The fit of
# each model can be judged by its R-squared via summary(). A linear model is
# used because the coefficient itself is the quantity of interest.
#
# All 30 regressions are run from one loop instead of 30 copy-pasted stanzas.
# The loop still creates the per-stock objects `lm.<name>` and `Beta<name>`.
# For MMM the historical name "3M" is kept (lm.3M, Beta3M).
stocktic <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX",
  "CSCO", "KO", "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM",
  "JNJ", "JPM", "KRFT", "MCD", "MRK", "MSFT", "PFE", "PG",
  "TRV", "UTX", "VZ", "WMT", "DIS"
)
capm_suffix <- ifelse(stocktic == "MMM", "3M", stocktic)

# Preallocated and deliberately unnamed, so that data.frame() below does not
# pick up row names from the vector.
betas <- numeric(length(stocktic))
for (i in seq_along(stocktic)) {
  capm_formula <- reformulate("DJI", response = stocktic[i])
  # bquote() splices the concrete formula into the call, so the stored call
  # (shown by print()/summary()) reads e.g. `lm(formula = MMM ~ DJI, ...)`
  # rather than referring to a loop variable.
  capm_fit <- eval(bquote(
    lm(.(capm_formula), data = as.data.frame(DJStockReturns))
  ))
  # Slope on DJI, i.e. the stock's beta.
  betas[i] <- coef(capm_fit)[["DJI"]]
  assign(paste0("lm.", capm_suffix[i]), capm_fit)
  assign(paste0("Beta", capm_suffix[i]), betas[i])
}

# One row per stock, then print the stocks from highest to lowest beta.
stockbeta_df <- data.frame(stock = stocktic, beta = betas)
stockbeta_df %>% arrange(desc(beta))
## stock beta
## 1 CAT 0.92789831
## 2 DD 0.84145395
## 3 AA 0.74909840
## 4 UTX 0.44934253
## 5 INTC 0.43036653
## 6 CVX 0.30620406
## 7 VZ 0.25720271
## 8 HPQ 0.24479489
## 9 HD 0.19832895
## 10 DIS 0.19028154
## 11 IBM 0.13346597
## 12 XOM 0.10266945
## 13 TRV 0.06636522
## 14 AXP 0.03239515
## 15 KO -0.04235410
## 16 MSFT -0.04802629
## 17 KRFT -0.06118033
## 18 WMT -0.07107965
## 19 GE -0.12849894
## 20 MMM -0.13421958
## 21 JNJ -0.17262055
## 22 JPM -0.18909367
## 23 CSCO -0.21872018
## 24 T -0.22282535
## 25 PG -0.22741460
## 26 BA -0.23741113
## 27 PFE -0.28261619
## 28 BAC -0.33132903
## 29 MCD -0.42867590
## 30 MRK -0.46722907
# If a stock has a beta of 1.00, its price tends to move in step with the
# market. Such a stock carries systematic (market) risk; the beta calculation
# cannot detect any unsystematic (stock-specific) risk. Adding a stock with a
# beta of 1.00 to a portfolio doesn't add any risk to the portfolio, but it
# also doesn't increase the likelihood that the portfolio will provide excess
# return.
# A beta of less than 1.00 means that the security is theoretically less volatile
# than the market which means the portfolio is less risky with the stock included
# than without it.
# A beta that is greater than 1.00 indicates that the security's price is
# theoretically more volatile than the market. For example, if a stock's beta is
# 1.20, it is assumed to be 20% more volatile than the market. Technology stocks and
# small caps tend to have higher betas than the market benchmark. This indicates that
# adding the stock to a portfolio will increase the portfolio’s risk, but also
# increase its expected return.
# For computation of return on investment, you can use US Treasury bill rates
# as the risk-free rate and a market index as the market rate (the DJI is
# used below).
# ROI calculation
# US Treasury bill rate (Jan 1, 2011): 3.39% (risk-free rate for 2 quarters)
# DJI market rate for 2011: 7.22% (for 2 quarters)
# E[R_i] = 3.39% + beta_i * (7.22% - 3.39%)
# For example, for the DD stock the calculation is shown below:
# Beta of DD: 0.841453946721087
# DD (roi) = 3.39% + 0.841453946721087 * 3.83% = 6.61%
# CAPM expected return (in percent) for every stock:
#   roi = risk_free + beta * (market - risk_free)
# The two rates were previously repeated as literals on 30 separate lines;
# they are now named once and applied to the whole `betas` vector. `betas`
# is in the same order as `stocktic`.
risk_free_rate <- 3.39 # percent
market_rate <- 7.22 # percent
rois <- risk_free_rate + betas * (market_rate - risk_free_rate)

# Keep the individual ROI.<name> scalars available for any code that still
# refers to them. For MMM the historical name "3M" is kept (ROI.3M).
roi_suffix <- ifelse(stocktic == "MMM", "3M", stocktic)
for (i in seq_along(rois)) {
  assign(paste0("ROI.", roi_suffix[i]), rois[i])
}

# Build the ROI table for the tickers and print it from highest to lowest
# ROI. Sorting uses the data frame's own `roi` column; the earlier code
# sorted by the global vector `rois`, which only works while the table's row
# order matches that vector.
stockroi_df <- data.frame(stock = stocktic, roi = rois)
stockroi_df %>% arrange(desc(roi))
## stock roi
## 1 CAT 6.943851
## 2 DD 6.612769
## 3 AA 6.259047
## 4 UTX 5.110982
## 5 INTC 5.038304
## 6 CVX 4.562762
## 7 VZ 4.375086
## 8 HPQ 4.327564
## 9 HD 4.149600
## 10 DIS 4.118778
## 11 IBM 3.901175
## 12 XOM 3.783224
## 13 TRV 3.644179
## 14 AXP 3.514073
## 15 KO 3.227784
## 16 MSFT 3.206059
## 17 KRFT 3.155679
## 18 WMT 3.117765
## 19 GE 2.897849
## 20 MMM 2.875939
## 21 JNJ 2.728863
## 22 JPM 2.665771
## 23 CSCO 2.552302
## 24 T 2.536579
## 25 PG 2.519002
## 26 BA 2.480715
## 27 PFE 2.307580
## 28 BAC 2.121010
## 29 MCD 1.748171
## 30 MRK 1.600513
# Add lagged features to every stock's data frame and store the result as
# "<TICKER>StockLag":
#   close_1 - previous row's close
#   open_1  - previous row's open
#   week    - running row number (1, 2, ...)
#
# dplyr::lag() is called with its namespace because a bare lag() silently
# resolves to stats::lag() when dplyr's version is masked, and stats::lag()
# does not shift the values. `week` is derived from the number of rows
# instead of a hard-coded 1:25, so it matches whatever the data contains.
# NOTE(review): this assumes the rows of each "<TICKER>Stock" data frame are
# already in chronological order -- confirm against the input file.
lag_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)
for (ticker in lag_tickers) {
  stock_rows <- get(paste0(ticker, "Stock"))
  assign(
    paste0(ticker, "StockLag"),
    mutate(
      stock_rows,
      close_1 = dplyr::lag(close),
      open_1 = dplyr::lag(open),
      week = seq_len(dplyr::n())
    )
  )
}
#TODO - All Models -Linear (FULL), SVR (Radial, Linear), Full Decision Tree
#For each individual stocks
#Train Quarter 1 [Week 1 - 12]
#Test Quarter 2 [Week 13 - 25]
#Prediction for Week 26
#----------------------------------------
# Model Selection using best_subset for few stocks
#----------------------------------------
# "MMM"
# Use every MMM row except the first, whose lagged columns are NA.
predictor_sel <- MMMStockLag[-1, ]

# Best-subset selection for `close` over seven candidate predictors. An
# earlier, commented-out variant also offered next_weeks_open,
# next_weeks_close and percent_change_next_weeks_price (ten candidates).
best_subset <- regsubsets(
  close ~ close_1 + volume + percent_change_price +
    percent_change_volume_over_last_wk + previous_weeks_volume +
    days_to_next_dividend + percent_return_next_dividend,
  predictor_sel,
  nvmax = 10
)
results <- summary(best_subset)

# Plot adjusted R2, Cp and BIC against the number of predictors, one facet
# per statistic. The x values are derived from the results instead of a
# hard-coded 1:7, so the data frame still builds if fewer model sizes come
# back. `FALSE` is spelled out because `F` is an ordinary variable that can
# be reassigned.
data.frame(
  predictors = seq_along(results$adjr2),
  adj_R2 = results$adjr2,
  Cp = results$cp,
  BIC = results$bic
) %>%
  gather(statistic, value, -predictors) %>%
  ggplot(aes(predictors, value, color = statistic)) +
  geom_line(show.legend = FALSE) +
  geom_point(show.legend = FALSE) +
  facet_wrap(~ statistic, scales = "free") +
  # For the ten-candidate variant use breaks = c(2, 4, 6, 8, 10).
  scale_x_continuous(breaks = c(1, 3, 5, 7)) +
  geom_vline(xintercept = 7, col = "#8866AA")

# Fit a linear model for one stock's weekly `close` and report how well it
# predicts held-out rows.
#
# Arguments:
#   stockData  data frame with the columns close, close_1, volume,
#              percent_change_price, percent_change_volume_over_last_wk,
#              previous_weeks_volume, days_to_next_dividend,
#              percent_return_next_dividend and week.
#   tik        label written to the `stock` column of the result.
#
# Returns a data frame with the columns stock, RMSE, R2 and wk26_close, with
# one row per week-25 row found in `stockData`.
#
# Side effect: resets the global RNG seed to 123 on every call, so the same
# row positions are drawn for every stock with the same number of rows.
lm_stock_fn <- function(stockData, tik) {
  set.seed(123)

  # Random split: nrow / 1.43 is roughly 70% of the rows, and sample()
  # truncates the non-integer size.
  train <- sample(seq_len(nrow(stockData)), nrow(stockData) / 1.43)

  # Week 25 is held back for the final prediction and is never trained on.
  # NOTE(review): if week 25 is not drawn into `train`, it also lands in the
  # test set below -- confirm whether that overlap is intended.
  stockDataValidation <- stockData %>% filter(week == 25)
  stockDataTrain <- stockData[train, ] %>% filter(week != 25)
  stockDataTest <- stockData[-train, ]

  # Train. The discarded summary() call that used to follow the fit had no
  # effect inside the function and has been removed.
  lm_stock <- lm(
    close ~ close_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = stockDataTrain
  )

  # Test: RMSE and R-squared of the predictions on the held-out rows.
  stockpredict <- predict(lm_stock, newdata = stockDataTest)
  stockRMSE <- RMSE(stockpredict, stockDataTest$close)
  stockR2 <- R2(stockpredict, stockDataTest$close)

  # Prediction from the week-25 row(s), reported as `wk26_close`.
  # NOTE(review): the predictors are those of the week-25 row itself;
  # confirm that this is the intended input for a week-26 forecast.
  pred_close <- predict(lm_stock, newdata = stockDataValidation)

  data.frame(
    stock = tik,
    RMSE = stockRMSE,
    R2 = stockR2,
    wk26_close = pred_close
  )
}
# stock <- "3M"
# stockData <- MMMStockLag[2:nrow(MMMStockLag),]
# stock_RMSE_df <- lm_stock_fn(stockData,stock)
# stock_RMSE_df
#create train and test data sets
# Fit the close-price model for every stock and collect the per-stock
# results in one data frame. Results are gathered in a preallocated list and
# bound once, replacing 30 copy-pasted stanzas that grew the data frame with
# repeated rbind() calls.
close_model_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)
close_model_results <- vector("list", length(close_model_tickers))
for (i in seq_along(close_model_tickers)) {
  stock <- close_model_tickers[i]
  # Drop the first week: its lagged columns (close_1, open_1) are NA.
  stockData <- get(paste0(stock, "StockLag"))[-1, ]
  close_model_results[[i]] <- lm_stock_fn(stockData, stock)
}
stock_RMSE_df <- do.call(rbind, close_model_results)

# stock_RMSE_df %>% arrange(desc(RMSE))
# Attach each stock's beta to its model metrics and print the table.
stock_RMSE_beta_df <- stock_RMSE_df %>%
  inner_join(stockbeta_df, by = "stock")
stock_RMSE_beta_df
## stock RMSE R2 wk26_close beta
## 1 MMM 0.08173473 0.9993467 90.92123 -0.13421958
## 2 AXP 0.13698121 0.9966374 48.51104 0.03239515
## 3 AA 0.08336188 0.9961460 15.22372 0.74909840
## 4 T 0.13514496 0.9936659 30.52512 -0.22282535
## 5 BAC 0.07429363 0.9991628 10.45915 -0.33132903
## 6 BA 0.18884720 0.9967019 71.44162 -0.23741113
## 7 CAT 1.81679363 0.9703712 99.05213 0.92789831
## 8 CVX 0.23857446 0.9990295 97.45156 0.30620406
## 9 CSCO 0.10097439 0.9979944 14.95534 -0.21872018
## 10 KO 0.19288767 0.9893348 65.06294 -0.04235410
## 11 DD 0.14695405 0.9978800 52.05700 0.84145395
## 12 XOM 0.69410250 0.9935125 77.45717 0.10266945
## 13 GE 0.17916477 0.9745907 17.68976 -0.12849894
## 14 HPQ 0.40108420 0.9874508 35.05573 0.24479489
## 15 HD 0.06364468 0.9944181 35.00414 0.19832895
## 16 INTC 0.20337768 0.9602232 21.09635 0.43036653
## 17 IBM 2.99950305 0.7815909 165.27614 0.13346597
## 18 JNJ 0.26200515 0.9936776 65.38101 -0.17262055
## 19 JPM 0.14451896 0.9982054 39.13618 -0.18909367
## 20 KRFT 0.05981023 0.9976355 34.56931 -0.06118033
## 21 MCD 0.10477064 0.9983814 81.85857 -0.42867590
## 22 MRK 0.08087900 0.9959822 34.59599 -0.46722907
## 23 MSFT 0.21108754 0.9743337 24.25737 -0.04802629
## 24 PFE 0.03649884 0.9990041 20.09871 -0.28261619
## 25 PG 1.74835922 0.7448523 62.56916 -0.22741460
## 26 TRV 0.51374964 0.9889477 57.14310 0.06636522
## 27 UTX 0.51976247 0.9848143 84.46974 0.44934253
## 28 VZ 0.05740034 0.9982092 36.00819 0.25720271
## 29 WMT 0.05684715 0.9987619 52.45641 -0.07107965
## 30 DIS 0.10085327 0.9982791 37.39680 0.19028154
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.0.5
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
##
## cluster
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:quantmod':
##
## Lag
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(corrplot)
## corrplot 0.90 loaded
# Correlation heat maps for DIS, once around `close` and once around `open`.
# Both use every row except the first (whose lagged columns are NA), the same
# six shared predictors and the same red-white-blue palette.
dis_lagged <- DISStockLag[2:nrow(DISStockLag), ]
shared_predictors <- c(
  "volume", "percent_change_price", "percent_change_volume_over_last_wk",
  "previous_weeks_volume", "days_to_next_dividend",
  "percent_return_next_dividend"
)
# A wider candidate set used previously:
# c("week","close","open","high","low","volume","percent_change_price","percent_change_volume_over_last_wk","previous_weeks_volume","days_to_next_dividend","percent_return_next_dividend","close_1","open_1")
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))

# --- close and its one-week lag ---
close_vars <- c("week", "close", shared_predictors, "close_1")
cor_5 <- rcorr(as.matrix(dis_lagged[, close_vars]))
M <- cor_5$r
p_mat <- cor_5$P
corrplot(
  M,
  method = "color",
  col = col(200),
  type = "upper",
  order = "hclust",
  addCoef.col = "black", # print the correlation coefficient in each cell
  tl.col = "darkblue", # text label colour
  tl.srt = 45, # text label rotation
  p.mat = p_mat, # p-values, combined with the significance level below
  sig.level = 0.01,
  diag = FALSE # hide the coefficients on the principal diagonal
)

# --- open and its one-week lag ---
open_vars <- c("week", "open", shared_predictors, "open_1")
cor_5 <- rcorr(as.matrix(dis_lagged[, open_vars]))
M <- cor_5$r
p_mat <- cor_5$P
corrplot(
  M,
  method = "color",
  col = col(200),
  type = "upper",
  order = "hclust",
  addCoef.col = "black", # print the correlation coefficient in each cell
  tl.col = "darkblue", # text label colour
  tl.srt = 45, # text label rotation
  p.mat = p_mat, # p-values, combined with the significance level below
  sig.level = 0.01,
  diag = FALSE # hide the coefficients on the principal diagonal
)

# Fit a linear model for one stock's weekly `open` and report how well it
# predicts held-out rows.
#
# Arguments:
#   stockData  data frame with the columns open, open_1, volume,
#              percent_change_price, percent_change_volume_over_last_wk,
#              previous_weeks_volume, days_to_next_dividend,
#              percent_return_next_dividend and week.
#   tik        label written to the `stock` column of the result.
#
# Returns a data frame with the columns stock, RMSE, R2 and wk26_open, with
# one row per week-25 row found in `stockData`.
#
# Side effect: resets the global RNG seed to 123 on every call, so the same
# row positions are drawn for every stock with the same number of rows.
lm_stock_open_fn <- function(stockData, tik) {
  set.seed(123)

  # Random split: nrow / 1.43 is roughly 70% of the rows, and sample()
  # truncates the non-integer size.
  train <- sample(seq_len(nrow(stockData)), nrow(stockData) / 1.43)

  # Week 25 is held back for the final prediction and is never trained on.
  # NOTE(review): if week 25 is not drawn into `train`, it also lands in the
  # test set below -- confirm whether that overlap is intended.
  stockDataValidation <- stockData %>% filter(week == 25)
  stockDataTrain <- stockData[train, ] %>% filter(week != 25)
  stockDataTest <- stockData[-train, ]

  # Train. The discarded summary() call that used to follow the fit had no
  # effect inside the function and has been removed.
  lm_stock <- lm(
    open ~ open_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = stockDataTrain
  )

  # Test: both metrics compare the predictions with the observed `open`.
  # RMSE was previously computed against `close`, which measured the
  # open/close gap rather than the model's error.
  stockpredict <- predict(lm_stock, newdata = stockDataTest)
  stockRMSE <- RMSE(stockpredict, stockDataTest$open)
  stockR2 <- R2(stockpredict, stockDataTest$open)

  # Prediction from the week-25 row(s), reported as `wk26_open`.
  # NOTE(review): the predictors are those of the week-25 row itself;
  # confirm that this is the intended input for a week-26 forecast.
  pred_open <- predict(lm_stock, newdata = stockDataValidation)

  data.frame(
    stock = tik,
    RMSE = stockRMSE,
    R2 = stockR2,
    wk26_open = pred_open
  )
}
# Run the linear open-price model for each ticker below, in this order, and
# stack the one-row summaries. Each ticker's data comes from its
# <TICKER>StockLag table with the first row left out.
dow_tickers <- c("MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO",
                 "KO", "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ",
                 "JPM", "KRFT", "MCD", "MRK", "MSFT", "PFE", "PG", "TRV",
                 "UTX", "VZ", "WMT", "DIS")
# Reduce(rbind, ...) appends the results pairwise, left to right.
stock_RMSE_open_df <- Reduce(rbind, lapply(dow_tickers, function(tik) {
  lagged <- get(paste0(tik, "StockLag"))
  lm_stock_open_fn(lagged[2:nrow(lagged),], tik)
}))
# Leave `stock` and `stockData` holding the last ticker processed.
stock <- "DIS"
stockData <- DISStockLag[2:nrow(DISStockLag),]
stock_RMSE_open_df
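## NOTE: in the run that produced this output, the RMSE column was computed against the hold-out `close` prices, while R2 was computed against `open`.
## stock RMSE R2 wk26_open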
## 1 MMM 1.8822136 0.9992394 91.47696
## 11 AXP 1.1921029 0.9933907 48.55281
## 12 AA 0.5162242 0.9908228 14.56018
## 13 T 0.6770761 0.9878839 30.69457
## 14 BAC 0.4478526 0.9910542 10.42286
## 15 BA 1.9425551 0.9956137 74.09903
## 16 CAT 6.0515915 0.8765235 92.96805
## 17 CVX 1.7895533 0.9964108 98.47113
## 18 CSCO 0.3624150 0.9639440 14.66808
## 19 KO 0.9126795 0.9963773 65.47234
## 110 DD 1.2962037 0.9999508 49.41491
## 111 XOM 2.0052543 0.9428082 79.30629
## 112 GE 0.3451259 0.8417561 17.96719
## 113 HPQ 3.5167208 0.8938147 34.99727
## 114 HD 0.9938124 0.9984861 34.27297
## 115 INTC 1.1926198 0.4172535 21.17506
## 116 IBM 3.5527344 0.8803476 163.55746
## 117 JNJ 1.8471691 0.8405906 66.59087
## 118 JPM 0.8610462 0.9956759 40.22083
## 119 KRFT 0.3935196 0.9978537 34.39044
## 120 MCD 0.8876942 0.9979608 82.19868
## 121 MRK 1.4100163 0.9969460 35.34482
## 122 MSFT 0.4044811 0.9745418 24.09279
## 123 PFE 0.4284994 0.9989998 20.12584
## 124 PG 2.0151904 0.5027857 64.52804
## 125 TRV 2.3396409 0.5183677 58.21255
## 126 UTX 3.1071277 0.3872658 82.88227
## 127 VZ 0.7559505 0.9978611 35.31459
## 128 WMT 0.9695742 0.9976689 52.72207
## 129 DIS 0.5907595 0.9993784 37.71319
# Pair each stock's predicted close (stock_RMSE_beta_df) with its predicted
# open (stock_RMSE_open_df) and express the difference as a percentage of the
# predicted open.
stock_return_df <- inner_join(stock_RMSE_beta_df, stock_RMSE_open_df, by = "stock")
stock_return_df <- mutate(
  stock_return_df,
  return = ((wk26_close-wk26_open)/wk26_open)*100
)
# Column chart of the returns, highest first.
hc <- hchart(
  arrange(stock_return_df, desc(return)),
  "column",
  hcaes(x = stock, y = return)
)
hc_title(
  hc_add_theme(hc, hc_theme_economist()),
  text = "Percentage Returns across various stocks for week 26 for Linear Regression"
)
# Same ranking as a two-column table.
arrange(select(stock_return_df, stock, return), desc(return))
## stock return
## 1 CAT 6.54427873
## 2 DD 5.34674464
## 3 AA 4.55722071
## 4 HD 2.13338082
## 5 VZ 1.96407736
## 6 CSCO 1.95839284
## 7 UTX 1.91533411
## 8 IBM 1.05081067
## 9 MSFT 0.68311928
## 10 KRFT 0.52011437
## 11 BAC 0.34816939
## 12 HPQ 0.16706292
## 13 AXP -0.08602459
## 14 PFE -0.13480583
## 15 INTC -0.37171502
## 16 MCD -0.41376286
## 17 WMT -0.50388511
## 18 T -0.55202997
## 19 MMM -0.60751327
## 20 KO -0.62531127
## 21 DIS -0.83893759
## 22 CVX -1.03539758
## 23 GE -1.54407809
## 24 JNJ -1.81685159
## 25 TRV -1.83714501
## 26 MRK -2.11864794
## 27 XOM -2.33161478
## 28 JPM -2.69675031
## 29 PG -3.03570844
## 30 BA -3.58630326
# Linear-kernel support vector regression ("svmLinear" through train()) for
# one stock's weekly closing price.
#
# Args:
#   stockData: data frame of one stock's weekly rows. Must contain `week`,
#     `close` and every predictor named in the model formula below.
#   tik: ticker label, copied into the `stock` column of the result.
#
# Returns:
#   A data frame with columns `stock`, `RMSE`, `R2` (both on the hold-out
#   rows, against `close`) and `wk26_close` (prediction for the week-25 row).
svr_close_stock_fn <- function(stockData,tik)
{
  set.seed(123)  # fixed seed: same split and same resampling on every call

  # Random split: nrow/1.43 (roughly 70%) of the rows are drawn for training.
  n_rows <- nrow(stockData)
  train_idx <- sample(seq_len(n_rows), n_rows/1.43)
  holdout_rows <- stockData[-train_idx,]

  # Week 25 feeds the final prediction and is kept out of the fit.
  week25_rows <- filter(stockData, week == 25)
  fit_rows <- filter(stockData[train_idx,], week != 25)

  # 10-fold cross-validation repeated 3 times; predictors centred and scaled.
  cv_ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
  svm_fit <- train(
    close ~ close_1+volume+percent_change_price+
      percent_change_volume_over_last_wk+previous_weeks_volume+
      days_to_next_dividend+percent_return_next_dividend,
    data = fit_rows,
    method = "svmLinear",
    trControl = cv_ctrl,
    preProcess = c("center","scale")
  )
  summary(svm_fit)

  # Hold-out accuracy, then the week-25 prediction.
  holdout_pred <- predict(svm_fit, newdata = holdout_rows)
  data.frame(
    stock = tik,
    RMSE = RMSE(holdout_pred, holdout_rows$close),
    R2 = R2(holdout_pred, holdout_rows$close),
    wk26_close = predict(svm_fit, newdata = week25_rows)
  )
}
# Run the linear SVR close-price model for each ticker below, in this order,
# and stack the one-row summaries. Each ticker's data comes from its
# <TICKER>StockLag table with the first row left out.
# In the recorded run every call emitted:
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
## There were missing values in resampled performance measures.
dow_tickers <- c("MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO",
                 "KO", "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ",
                 "JPM", "KRFT", "MCD", "MRK", "MSFT", "PFE", "PG", "TRV",
                 "UTX", "VZ", "WMT", "DIS")
# Reduce(rbind, ...) appends the results pairwise, left to right.
stock_svm_RMSE_df <- Reduce(rbind, lapply(dow_tickers, function(tik) {
  lagged <- get(paste0(tik, "StockLag"))
  svr_close_stock_fn(lagged[2:nrow(lagged),], tik)
}))
# Leave `stock` and `stockData` holding the last ticker processed.
stock <- "DIS"
stockData <- DISStockLag[2:nrow(DISStockLag),]
# Attach each stock's beta.
stock_svm_RMSE_beta_df <- inner_join(stock_svm_RMSE_df, stockbeta_df, by = "stock")
stock_svm_RMSE_beta_df
## stock RMSE R2 wk26_close beta
## 1 MMM 0.3802664 0.9943455 91.08929 -0.13421958
## 2 AXP 0.2142198 0.9923841 48.29861 0.03239515
## 3 AA 0.1128046 0.9950341 15.29994 0.74909840
## 4 T 0.1503597 0.9927434 30.39391 -0.22282535
## 5 BAC 0.1081417 0.9969500 10.43436 -0.33132903
## 6 BA 0.5078308 0.9846753 71.57247 -0.23741113
## 7 CAT 1.3931046 0.9814216 98.73987 0.92789831
## 8 CVX 0.5250525 0.9967137 97.92804 0.30620406
## 9 CSCO 0.1927739 0.9922807 14.88133 -0.21872018
## 10 KO 0.2715334 0.9893847 65.02723 -0.04235410
## 11 DD 0.2941684 0.9945414 51.95032 0.84145395
## 12 XOM 1.0217313 0.9897918 77.12696 0.10266945
## 13 GE 0.2353072 0.9551450 17.72318 -0.12849894
## 14 HPQ 0.5112175 0.9814440 35.33649 0.24479489
## 15 HD 0.1478074 0.9723425 35.17451 0.19832895
## 16 INTC 0.2867833 0.9204146 21.12982 0.43036653
## 17 IBM 2.0683745 0.9491333 165.52047 0.13346597
## 18 JNJ 0.3075229 0.9878257 65.29513 -0.17262055
## 19 JPM 0.1850479 0.9936928 39.42824 -0.18909367
## 20 KRFT 0.2701153 0.9871429 34.48921 -0.06118033
## 21 MCD 0.5157497 0.9603963 81.76642 -0.42867590
## 22 MRK 0.2901749 0.9806675 34.68842 -0.46722907
## 23 MSFT 0.1117296 0.9951931 24.38165 -0.04802629
## 24 PFE 0.1022475 0.9931887 20.14373 -0.28261619
## 25 PG 1.0045209 0.8959072 62.58114 -0.22741460
## 26 TRV 1.1100552 0.9580407 57.22094 0.06636522
## 27 UTX 0.6241828 0.9832161 84.44774 0.44934253
## 28 VZ 0.1395358 0.9929129 36.03309 0.25720271
## 29 WMT 0.3549703 0.9531464 52.49328 -0.07107965
## 30 DIS 0.1861740 0.9958456 37.72777 0.19028154
# Linear-kernel support vector regression ("svmLinear" through train()) for
# one stock's weekly opening price.
#
# Args:
#   stockData: data frame of one stock's weekly rows. Must contain `week`,
#     `open` and every predictor named in the model formula below.
#   tik: ticker label, copied into the `stock` column of the result.
#
# Returns:
#   A data frame with columns `stock`, `RMSE`, `R2` (both on the hold-out
#   rows, against `open`) and `wk26_open` (prediction for the week-25 row).
svr_open_stock_fn <- function(stockData,tik)
{
  set.seed(123)  # fixed seed: same split and same resampling on every call

  # Random split: nrow/1.43 (roughly 70%) of the rows are drawn for training.
  n_rows <- nrow(stockData)
  train_idx <- sample(seq_len(n_rows), n_rows/1.43)
  holdout_rows <- stockData[-train_idx,]

  # Week 25 feeds the final prediction and is kept out of the fit.
  week25_rows <- filter(stockData, week == 25)
  fit_rows <- filter(stockData[train_idx,], week != 25)

  # 10-fold cross-validation repeated 3 times; predictors centred and scaled.
  cv_ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
  svm_fit <- train(
    open ~ open_1+volume+percent_change_price+
      percent_change_volume_over_last_wk+previous_weeks_volume+
      days_to_next_dividend+percent_return_next_dividend,
    data = fit_rows,
    method = "svmLinear",
    trControl = cv_ctrl,
    preProcess = c("center","scale")
  )
  summary(svm_fit)

  # Hold-out accuracy, then the week-25 prediction.
  holdout_pred <- predict(svm_fit, newdata = holdout_rows)
  data.frame(
    stock = tik,
    RMSE = RMSE(holdout_pred, holdout_rows$open),
    R2 = R2(holdout_pred, holdout_rows$open),
    wk26_open = predict(svm_fit, newdata = week25_rows)
  )
}
# Run the linear SVR open-price model for each ticker below, in this order,
# and stack the one-row summaries. Each ticker's data comes from its
# <TICKER>StockLag table with the first row left out.
# In the recorded run every call emitted:
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
## There were missing values in resampled performance measures.
dow_tickers <- c("MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO",
                 "KO", "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ",
                 "JPM", "KRFT", "MCD", "MRK", "MSFT", "PFE", "PG", "TRV",
                 "UTX", "VZ", "WMT", "DIS")
# Reduce(rbind, ...) appends the results pairwise, left to right.
stock_svm_open_RMSE_df <- Reduce(rbind, lapply(dow_tickers, function(tik) {
  lagged <- get(paste0(tik, "StockLag"))
  svr_open_stock_fn(lagged[2:nrow(lagged),], tik)
}))
# Leave `stock` and `stockData` holding the last ticker processed.
stock <- "DIS"
stockData <- DISStockLag[2:nrow(DISStockLag),]
# Attach each stock's beta.
stock_svm_open_RMSE_beta_df <- inner_join(stock_svm_open_RMSE_df, stockbeta_df, by = "stock")
stock_svm_open_RMSE_beta_df
## stock RMSE R2 wk26_open beta
## 1 MMM 0.11819476 0.9982432 91.52528 -0.13421958
## 2 AXP 0.11327886 0.9941003 48.38340 0.03239515
## 3 AA 0.13011319 0.9747921 14.61926 0.74909840
## 4 T 0.26798950 0.9546457 30.39764 -0.22282535
## 5 BAC 0.15880046 0.9865890 10.37817 -0.33132903
## 6 BA 0.32460122 0.9776563 74.21541 -0.23741113
## 7 CAT 3.10120506 0.8876992 93.60060 0.92789831
## 8 CVX 0.26153278 0.9991382 98.71365 0.30620406
## 9 CSCO 0.42140624 0.9625653 14.74548 -0.21872018
## 10 KO 0.19728909 0.9933203 65.37944 -0.04235410
## 11 DD 0.27717571 0.9959650 49.62808 0.84145395
## 12 XOM 1.84866591 0.9464878 79.71929 0.10266945
## 13 GE 0.52123336 0.7922848 17.99457 -0.12849894
## 14 HPQ 1.40001293 0.8810331 34.48635 0.24479489
## 15 HD 0.27077757 0.9871082 34.25877 0.19832895
## 16 INTC 0.54144445 0.4254526 21.24827 0.43036653
## 17 IBM 3.52987809 0.8375230 163.33635 0.13346597
## 18 JNJ 0.93558619 0.8888535 65.94157 -0.17262055
## 19 JPM 0.22834391 0.9904501 40.26681 -0.18909367
## 20 KRFT 0.18248134 0.9908823 34.27309 -0.06118033
## 21 MCD 0.38411613 0.9808093 81.79427 -0.42867590
## 22 MRK 0.22935978 0.9890732 35.30697 -0.46722907
## 23 MSFT 0.20675445 0.9888172 23.98540 -0.04802629
## 24 PFE 0.06200967 0.9991266 20.14129 -0.28261619
## 25 PG 1.78123334 0.5652423 64.22452 -0.22741460
## 26 TRV 1.95590152 0.5667495 58.51209 0.06636522
## 27 UTX 2.38878103 0.2997049 82.80277 0.44934253
## 28 VZ 0.11777273 0.9890945 35.32093 0.25720271
## 29 WMT 0.19346038 0.9765084 52.75279 -0.07107965
## 30 DIS 0.10637997 0.9983706 37.79230 0.19028154
# Pair each stock's SVR-predicted close with its SVR-predicted open and
# express the difference as a percentage of the predicted open.
stock_svm_return_df <- inner_join(
  stock_svm_RMSE_beta_df,
  stock_svm_open_RMSE_beta_df,
  by = "stock"
)
stock_svm_return_df <- mutate(
  stock_svm_return_df,
  return = ((wk26_close-wk26_open)/wk26_open)*100
)
# Column chart of the returns, highest first.
hc <- hchart(
  arrange(stock_svm_return_df, desc(return)),
  "column",
  hcaes(x = stock, y = return)
)
hc_title(
  hc_add_theme(hc, hc_theme_economist()),
  text = "Percentage Returns across various stocks for week 26 by SVR Linear"
)
# Radial-kernel support vector regression ("svmRadial" through train()) for
# one stock's weekly closing price.
#
# Args:
#   stockData: data frame of one stock's weekly rows. Must contain `week`,
#     `close` and every predictor named in the model formula below.
#   tik: ticker label, copied into the `stock` column of the result.
#
# Returns:
#   A data frame with columns `stock`, `RMSE`, `R2` (both on the hold-out
#   rows, against `close`) and `wk26_close` (prediction for the week-25 row).
svr_close_rad_stock_fn <- function(stockData,tik)
{
  set.seed(123)  # fixed seed: same split and same resampling on every call

  # Random split: nrow/1.43 (roughly 70%) of the rows are drawn for training.
  n_rows <- nrow(stockData)
  train_idx <- sample(seq_len(n_rows), n_rows/1.43)
  holdout_rows <- stockData[-train_idx,]

  # Week 25 feeds the final prediction and is kept out of the fit.
  week25_rows <- filter(stockData, week == 25)
  fit_rows <- filter(stockData[train_idx,], week != 25)

  # 10-fold cross-validation repeated 3 times; predictors centred and scaled.
  cv_ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
  svm_fit <- train(
    close ~ close_1+volume+percent_change_price+
      percent_change_volume_over_last_wk+previous_weeks_volume+
      days_to_next_dividend+percent_return_next_dividend,
    data = fit_rows,
    method = "svmRadial",
    trControl = cv_ctrl,
    preProcess = c("center","scale")
  )
  summary(svm_fit)

  # Hold-out accuracy, then the week-25 prediction.
  holdout_pred <- predict(svm_fit, newdata = holdout_rows)
  data.frame(
    stock = tik,
    RMSE = RMSE(holdout_pred, holdout_rows$close),
    R2 = R2(holdout_pred, holdout_rows$close),
    wk26_close = predict(svm_fit, newdata = week25_rows)
  )
}
# Fit the radial-kernel close-price SVR for every stock and collect one
# result row per ticker.
#
# Each ticker's input lives in a data frame named "<ticker>StockLag"; its
# first row is skipped before modelling (presumably because the lagged
# columns are empty there -- confirm where the *StockLag frames are built).
# `stock` and `stockData` are assigned at top level on every iteration, so
# after the loop they hold the last ticker and its data.
#
# caret emits "There were missing values in resampled performance measures."
# for each of these fits.
stock_lag_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)
svr_rad_close_rows <- vector("list", length(stock_lag_tickers))
names(svr_rad_close_rows) <- stock_lag_tickers
for (stock in stock_lag_tickers) {
  lagged_stock_df <- get(paste0(stock, "StockLag"))
  stockData <- lagged_stock_df[2:nrow(lagged_stock_df), ]
  svr_rad_close_rows[[stock]] <- svr_close_rad_stock_fn(stockData, stock)
}
# Bind once at the end; the list is unnamed first so rbind() does not turn
# the ticker names into row names.
stock_svm_rad_RMSE_df <- do.call(rbind, unname(svr_rad_close_rows))

# Attach each stock's beta.
stock_svm_rad_RMSE_beta_df <- stock_svm_rad_RMSE_df %>%
  inner_join(stockbeta_df, by = c("stock"))
stock_svm_rad_RMSE_beta_df
## stock RMSE R2 wk26_close beta
## 1 MMM 2.3098261 0.2493209 91.66328 -0.13421958
## 2 AXP 0.6774418 0.9273653 48.48744 0.03239515
## 3 AA 0.5311777 0.8849904 15.91051 0.74909840
## 4 T 0.5965136 0.8635392 30.36960 -0.22282535
## 5 BAC 0.7614229 0.8404442 11.20514 -0.33132903
## 6 BA 2.1603040 0.4561310 70.49220 -0.23741113
## 7 CAT 5.0268347 0.8093156 99.53022 0.92789831
## 8 CVX 3.1155324 0.8996591 97.90406 0.30620406
## 9 CSCO 0.3462675 0.9945265 15.36172 -0.21872018
## 10 KO 0.8312368 0.7903015 64.30593 -0.04235410
## 11 DD 1.4223760 0.8140969 52.15566 0.84145395
## 12 XOM 3.0205880 0.6538505 80.01028 0.10266945
## 13 GE 0.3829774 0.9410425 18.60998 -0.12849894
## 14 HPQ 2.2120349 0.6496335 38.20461 0.24479489
## 15 HD 0.6910601 0.3221326 36.04940 0.19832895
## 16 INTC 0.6059049 0.7154331 21.11640 0.43036653
## 17 IBM 5.7330400 0.6305468 165.74619 0.13346597
## 18 JNJ 0.7309835 0.9423770 65.52934 -0.17262055
## 19 JPM 0.6676120 0.9913537 40.91818 -0.18909367
## 20 KRFT 0.3864214 0.9448114 34.34196 -0.06118033
## 21 MCD 1.3590576 0.7912213 81.69304 -0.42867590
## 22 MRK 0.6815219 0.6597023 34.40185 -0.46722907
## 23 MSFT 0.3896214 0.9720678 24.24733 -0.04802629
## 24 PFE 0.6385690 0.6794693 20.29929 -0.28261619
## 25 PG 0.6158132 0.9370559 63.14637 -0.22741460
## 26 TRV 2.5438304 0.3594661 57.93584 0.06636522
## 27 UTX 2.3415057 0.6959357 84.42623 0.44934253
## 28 VZ 0.9085642 0.6232649 35.84094 0.25720271
## 29 WMT 0.9449857 0.7335300 52.73962 -0.07107965
## 30 DIS 0.9236357 0.9037071 39.04941 0.19028154
#' Radial-kernel SVR for one stock's weekly opening price
#'
#' Seeds the RNG with 123, draws roughly 70% of the rows for training (any
#' week-25 row is then removed from the training set), tunes a caret
#' "svmRadial" model with 10-fold cross-validation repeated 3 times, scores
#' it on the rows that were not drawn, and predicts `open` from the week-25
#' row(s).
#'
#' @param stockData Data frame for a single stock. Must contain `week`,
#'   `open`, `open_1`, `volume`, `percent_change_price`,
#'   `percent_change_volume_over_last_wk`, `previous_weeks_volume`,
#'   `days_to_next_dividend` and `percent_return_next_dividend`.
#' @param tik Ticker label copied into the `stock` column of the result.
#' @return Data frame with columns `stock`, `RMSE` and `R2` (both measured on
#'   the held-out rows) and `wk26_open` (the model's prediction for the
#'   week-25 row); one row per week-25 row in `stockData`.
svr_open_rad_stock_fn <- function(stockData, tik) {
  set.seed(123)

  # The week-25 row(s) supply the predictors for the reported forecast.
  # Fail early with a clear message instead of after the model is trained.
  stockDataValidation <- filter(stockData, week == 25)
  if (nrow(stockDataValidation) == 0) {
    stop("No week-25 row found in stockData for ticker ", tik, call. = FALSE)
  }

  # 70-30 split: 1 / 1.43 is about 0.70 of the rows.
  # (An alternative split by quarter was tried previously and left disabled.)
  train_idx <- sample(seq_len(nrow(stockData)), nrow(stockData) / 1.43)
  stockDataTrain <- filter(stockData[train_idx, ], week != 25)
  # Every row not drawn is held out; this set is not filtered on week, so it
  # can include the week-25 row.
  stockDataTest <- stockData[-train_idx, ]

  #------------------------------------
  # Model - Train
  #------------------------------------
  # Repeated k-fold cross-validation.
  # NOTE(review): caret reports "There were missing values in resampled
  # performance measures." for these fits -- presumably the folds are too
  # small for every resample metric to be computed; confirm before changing
  # the resampling scheme.
  train_control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
  svm_stock <- train(
    open ~ open_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = stockDataTrain,
    method = "svmRadial",
    trControl = train_control,
    preProcess = c("center", "scale")
  )

  #------------------------------------
  # Model - Test
  #------------------------------------
  stockpredict <- predict(svm_stock, newdata = stockDataTest)
  stockRMSE <- RMSE(stockpredict, stockDataTest$open)
  stockR2 <- R2(stockpredict, stockDataTest$open)

  #------------------------------------
  # Forecast from the week-25 row
  #------------------------------------
  pred_open <- predict(svm_stock, newdata = stockDataValidation)
  data.frame(stock = tik, RMSE = stockRMSE, R2 = stockR2, wk26_open = pred_open)
}
# Fit the radial-kernel open-price SVR for every stock and collect one
# result row per ticker.
#
# Each ticker's input lives in a data frame named "<ticker>StockLag"; its
# first row is skipped before modelling (presumably because the lagged
# columns are empty there -- confirm where the *StockLag frames are built).
# `stock` and `stockData` are assigned at top level on every iteration, so
# after the loop they hold the last ticker and its data.
#
# caret emits "There were missing values in resampled performance measures."
# for each of these fits.
stock_lag_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)
svr_rad_open_rows <- vector("list", length(stock_lag_tickers))
names(svr_rad_open_rows) <- stock_lag_tickers
for (stock in stock_lag_tickers) {
  lagged_stock_df <- get(paste0(stock, "StockLag"))
  stockData <- lagged_stock_df[2:nrow(lagged_stock_df), ]
  svr_rad_open_rows[[stock]] <- svr_open_rad_stock_fn(stockData, stock)
}
# Bind once at the end; the list is unnamed first so rbind() does not turn
# the ticker names into row names.
stock_svm__rad_open_RMSE_df <- do.call(rbind, unname(svr_rad_open_rows))

# Attach each stock's beta.
# NOTE(review): this reuses the name `stock_svm_open_RMSE_beta_df`, which the
# linear-SVR return calculation earlier in the file also reads, so the
# linear-SVR open results are overwritten from here on. The name is kept
# because the radial return calculation below reads it.
stock_svm_open_RMSE_beta_df <- stock_svm__rad_open_RMSE_df %>%
  inner_join(stockbeta_df, by = c("stock"))
stock_svm_open_RMSE_beta_df
## stock RMSE R2 wk26_open beta
## 1 MMM 1.8250386 0.7010019 92.00644 -0.13421958
## 2 AXP 0.7980375 0.8735437 48.11660 0.03239515
## 3 AA 0.5972163 0.6531913 15.88656 0.74909840
## 4 T 0.5689411 0.7777718 30.18383 -0.22282535
## 5 BAC 0.6043063 0.8894164 11.43113 -0.33132903
## 6 BA 1.3356951 0.5193469 72.47214 -0.23741113
## 7 CAT 4.2943722 0.7886975 97.80009 0.92789831
## 8 CVX 2.8681780 0.9785458 99.03857 0.30620406
## 9 CSCO 0.8161072 0.9559383 16.08062 -0.21872018
## 10 KO 1.0447807 0.7931935 64.77545 -0.04235410
## 11 DD 1.4769285 0.9022599 51.19368 0.84145395
## 12 XOM 3.1785649 0.8244240 80.38796 0.10266945
## 13 GE 0.5865323 0.8728129 19.07402 -0.12849894
## 14 HPQ 3.3257450 0.3333301 38.04851 0.24479489
## 15 HD 1.5870997 0.1059224 36.60216 0.19832895
## 16 INTC 0.3626126 0.7151968 21.26088 0.43036653
## 17 IBM 6.7613364 0.7005999 164.83533 0.13346597
## 18 JNJ 0.8233969 0.8720155 66.18207 -0.17262055
## 19 JPM 0.9930447 0.9408369 42.03521 -0.18909367
## 20 KRFT 0.5430266 0.8648034 33.85192 -0.06118033
## 21 MCD 1.5458830 0.7540043 81.42829 -0.42867590
## 22 MRK 1.2805482 0.3542292 34.70689 -0.46722907
## 23 MSFT 0.4980569 0.8969188 24.02085 -0.04802629
## 24 PFE 0.5434059 0.7434710 20.30861 -0.28261619
## 25 PG 0.7356310 0.8706409 63.97363 -0.22741460
## 26 TRV 2.5478872 0.2897717 58.47670 0.06636522
## 27 UTX 2.6783541 0.1186493 82.81499 0.44934253
## 28 VZ 0.5921405 0.8738799 35.52798 0.25720271
## 29 WMT 0.7888649 0.5459826 52.74240 -0.07107965
## 30 DIS 1.1259752 0.8493104 39.27596 0.19028154
# Predicted week-26 return per stock from the radial-SVR fits: pair each
# stock's predicted close with its predicted open, then express
# (close - open) / open as a percentage.
stock_svm_rad_return_df <- inner_join(
  stock_svm_rad_RMSE_beta_df,
  stock_svm_open_RMSE_beta_df,
  by = c("stock")
)
stock_svm_rad_return_df <- mutate(
  stock_svm_rad_return_df,
  return = ((wk26_close - wk26_open) / wk26_open) * 100
)

# Column chart of the predicted returns, highest return first.
hc <- hchart(
  arrange(stock_svm_rad_return_df, desc(return)),
  'column',
  hcaes(x = stock, y = return)
)
hc %>%
  hc_add_theme(hc_theme_economist()) %>%
  hc_title(text = "Percentage Returns across various stocks for week 26 by SVR Radial")
#' Regression tree for one stock's weekly closing price
#'
#' Despite the `svr_` prefix, this fits a regression tree with `tree::tree()`.
#' Seeds the RNG with 123, draws roughly 70% of the rows for training (any
#' week-25 row is then removed from the training set), fits the tree, scores
#' it on the rows that were not drawn, and predicts `close` from the week-25
#' row(s).
#'
#' @param stockData Data frame for a single stock. Must contain `week`,
#'   `close`, `close_1`, `volume`, `percent_change_price`,
#'   `percent_change_volume_over_last_wk`, `previous_weeks_volume`,
#'   `days_to_next_dividend` and `percent_return_next_dividend`.
#' @param tik Ticker label copied into the `stock` column of the result.
#' @return Data frame with columns `stock`, `RMSE` and `R2` (both measured on
#'   the held-out rows) and `wk26_close` (the tree's prediction for the
#'   week-25 row); one row per week-25 row in `stockData`.
svr_close_DT_stock_fn <- function(stockData, tik) {
  set.seed(123)

  # The week-25 row(s) supply the predictors for the reported forecast.
  # Fail early with a clear message instead of after the model is fitted.
  stockDataValidation <- filter(stockData, week == 25)
  if (nrow(stockDataValidation) == 0) {
    stop("No week-25 row found in stockData for ticker ", tik, call. = FALSE)
  }

  # 70-30 split: 1 / 1.43 is about 0.70 of the rows.
  # (An alternative split by quarter was tried previously and left disabled.)
  train_idx <- sample(seq_len(nrow(stockData)), nrow(stockData) / 1.43)
  stockDataTrain <- filter(stockData[train_idx, ], week != 25)
  # Every row not drawn is held out; this set is not filtered on week, so it
  # can include the week-25 row.
  stockDataTest <- stockData[-train_idx, ]

  #------------------------------------
  # Model - Train
  #------------------------------------
  # A caret "rpart" fit, tree plotting and cv.tree() were tried here
  # previously and left disabled.
  tree_stock <- tree::tree(
    close ~ close_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = stockDataTrain
  )

  #------------------------------------
  # Model - Test
  #------------------------------------
  stockpredict <- predict(tree_stock, newdata = stockDataTest)
  stockRMSE <- RMSE(stockpredict, stockDataTest$close)
  stockR2 <- R2(stockpredict, stockDataTest$close)

  #------------------------------------
  # Forecast from the week-25 row
  #------------------------------------
  pred_close <- predict(tree_stock, newdata = stockDataValidation)
  data.frame(stock = tik, RMSE = stockRMSE, R2 = stockR2, wk26_close = pred_close)
}
# Fit the close-price regression tree for every stock and collect one result
# row per ticker.
#
# Each ticker's input lives in a data frame named "<ticker>StockLag"; its
# first row is skipped before modelling (presumably because the lagged
# columns are empty there -- confirm where the *StockLag frames are built).
# `stock` and `stockData` are assigned at top level on every iteration, so
# after the loop they hold the last ticker and its data.
stock_lag_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)
dtree_close_rows <- vector("list", length(stock_lag_tickers))
names(dtree_close_rows) <- stock_lag_tickers
for (stock in stock_lag_tickers) {
  lagged_stock_df <- get(paste0(stock, "StockLag"))
  stockData <- lagged_stock_df[2:nrow(lagged_stock_df), ]
  dtree_close_rows[[stock]] <- svr_close_DT_stock_fn(stockData, stock)
}
# Bind once at the end; the list is unnamed first so rbind() does not turn
# the ticker names into row names.
stock_dtree_RMSE_df <- do.call(rbind, unname(dtree_close_rows))

# Attach each stock's beta.
stock_Dtree_RMSE_beta_df <- stock_dtree_RMSE_df %>%
  inner_join(stockbeta_df, by = c("stock"))
stock_Dtree_RMSE_beta_df
## stock RMSE R2 wk26_close beta
## 1 MMM 1.3640127 0.77225249 89.22800 -0.13421958
## 2 AXP 1.7402782 0.21996476 49.64714 0.03239515
## 3 AA 0.4568888 0.72116531 15.77857 0.74909840
## 4 T 0.8734701 0.61358179 30.93000 -0.22282535
## 5 BAC 0.7099938 0.74272164 11.71111 -0.33132903
## 6 BA 1.2736193 0.93588130 70.69800 -0.23741113
## 7 CAT 5.3173638 0.65013991 99.06571 0.92789831
## 8 CVX 3.9167189 0.80538358 98.17750 0.30620406
## 9 CSCO 1.1231843 0.65950923 15.81800 -0.21872018
## 10 KO 0.5033617 0.94056962 65.31400 -0.04235410
## 11 DD 1.2627142 0.79631176 50.49200 0.84145395
## 12 XOM 3.0970225 0.61688286 79.96400 0.10266945
## 13 GE 0.4896483 0.90083383 18.99000 -0.12849894
## 14 HPQ 1.6905548 0.81592494 35.86000 0.24479489
## 15 HD 0.4389129 0.87120818 34.92600 0.19832895
## 16 INTC 1.0368573 0.09373524 20.80125 0.43036653
## 17 IBM 4.5712513 0.53802047 164.50400 0.13346597
## 18 JNJ 1.2886980 0.72954963 66.11714 -0.17262055
## 19 JPM 1.0395242 0.88678157 42.08167 -0.18909367
## 20 KRFT 0.5278509 0.93864582 34.22889 -0.06118033
## 21 MCD 1.2810933 0.73549862 80.97286 -0.42867590
## 22 MRK 1.4454068 0.09309472 36.20000 -0.46722907
## 23 MSFT 0.9027134 0.51246977 24.31833 -0.04802629
## 24 PFE 0.3957176 0.91900197 20.13800 -0.28261619
## 25 PG 0.4911606 0.94846701 61.97000 -0.22741460
## 26 TRV 3.9221258 0.35116913 58.04000 0.06636522
## 27 UTX 2.6940861 0.59835061 86.35250 0.44934253
## 28 VZ 0.6662863 0.88596265 35.79375 0.25720271
## 29 WMT 0.6671347 0.95123438 52.84875 -0.07107965
## 30 DIS 0.6211773 0.91267898 39.09600 0.19028154
#' Regression tree for one stock's weekly opening price
#'
#' Despite the `svr_` prefix, this fits a regression tree with `tree::tree()`.
#' Seeds the RNG with 123, draws roughly 70% of the rows for training (any
#' week-25 row is then removed from the training set), fits the tree, scores
#' it on the rows that were not drawn, and predicts `open` from the week-25
#' row(s).
#'
#' @param stockData Data frame for a single stock. Must contain `week`,
#'   `open`, `open_1`, `volume`, `percent_change_price`,
#'   `percent_change_volume_over_last_wk`, `previous_weeks_volume`,
#'   `days_to_next_dividend` and `percent_return_next_dividend`.
#' @param tik Ticker label copied into the `stock` column of the result.
#' @return Data frame with columns `stock`, `RMSE` and `R2` (both measured on
#'   the held-out rows) and `wk26_open` (the tree's prediction for the
#'   week-25 row); one row per week-25 row in `stockData`.
svr_open_DT_stock_fn <- function(stockData, tik) {
  set.seed(123)

  # The week-25 row(s) supply the predictors for the reported forecast.
  # Fail early with a clear message instead of after the model is fitted.
  stockDataValidation <- filter(stockData, week == 25)
  if (nrow(stockDataValidation) == 0) {
    stop("No week-25 row found in stockData for ticker ", tik, call. = FALSE)
  }

  # 70-30 split: 1 / 1.43 is about 0.70 of the rows.
  # (An alternative split by quarter was tried previously and left disabled.)
  train_idx <- sample(seq_len(nrow(stockData)), nrow(stockData) / 1.43)
  stockDataTrain <- filter(stockData[train_idx, ], week != 25)
  # Every row not drawn is held out; this set is not filtered on week, so it
  # can include the week-25 row.
  stockDataTest <- stockData[-train_idx, ]

  #------------------------------------
  # Model - Train
  #------------------------------------
  # A caret "rpart" fit, tree plotting and cv.tree() were tried here
  # previously and left disabled.
  tree_open_stock <- tree::tree(
    open ~ open_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = stockDataTrain
  )

  #------------------------------------
  # Model - Test
  #------------------------------------
  stockpredict <- predict(tree_open_stock, newdata = stockDataTest)
  stockRMSE <- RMSE(stockpredict, stockDataTest$open)
  stockR2 <- R2(stockpredict, stockDataTest$open)

  #------------------------------------
  # Forecast from the week-25 row
  #------------------------------------
  pred_open <- predict(tree_open_stock, newdata = stockDataValidation)
  data.frame(stock = tik, RMSE = stockRMSE, R2 = stockR2, wk26_open = pred_open)
}
# Score the open-price decision tree for every Dow Jones stock.
# One ticker -> lag-table mapping replaces the 30 copy-pasted stanzas; the
# order below is the order of the rows in the result.
stock_lag_tables <- list(
  MMM = MMMStockLag, AXP = AXPStockLag, AA = AAStockLag, T = TStockLag,
  BAC = BACStockLag, BA = BAStockLag, CAT = CATStockLag, CVX = CVXStockLag,
  CSCO = CSCOStockLag, KO = KOStockLag, DD = DDStockLag, XOM = XOMStockLag,
  GE = GEStockLag, HPQ = HPQStockLag, HD = HDStockLag, INTC = INTCStockLag,
  IBM = IBMStockLag, JNJ = JNJStockLag, JPM = JPMStockLag, KRFT = KRFTStockLag,
  MCD = MCDStockLag, MRK = MRKStockLag, MSFT = MSFTStockLag, PFE = PFEStockLag,
  PG = PGStockLag, TRV = TRVStockLag, UTX = UTXStockLag, VZ = VZStockLag,
  WMT = WMTStockLag, DIS = DISStockLag
)

# Collect one result row per stock and bind them once at the end instead of
# growing the data frame with rbind() after every stock.
dtree_open_rows <- vector("list", length(stock_lag_tables))
names(dtree_open_rows) <- names(stock_lag_tables)
for (stock in names(stock_lag_tables)) {
  # `stock` and `stockData` remain ordinary script variables, so after the
  # loop they hold the last stock (DIS) exactly as the unrolled code left them.
  # Each lag table is used from its second row onward; `[-1, ]` selects the
  # same rows as `2:nrow()` but stays correct for a one-row table.
  stockData <- stock_lag_tables[[stock]][-1, ]
  dtree_open_rows[[stock]] <- svr_open_DT_stock_fn(stockData,stock)
}
# unname() keeps rbind() from turning the list names into row names.
# Known from the script output: the UTX fit raises the warning
# "the standard deviation is zero" from cor() and its R2 is NA.
stock_dtree_open_RMSE_df <- do.call(rbind, unname(dtree_open_rows))

# Attach the columns of stockbeta_df by matching on `stock` (inner join: stocks
# missing from either table are dropped), then print the table.
stock_Dtree__open_RMSE_beta_df <- stock_dtree_open_RMSE_df %>% inner_join(stockbeta_df, by = c("stock"))
stock_Dtree__open_RMSE_beta_df
## stock RMSE R2 wk26_open beta
## 1 MMM 2.0643452 0.433372010 89.74000 -0.13421958
## 2 AXP 1.2396555 0.710850398 49.72429 0.03239515
## 3 AA 0.5866951 0.522453094 16.16250 0.74909840
## 4 T 1.2937533 0.198892956 30.95556 -0.22282535
## 5 BAC 0.6888932 0.751696741 11.81625 -0.33132903
## 6 BA 2.0676851 0.037960260 70.76800 -0.23741113
## 7 CAT 3.9473770 0.871723042 97.79000 0.92789831
## 8 CVX 2.7034011 0.857684982 96.11200 0.30620406
## 9 CSCO 1.0232202 0.797148180 16.20000 -0.21872018
## 10 KO 1.5738309 0.416030578 63.67750 -0.04235410
## 11 DD 1.3046509 0.888054787 50.64167 0.84145395
## 12 XOM 4.6601760 0.104914615 85.23600 0.10266945
## 13 GE 0.6399149 0.842387356 19.47222 -0.12849894
## 14 HPQ 1.5957313 0.831523206 36.83600 0.24479489
## 15 HD 0.9926774 0.819579263 35.41800 0.19832895
## 16 INTC 0.5366394 0.514737396 20.43667 0.43036653
## 17 IBM 6.0808974 0.386377616 163.66000 0.13346597
## 18 JNJ 2.1537253 0.716364977 66.01857 -0.17262055
## 19 JPM 1.1971007 0.821442805 42.20000 -0.18909367
## 20 KRFT 0.4543083 0.880131933 34.28571 -0.06118033
## 21 MCD 2.6241116 0.247825969 80.42714 -0.42867590
## 22 MRK 1.9259772 0.002181522 33.18444 -0.46722907
## 23 MSFT 0.5226120 0.845113889 24.36400 -0.04802629
## 24 PFE 0.4126617 0.911801711 20.77833 -0.28261619
## 25 PG 1.8064435 0.513335108 65.95000 -0.22741460
## 26 TRV 2.8095352 0.036388373 58.05000 0.06636522
## 27 UTX 2.9995195 NA 82.28556 0.44934253
## 28 VZ 0.7409469 0.493517374 36.20800 0.25720271
## 29 WMT 1.0492123 0.538332209 52.67571 -0.07107965
## 30 DIS 1.0735381 0.816328766 39.67800 0.19028154
# Decision-tree return per stock: join the close-price and open-price result
# tables on the ticker, then express (predicted close - predicted open) as a
# percentage of the predicted open. Columns present in both tables receive the
# join suffixes .x (close table) and .y (open table).
stock_svr_dt_return_df <- mutate(
  inner_join(stock_Dtree_RMSE_beta_df, stock_Dtree__open_RMSE_beta_df, by = "stock"),
  return = ((wk26_close-wk26_open)/wk26_open)*100
)
# Column chart of the returns, largest first.
hc <- hchart(
  arrange(stock_svr_dt_return_df, desc(return)),
  'column',
  hcaes(x = stock, y = return)
)
# Theme and title are applied to a copy that is printed; `hc` itself keeps the
# unthemed chart.
hc_title(
  hc_add_theme(hc, hc_theme_economist()),
  text = "Percentage Returns across various stocks for week 26 by Decision Tree Regression"
)
# stock_return_df
# stock_svm_return_df
# stock_svm_rad_return_df
# stock_svr_dt_return_df
# Averages the per-stock fit metrics of one model into a single labelled row.
#
# Arguments:
#   return_df   per-stock results table with the columns RMSE.x, RMSE.y, R2.x
#               and R2.y. For the decision-tree table built in this script the
#               .x columns come from the close-price model and the .y columns
#               from the open-price model; the other tables are assumed to
#               follow the same layout - confirm where they are built.
#   model_name  label stored in the `Model` column.
#
# Returns a one-row data frame: Model, meanCloseRMSE, meanOpenRMSE,
# meanCloseR2, meanOpenR2.
#
# R2 is NA for a stock whose R2 could not be computed (seen for UTX in the
# decision-tree output), so both R2 means skip NA for every model rather than
# for the decision-tree open price only. RMSE means stay strict so that a
# missing RMSE is not silently hidden.
mean_fit_metrics <- function(return_df, model_name) {
  metric_means <- return_df %>%
    summarise(meanCloseRMSE = mean(RMSE.x),
              meanOpenRMSE = mean(RMSE.y),
              meanCloseR2 = mean(R2.x, na.rm = TRUE),
              meanOpenR2 = mean(R2.y, na.rm = TRUE))
  cbind(Model = model_name, metric_means)
}

# One row per model, in the order the charts below display them.
lm_compare_df <- rbind(
  mean_fit_metrics(stock_return_df, 'Linear Regression'),
  mean_fit_metrics(stock_svm_return_df, 'SVR Linear Kernel'),
  mean_fit_metrics(stock_svm_rad_return_df, 'SVR Radial Kernel'),
  mean_fit_metrics(stock_svr_dt_return_df, 'Decision Tree Regression')
)
# lm_compare_df <- as.data.frame(t(as.matrix(lm_compare_df)))
#
# model_name <- c(lm_compare_df[1,1],lm_compare_df[1,2],lm_compare_df[1,3],lm_compare_df[1,4])
#
# lm_compare_df <- lm_compare_df[-1,]
#
# colnames(lm_compare_df) <- model_name
#
# mrownames <- rownames(lm_compare_df)
#
# lm_compare_df <- cbind(Means = mrownames,lm_compare_df)
#
# # rownames(lm_compare_df) <- c(1:4)
#
# lm_compare_df %>% ggplot(aes(Model)) +
# geom_bar(geom=GeomBar)
#
#
# # Use position=position_dodge()
# # ggplot(data=lm_compare_df, aes(x=Means, y=len, fill=supp)) +
# # geom_bar(stat="identity", position=position_dodge())
# stock_return_df %>% ggplot(mapping = aes(,return))+geom
# Model comparison charts: one column chart per averaged metric in
# lm_compare_df, with one bar per model. In each pair of statements `hc` keeps
# the unthemed chart while the themed, titled copy is printed.

# Mean RMSE of the close-price models.
hc <- hchart(lm_compare_df, 'column', hcaes(x = Model, y = meanCloseRMSE))
hc_title(
  hc_add_theme(hc, hc_theme_economist()),
  text = "Stocks' Close Price mean RMSE Comparision"
)

# Mean RMSE of the open-price models.
hc <- hchart(lm_compare_df, 'column', hcaes(x = Model, y = meanOpenRMSE))
hc_title(
  hc_add_theme(hc, hc_theme_economist()),
  text = "Stocks' Open Price mean RMSE Comparision"
)

# Mean R-squared of the open-price models.
hc <- hchart(lm_compare_df, 'column', hcaes(x = Model, y = meanOpenR2))
hc_title(
  hc_add_theme(hc, hc_theme_economist()),
  text = "Stocks' Open Price mean R-Squared Comparision"
)

# Mean R-squared of the close-price models.
hc <- hchart(lm_compare_df, 'column', hcaes(x = Model, y = meanCloseR2))
hc_title(
  hc_add_theme(hc, hc_theme_economist()),
  text = "Stocks' Close Price mean R-Squared Comparision"
)