# Load the per-stock Dow Jones table and the index-only table. Column classes
# are pinned up front so every non-text field is read as numeric.
DowJonesData <- read.csv(
  "dow_jones_index.csv",
  header = TRUE,
  sep = ",",
  colClasses = c("numeric", "character", "character", rep("numeric", 13))
)
DJIndexOnly <- read.csv(
  "DJIndexOnly.csv",
  header = TRUE,
  sep = ",",
  colClasses = c("numeric", "character", rep("numeric", 5))
)

# Rewrite the m/d/Y date strings as yyyy-mm-dd strings. The columns stay
# character vectors; they are not converted to Date objects.
DowJonesData[["date"]] <- format(
  strptime(as.character(DowJonesData[["date"]]), format = "%m/%d/%Y"),
  format = "%Y-%m-%d"
)
DJIndexOnly[["Date"]] <- format(
  strptime(as.character(DJIndexOnly[["Date"]]), format = "%m/%d/%Y"),
  format = "%Y-%m-%d"
)
# Split the combined table into one data frame per ticker. Each result is bound
# to the name <TICKER>Stock (MMMStock, AXPStock, ...) because the rest of the
# script refers to those objects by name.
for (ticker_symbol in c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)) {
  assign(
    paste0(ticker_symbol, "Stock"),
    filter(DowJonesData, stock == ticker_symbol)
  )
}
# The loop variable is scratch state only; do not leave it in the workspace.
rm(ticker_symbol)
# Period-over-period returns for the index and for every stock. Delt() has no
# previous value for the first observation, so that NA row is removed with
# na.omit().
# NOTE(review): columns are picked by position (6 in DJIndexOnly, 7 in the
# per-stock frames) - presumably the closing price in both; confirm against
# the CSV headers.
ReturnDJI <- na.omit(Delt(DJIndexOnly[, 6]))

# Each stock's series is bound to <TICKER>Return (MMMReturn, AXPReturn, ...),
# the names used when the return matrix is assembled.
for (ticker_symbol in c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)) {
  assign(
    paste0(ticker_symbol, "Return"),
    na.omit(Delt(get(paste0(ticker_symbol, "Stock"))[, 7]))
  )
}
# The loop variable is scratch state only; do not leave it in the workspace.
rm(ticker_symbol)
# Column labels for the combined return matrix: the index first, then the
# stocks in the same order as the series are bound below.
DJColNames <- c(
  "DJI", "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO",
  "KO", "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM",
  "KRFT", "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT",
  "DIS"
)

# Bind the index return series and the 30 per-stock return series side by
# side into one matrix, one column per series.
DJStockReturns <- cbind(
  ReturnDJI,
  MMMReturn, AXPReturn, AAReturn, TReturn, BACReturn, BAReturn,
  CATReturn, CVXReturn, CSCOReturn, KOReturn, DDReturn, XOMReturn,
  GEReturn, HPQReturn, HDReturn, INTCReturn, IBMReturn, JNJReturn,
  JPMReturn, KRFTReturn, MCDReturn, MRKReturn, MSFTReturn, PFEReturn,
  PGReturn, TRVReturn, UTXReturn, VZReturn, WMTReturn, DISReturn
)

# Replace the generic Delt() column names with the ticker labels so the
# columns can be referenced by ticker in model formulas.
colnames(DJStockReturns) <- DJColNames
# Visual check of the return distributions, one box per column. The original
# note observes that the market index shows less dispersion (risk) than the
# individual stocks; it calls the index "S&P", but the index column here is DJI.
# par() requests a two-row layout even though only one plot is drawn here.
par(mfrow = c(2, 1))
boxplot(
  DJStockReturns,
  main = "Expected Return",
  xlab = "Stock Picks",
  ylab = "Return"
)

# CAPM step 1: estimate each stock's beta by regressing its return on the index
# return (stock ~ DJI). The slope of that regression is the beta; the expected
# return per stock is derived from it further down in the script.
# (The original note also mentions a covariance matrix and optimal portfolio
# weights; neither is computed in this part of the script.)
#
# A plain linear model is used because the interpretation of the slope is the
# point here; model fit can be judged from the R-squared in summary(lm.<X>).
#
# For every ticker column the loop creates the same two objects the script has
# always exposed:
#   lm.<TICKER>   the fitted lm object
#   Beta<TICKER>  the estimated slope on DJI
# 3M is the one exception: its objects are named lm.3M / Beta3M, not lm.MMM.
for (ticker_symbol in setdiff(colnames(DJStockReturns), "DJI")) {
  name_suffix <- if (ticker_symbol == "MMM") "3M" else ticker_symbol

  # bquote() splices the ticker into the formula as a symbol, so the stored
  # call reads e.g. lm(formula = AXP ~ DJI, ...) exactly as if typed by hand.
  capm_fit <- eval(bquote(
    lm(.(as.name(ticker_symbol)) ~ DJI, data = as.data.frame(DJStockReturns))
  ))

  assign(paste0("lm.", name_suffix), capm_fit)
  # Row 2 of the coefficient table is the DJI slope; column 1 is the estimate.
  assign(paste0("Beta", name_suffix), summary(capm_fit)$coefficients[2, 1])
}
# Loop scratch variables are not part of the script's results.
rm(ticker_symbol, name_suffix, capm_fit)

# Ticker labels, in the same order as the beta values collected below.
stocktic <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)

# Estimated betas, one per ticker (3M's beta is stored as Beta3M).
betas <- c(
  Beta3M, BetaAXP, BetaAA, BetaT, BetaBAC, BetaBA, BetaCAT, BetaCVX,
  BetaCSCO, BetaKO, BetaDD, BetaXOM, BetaGE, BetaHPQ, BetaHD, BetaINTC,
  BetaIBM, BetaJNJ, BetaJPM, BetaKRFT, BetaMCD, BetaMRK, BetaMSFT,
  BetaPFE, BetaPG, BetaTRV, BetaUTX, BetaVZ, BetaWMT, BetaDIS
)

# Lookup table ticker -> beta; printed with the largest beta first.
stockbeta_df <- data.frame(stock = stocktic, beta = betas)
arrange(stockbeta_df, desc(beta))
##    stock        beta
## 1    CAT  0.92789831
## 2     DD  0.84145395
## 3     AA  0.74909840
## 4    UTX  0.44934253
## 5   INTC  0.43036653
## 6    CVX  0.30620406
## 7     VZ  0.25720271
## 8    HPQ  0.24479489
## 9     HD  0.19832895
## 10   DIS  0.19028154
## 11   IBM  0.13346597
## 12   XOM  0.10266945
## 13   TRV  0.06636522
## 14   AXP  0.03239515
## 15    KO -0.04235410
## 16  MSFT -0.04802629
## 17  KRFT -0.06118033
## 18   WMT -0.07107965
## 19    GE -0.12849894
## 20   MMM -0.13421958
## 21   JNJ -0.17262055
## 22   JPM -0.18909367
## 23  CSCO -0.21872018
## 24     T -0.22282535
## 25    PG -0.22741460
## 26    BA -0.23741113
## 27   PFE -0.28261619
## 28   BAC -0.33132903
## 29   MCD -0.42867590
## 30   MRK -0.46722907
# If a stock has a beta of 1.00, its price tends to move in step with the
# market. Such a stock carries systematic (market) risk, but the beta
# calculation cannot detect any unsystematic (stock-specific) risk. Adding a
# stock with a beta of 1.00 to a portfolio does not add market risk to the
# portfolio, but it also does not increase the likelihood that the portfolio
# will provide excess return.

# A beta of less than 1.00 means that the security is theoretically less volatile 
# than the market which means the portfolio is less risky with the stock included 
# than without it. 

# A beta that is greater than 1.00 indicates that the security's price is 
# theoretically more volatile than the market. For example, if a stock's beta is 
# 1.20, it is assumed to be 20% more volatile than the market. Technology stocks and 
# small caps tend to have higher betas than the market benchmark. This indicates that
# adding the stock to a portfolio will increase the portfolio’s risk, but also 
# increase its expected return.

# To compute the expected return on investment (ROI), use the US Treasury bill
# rate as the risk-free rate and a market index as the market rate (this
# script uses the DJI).
# ROI calculation

# US Treasury bill rate (Jan 1, 2011): 3.39% (risk-free rate for 2 quarters)
# DJI market rate for 2011:            7.22% (for 2 quarters)
# E(R_i) = R_f + beta_i * (R_m - R_f) = 3.39% + beta_i * (7.22% - 3.39%)

#For example, for DD stock the calculation is shown below:
#Beta of DD:  0.841453946721087
#DD (roi) = 6.61%

# CAPM expected return per stock, in percent:
#   ROI = risk-free rate + beta * (market rate - risk-free rate)
# The two rates are the figures quoted in the notes above (3.39% and 7.22%).
risk_free_rate <- 3.39
market_rate <- 7.22

# For every ticker, read Beta<TICKER> and create ROI.<TICKER>. 3M keeps its
# special object names (Beta3M / ROI.3M) instead of the MMM ticker.
for (ticker_symbol in stocktic) {
  name_suffix <- if (ticker_symbol == "MMM") "3M" else ticker_symbol
  assign(
    paste0("ROI.", name_suffix),
    risk_free_rate +
      get(paste0("Beta", name_suffix)) * (market_rate - risk_free_rate)
  )
}
# Loop scratch variables are not part of the script's results.
rm(ticker_symbol, name_suffix)

# Collect the per-ticker expected returns in the same order as `stocktic`.
rois <- c(
  ROI.3M, ROI.AXP, ROI.AA, ROI.T, ROI.BAC, ROI.BA, ROI.CAT, ROI.CVX,
  ROI.CSCO, ROI.KO, ROI.DD, ROI.XOM, ROI.GE, ROI.HPQ, ROI.HD, ROI.INTC,
  ROI.IBM, ROI.JNJ, ROI.JPM, ROI.KRFT, ROI.MCD, ROI.MRK, ROI.MSFT,
  ROI.PFE, ROI.PG, ROI.TRV, ROI.UTX, ROI.VZ, ROI.WMT, ROI.DIS
)

# Lookup table ticker -> expected return; printed with the highest first.
stockroi_df <- data.frame(stock = stocktic, roi = rois)
# Sort by the `roi` column itself. Sorting by `rois` would silently fall back
# to the global vector and only be correct while the row order matches it.
stockroi_df %>% arrange(desc(roi))
##    stock      roi
## 1    CAT 6.943851
## 2     DD 6.612769
## 3     AA 6.259047
## 4    UTX 5.110982
## 5   INTC 5.038304
## 6    CVX 4.562762
## 7     VZ 4.375086
## 8    HPQ 4.327564
## 9     HD 4.149600
## 10   DIS 4.118778
## 11   IBM 3.901175
## 12   XOM 3.783224
## 13   TRV 3.644179
## 14   AXP 3.514073
## 15    KO 3.227784
## 16  MSFT 3.206059
## 17  KRFT 3.155679
## 18   WMT 3.117765
## 19    GE 2.897849
## 20   MMM 2.875939
## 21   JNJ 2.728863
## 22   JPM 2.665771
## 23  CSCO 2.552302
## 24     T 2.536579
## 25    PG 2.519002
## 26    BA 2.480715
## 27   PFE 2.307580
## 28   BAC 2.121010
## 29   MCD 1.748171
## 30   MRK 1.600513
# Add lagged predictors to every stock. For each <TICKER>Stock frame this
# creates <TICKER>StockLag with three extra columns:
#   close_1  previous row's close (NA in the first row)
#   open_1   previous row's open  (NA in the first row)
#   week     running row number starting at 1
# `week` is derived from the number of rows rather than a fixed 1:25, so a
# ticker with a different number of observations no longer raises an error.
# dplyr::lag() is spelled out because other attached packages may mask `lag`.
for (ticker_symbol in stocktic) {
  assign(
    paste0(ticker_symbol, "StockLag"),
    mutate(
      get(paste0(ticker_symbol, "Stock")),
      close_1 = dplyr::lag(close),
      open_1 = dplyr::lag(open),
      week = seq_along(close)
    )
  )
}
# The loop variable is scratch state only; do not leave it in the workspace.
rm(ticker_symbol)
# TODO - all models: linear (full), SVR (radial, linear), full decision tree.
# Plan noted for each individual stock:
#   train on quarter 1 [weeks 1 - 12]
#   test on quarter 2 [weeks 13 - 25]
#   predict week 26
# ----------------------------------------
# Model selection using best-subset regression, shown for MMM
# ----------------------------------------
# The first row has no lagged values (close_1 / open_1 are NA), so drop it.
predictor_sel <- MMMStockLag[-1, ]

# Best-subset search over the seven candidate predictors of `close`.
# An earlier, commented-out variant also offered next_weeks_open,
# next_weeks_close and percent_change_next_weeks_price as candidates.
best_subset <- regsubsets(
  close ~ close_1 + volume + percent_change_price +
    percent_change_volume_over_last_wk + previous_weeks_volume +
    days_to_next_dividend + percent_return_next_dividend,
  data = predictor_sel,
  nvmax = 10
)
results <- summary(best_subset)

# Plot adjusted R2, Cp and BIC against the number of predictors (1 to 7), one
# free-scaled panel per statistic, with a reference line at 7 predictors.
ggplot(
  gather(
    data.frame(
      predictors = 1:7,
      adj_R2 = results$adjr2,
      Cp = results$cp,
      BIC = results$bic
    ),
    statistic, value, -predictors
  ),
  aes(predictors, value, color = statistic)
) +
  geom_line(show.legend = FALSE) +
  geom_point(show.legend = FALSE) +
  facet_wrap(~ statistic, scales = "free") +
  scale_x_continuous(breaks = c(1, 3, 5, 7)) +
  geom_vline(xintercept = 7, col = "#8866AA")

# Fit and score a linear model of a stock's `close` price.
#
# Arguments:
#   stockData  data frame with `week`, `close`, `close_1` and the other
#              predictor columns named in the model formula below.
#   tik        ticker label copied into the `stock` column of the result.
#
# Returns a data frame with columns stock, RMSE, R2 and wk26_close: the error
# metrics on the held-out rows plus the model's prediction for the week-25
# row(s) of `stockData`.
lm_stock_fn <- function(stockData, tik) {
  # Fixed seed so every call draws the same row positions. Note that this
  # resets the session-wide RNG state on each call.
  set.seed(123)

  # Roughly a 70/30 split: 1 / 1.43 of the row positions go to training
  # (the size is fractional; sample() uses its integer part).
  train <- sample(1:nrow(stockData), nrow(stockData) / 1.43)

  # Week 25 is kept aside for the final prediction and never used for fitting.
  week25_rows <- filter(stockData, week == 25)
  fit_rows <- filter(stockData[train, ], week != 25)
  # NOTE(review): week 25 is not removed from the held-out rows, so it is
  # scored there whenever it was not drawn into `train`.
  holdout_rows <- stockData[-train, ]

  # ------------------------------------
  # Model - train
  # ------------------------------------
  close_model <- lm(
    close ~ close_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = fit_rows
  )

  # ------------------------------------
  # Model - test: RMSE and R-squared on the held-out rows
  # ------------------------------------
  holdout_pred <- predict(close_model, newdata = holdout_rows)
  holdout_rmse <- RMSE(holdout_pred, holdout_rows$close)
  holdout_r2 <- R2(holdout_pred, holdout_rows$close)

  # ------------------------------------
  # Prediction from the week-25 predictors, reported as wk26_close
  # ------------------------------------
  week25_pred <- predict(close_model, newdata = week25_rows)

  data.frame(
    stock = tik,
    RMSE = holdout_rmse,
    R2 = holdout_r2,
    wk26_close = week25_pred
  )
}
# stock <- "3M"
# stockData <- MMMStockLag[2:nrow(MMMStockLag),]
# stock_RMSE_df <- lm_stock_fn(stockData,stock)
# stock_RMSE_df
# Fit the close-price model for every ticker and stack the one-row summaries
# (stock, RMSE, R2, wk26_close) into stock_RMSE_df, in `stocktic` order.
# `stock` and `stockData` are deliberately the loop's working variables: they
# are left holding the last ticker and its data, as the script always did.
stock_RMSE_df <- NULL
for (stock in stocktic) {
  # Drop the first row: its lagged columns (close_1 / open_1) are NA.
  stockData <- get(paste0(stock, "StockLag"))[-1, , drop = FALSE]
  stock_fit_row <- lm_stock_fn(stockData, stock)

  # Append in sequence so the row order follows the ticker order.
  stock_RMSE_df <- if (is.null(stock_RMSE_df)) {
    stock_fit_row
  } else {
    rbind(stock_RMSE_df, stock_fit_row)
  }
}
# Loop scratch variable is not part of the script's results.
rm(stock_fit_row)
# stock_RMSE_df %>% arrange(desc(RMSE))

# Attach each ticker's beta to its model summary row; only tickers present in
# both tables are kept. The combined table is then printed.
stock_RMSE_beta_df <- inner_join(stock_RMSE_df, stockbeta_df, by = "stock")

stock_RMSE_beta_df
##    stock       RMSE        R2 wk26_close        beta
## 1    MMM 0.08173473 0.9993467   90.92123 -0.13421958
## 2    AXP 0.13698121 0.9966374   48.51104  0.03239515
## 3     AA 0.08336188 0.9961460   15.22372  0.74909840
## 4      T 0.13514496 0.9936659   30.52512 -0.22282535
## 5    BAC 0.07429363 0.9991628   10.45915 -0.33132903
## 6     BA 0.18884720 0.9967019   71.44162 -0.23741113
## 7    CAT 1.81679363 0.9703712   99.05213  0.92789831
## 8    CVX 0.23857446 0.9990295   97.45156  0.30620406
## 9   CSCO 0.10097439 0.9979944   14.95534 -0.21872018
## 10    KO 0.19288767 0.9893348   65.06294 -0.04235410
## 11    DD 0.14695405 0.9978800   52.05700  0.84145395
## 12   XOM 0.69410250 0.9935125   77.45717  0.10266945
## 13    GE 0.17916477 0.9745907   17.68976 -0.12849894
## 14   HPQ 0.40108420 0.9874508   35.05573  0.24479489
## 15    HD 0.06364468 0.9944181   35.00414  0.19832895
## 16  INTC 0.20337768 0.9602232   21.09635  0.43036653
## 17   IBM 2.99950305 0.7815909  165.27614  0.13346597
## 18   JNJ 0.26200515 0.9936776   65.38101 -0.17262055
## 19   JPM 0.14451896 0.9982054   39.13618 -0.18909367
## 20  KRFT 0.05981023 0.9976355   34.56931 -0.06118033
## 21   MCD 0.10477064 0.9983814   81.85857 -0.42867590
## 22   MRK 0.08087900 0.9959822   34.59599 -0.46722907
## 23  MSFT 0.21108754 0.9743337   24.25737 -0.04802629
## 24   PFE 0.03649884 0.9990041   20.09871 -0.28261619
## 25    PG 1.74835922 0.7448523   62.56916 -0.22741460
## 26   TRV 0.51374964 0.9889477   57.14310  0.06636522
## 27   UTX 0.51976247 0.9848143   84.46974  0.44934253
## 28    VZ 0.05740034 0.9982092   36.00819  0.25720271
## 29   WMT 0.05684715 0.9987619   52.45641 -0.07107965
## 30   DIS 0.10085327 0.9982791   37.39680  0.19028154
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.0.5
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
## 
##     cluster
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:quantmod':
## 
##     Lag
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(corrplot)
## corrplot 0.90 loaded
# Correlations (with p-values) between DIS's close price and the candidate
# predictors. The first row is skipped because its lagged value is NA.
cor_5 <- rcorr(as.matrix(
  DISStockLag[
    -1,
    c(
      "week", "close", "volume", "percent_change_price",
      "percent_change_volume_over_last_wk", "previous_weeks_volume",
      "days_to_next_dividend", "percent_return_next_dividend", "close_1"
    )
  ]
))

# Wider column set kept for reference:
# c("week","close","open","high","low","volume","percent_change_price","percent_change_volume_over_last_wk","previous_weeks_volume","days_to_next_dividend","percent_return_next_dividend","close_1","open_1")

# Correlation coefficients and their p-values, as returned by rcorr().
M <- cor_5[["r"]]
p_mat <- cor_5[["P"]]

# Red-white-blue palette generator; 200 colours are drawn from it for the plot.
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))

corrplot(
  M,
  method = "color",
  col = col(200),
  type = "upper",
  order = "hclust",
  # Print the correlation coefficient in each cell.
  addCoef.col = "black",
  # Text label colour and rotation.
  tl.col = "darkblue",
  tl.srt = 45,
  # Combine with the significance test: p-values are checked against 0.01.
  p.mat = p_mat,
  sig.level = 0.01,
  # Hide the principal diagonal.
  diag = FALSE
)

# Same correlation view for DIS's open price: `open` and its lag `open_1`
# take the place of `close` and `close_1`. The first row is skipped because
# its lagged value is NA.
cor_5 <- rcorr(as.matrix(
  DISStockLag[
    -1,
    c(
      "week", "open", "volume", "percent_change_price",
      "percent_change_volume_over_last_wk", "previous_weeks_volume",
      "days_to_next_dividend", "percent_return_next_dividend", "open_1"
    )
  ]
))

# Correlation coefficients and their p-values, as returned by rcorr().
M <- cor_5[["r"]]
p_mat <- cor_5[["P"]]

# Red-white-blue palette generator; 200 colours are drawn from it for the plot.
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))

corrplot(
  M,
  method = "color",
  col = col(200),
  type = "upper",
  order = "hclust",
  # Print the correlation coefficient in each cell.
  addCoef.col = "black",
  # Text label colour and rotation.
  tl.col = "darkblue",
  tl.srt = 45,
  # Combine with the significance test: p-values are checked against 0.01.
  p.mat = p_mat,
  sig.level = 0.01,
  # Hide the principal diagonal.
  diag = FALSE
)

# Fit and score a linear model of a stock's `open` price.
#
# Arguments:
#   stockData  data frame with `week`, `open`, `open_1` and the other
#              predictor columns named in the model formula below.
#   tik        ticker label copied into the `stock` column of the result.
#
# Returns a data frame with columns stock, RMSE, R2 and wk26_open: the error
# metrics on the held-out rows plus the model's prediction for the week-25
# row(s) of `stockData`.
lm_stock_open_fn <- function(stockData, tik) {
  # Fixed seed so every call draws the same row positions. Note that this
  # resets the session-wide RNG state on each call.
  set.seed(123)

  # Roughly a 70/30 split: 1 / 1.43 of the row positions go to training
  # (the size is fractional; sample() uses its integer part).
  train <- sample(1:nrow(stockData), nrow(stockData) / 1.43)

  # Week 25 is kept aside for the final prediction and never used for fitting.
  week25_rows <- filter(stockData, week == 25)
  fit_rows <- filter(stockData[train, ], week != 25)
  # NOTE(review): week 25 is not removed from the held-out rows, so it is
  # scored there whenever it was not drawn into `train`.
  holdout_rows <- stockData[-train, ]

  # ------------------------------------
  # Model - train
  # ------------------------------------
  open_model <- lm(
    open ~ open_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = fit_rows
  )

  # ------------------------------------
  # Model - test: RMSE and R-squared on the held-out rows
  # ------------------------------------
  holdout_pred <- predict(open_model, newdata = holdout_rows)
  # The model predicts `open`, so both metrics are scored against `open`.
  # (RMSE used to be scored against `close`, which mixed in the open/close gap.)
  holdout_rmse <- RMSE(holdout_pred, holdout_rows$open)
  holdout_r2 <- R2(holdout_pred, holdout_rows$open)

  # ------------------------------------
  # Prediction from the week-25 predictors, reported as wk26_open
  # ------------------------------------
  week25_pred <- predict(open_model, newdata = week25_rows)

  data.frame(
    stock = tik,
    RMSE = holdout_rmse,
    R2 = holdout_r2,
    wk26_open = week25_pred
  )
}
# Run the linear open-price model for every Dow ticker and stack the results.
# Each ticker's data lives in a global data frame named <TICKER>StockLag; its
# first row is dropped before modelling (presumably because the lagged columns
# are undefined there - confirm against where the *StockLag frames are built).
dow_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)

# Collect per-ticker results in a preallocated list and bind once, instead of
# growing the data frame with a separate rbind() per ticker.
lm_open_results <- vector("list", length(dow_tickers))
for (i in seq_along(dow_tickers)) {
  # `stock` and `stockData` stay as globals, ending on the last ticker.
  stock <- dow_tickers[[i]]
  lagged <- get(paste0(stock, "StockLag"))
  stockData <- lagged[-1, , drop = FALSE]
  lm_open_results[[i]] <- lm_stock_open_fn(stockData, stock)
}
stock_RMSE_open_df <- do.call(rbind, lm_open_results)

stock_RMSE_open_df 
##     stock      RMSE        R2 wk26_open
## 1     MMM 1.8822136 0.9992394  91.47696
## 11    AXP 1.1921029 0.9933907  48.55281
## 12     AA 0.5162242 0.9908228  14.56018
## 13      T 0.6770761 0.9878839  30.69457
## 14    BAC 0.4478526 0.9910542  10.42286
## 15     BA 1.9425551 0.9956137  74.09903
## 16    CAT 6.0515915 0.8765235  92.96805
## 17    CVX 1.7895533 0.9964108  98.47113
## 18   CSCO 0.3624150 0.9639440  14.66808
## 19     KO 0.9126795 0.9963773  65.47234
## 110    DD 1.2962037 0.9999508  49.41491
## 111   XOM 2.0052543 0.9428082  79.30629
## 112    GE 0.3451259 0.8417561  17.96719
## 113   HPQ 3.5167208 0.8938147  34.99727
## 114    HD 0.9938124 0.9984861  34.27297
## 115  INTC 1.1926198 0.4172535  21.17506
## 116   IBM 3.5527344 0.8803476 163.55746
## 117   JNJ 1.8471691 0.8405906  66.59087
## 118   JPM 0.8610462 0.9956759  40.22083
## 119  KRFT 0.3935196 0.9978537  34.39044
## 120   MCD 0.8876942 0.9979608  82.19868
## 121   MRK 1.4100163 0.9969460  35.34482
## 122  MSFT 0.4044811 0.9745418  24.09279
## 123   PFE 0.4284994 0.9989998  20.12584
## 124    PG 2.0151904 0.5027857  64.52804
## 125   TRV 2.3396409 0.5183677  58.21255
## 126   UTX 3.1071277 0.3872658  82.88227
## 127    VZ 0.7559505 0.9978611  35.31459
## 128   WMT 0.9695742 0.9976689  52.72207
## 129   DIS 0.5907595 0.9993784  37.71319
# Pair each ticker's wk26_close with its wk26_open and express the gap as a
# percentage of wk26_open.
stock_return_df <- stock_RMSE_beta_df %>%
  inner_join(stock_RMSE_open_df, by = c("stock")) %>%
  mutate(return = ((wk26_close - wk26_open) / wk26_open) * 100)

# Column chart of the returns, highest first.
hc <- stock_return_df %>%
  arrange(desc(return)) %>%
  hchart("column", hcaes(x = stock, y = return))
hc %>%
  hc_add_theme(hc_theme_economist()) %>%
  hc_title(text = "Percentage Returns across various stocks for week 26 for Linear Regression")

# The same ranking as a two-column table.
stock_return_df %>%
  select(stock, return) %>%
  arrange(desc(return))
##    stock      return
## 1    CAT  6.54427873
## 2     DD  5.34674464
## 3     AA  4.55722071
## 4     HD  2.13338082
## 5     VZ  1.96407736
## 6   CSCO  1.95839284
## 7    UTX  1.91533411
## 8    IBM  1.05081067
## 9   MSFT  0.68311928
## 10  KRFT  0.52011437
## 11   BAC  0.34816939
## 12   HPQ  0.16706292
## 13   AXP -0.08602459
## 14   PFE -0.13480583
## 15  INTC -0.37171502
## 16   MCD -0.41376286
## 17   WMT -0.50388511
## 18     T -0.55202997
## 19   MMM -0.60751327
## 20    KO -0.62531127
## 21   DIS -0.83893759
## 22   CVX -1.03539758
## 23    GE -1.54407809
## 24   JNJ -1.81685159
## 25   TRV -1.83714501
## 26   MRK -2.11864794
## 27   XOM -2.33161478
## 28   JPM -2.69675031
## 29    PG -3.03570844
## 30    BA -3.58630326
# Train caret's "svmLinear" model for a stock's weekly `close` price, score it
# on held-out rows and predict `close` for the week-25 row(s).
#
# Args:
#   stockData: data frame of weekly rows for one stock; needs `week`, `close`,
#     `close_1` and the other predictor columns named in the formula.
#   tik: ticker label copied into the `stock` column of the result.
#
# Returns:
#   A data frame with columns `stock`, `RMSE`, `R2` (both against the held-out
#   `close`) and `wk26_close` (prediction for the week-25 row(s)).
svr_close_stock_fn <- function(stockData, tik) {
  set.seed(123)

  # Rows for week 25 are scored at the very end.
  week25_rows <- filter(stockData, week == 25)

  # nrow / 1.43 (about 70%) of the row indexes are drawn for fitting; the
  # remaining rows form the test set. Week 25 is removed from the fitting rows.
  train_idx <- sample(1:nrow(stockData), nrow(stockData) / 1.43)
  holdout <- stockData[-train_idx, ]
  fit_rows <- filter(stockData[train_idx, ], week != 25)

  # Repeated k-fold cross-validation: 10 folds, 3 repeats.
  cv_setup <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

  # Predictors are centred and scaled before the model is fitted.
  svm_stock <- train(
    close ~ close_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = fit_rows,
    method = "svmLinear",
    trControl = cv_setup,
    preProcess = c("center", "scale")
  )

  # The value of this call is not used.
  summary(svm_stock)

  # Held-out performance.
  holdout_pred <- predict(svm_stock, newdata = holdout)
  holdout_rmse <- RMSE(holdout_pred, holdout$close)
  holdout_r2 <- R2(holdout_pred, holdout$close)

  # Prediction from the week-25 row(s).
  pred_close <- predict(svm_stock, newdata = week25_rows)

  data.frame(
    stock = tik,
    RMSE = holdout_rmse,
    R2 = holdout_r2,
    wk26_close = pred_close
  )
}
# Run the linear-kernel SVR close-price model for every Dow ticker, stack the
# results and attach each ticker's beta.
# Each ticker's data lives in a global data frame named <TICKER>StockLag; its
# first row is dropped before modelling (presumably because the lagged columns
# are undefined there - confirm against where the *StockLag frames are built).
dow_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)

# Collect per-ticker results in a preallocated list and bind once, instead of
# growing the data frame with a separate rbind() per ticker.
# In the recorded run every call emitted caret's warning
# "There were missing values in resampled performance measures."
svr_close_results <- vector("list", length(dow_tickers))
for (i in seq_along(dow_tickers)) {
  # `stock` and `stockData` stay as globals, ending on the last ticker.
  stock <- dow_tickers[[i]]
  lagged <- get(paste0(stock, "StockLag"))
  stockData <- lagged[-1, , drop = FALSE]
  svr_close_results[[i]] <- svr_close_stock_fn(stockData, stock)
}
stock_svm_RMSE_df <- do.call(rbind, svr_close_results)

stock_svm_RMSE_beta_df <- stock_svm_RMSE_df %>%
  inner_join(stockbeta_df, by = c("stock"))

stock_svm_RMSE_beta_df
##    stock      RMSE        R2 wk26_close        beta
## 1    MMM 0.3802664 0.9943455   91.08929 -0.13421958
## 2    AXP 0.2142198 0.9923841   48.29861  0.03239515
## 3     AA 0.1128046 0.9950341   15.29994  0.74909840
## 4      T 0.1503597 0.9927434   30.39391 -0.22282535
## 5    BAC 0.1081417 0.9969500   10.43436 -0.33132903
## 6     BA 0.5078308 0.9846753   71.57247 -0.23741113
## 7    CAT 1.3931046 0.9814216   98.73987  0.92789831
## 8    CVX 0.5250525 0.9967137   97.92804  0.30620406
## 9   CSCO 0.1927739 0.9922807   14.88133 -0.21872018
## 10    KO 0.2715334 0.9893847   65.02723 -0.04235410
## 11    DD 0.2941684 0.9945414   51.95032  0.84145395
## 12   XOM 1.0217313 0.9897918   77.12696  0.10266945
## 13    GE 0.2353072 0.9551450   17.72318 -0.12849894
## 14   HPQ 0.5112175 0.9814440   35.33649  0.24479489
## 15    HD 0.1478074 0.9723425   35.17451  0.19832895
## 16  INTC 0.2867833 0.9204146   21.12982  0.43036653
## 17   IBM 2.0683745 0.9491333  165.52047  0.13346597
## 18   JNJ 0.3075229 0.9878257   65.29513 -0.17262055
## 19   JPM 0.1850479 0.9936928   39.42824 -0.18909367
## 20  KRFT 0.2701153 0.9871429   34.48921 -0.06118033
## 21   MCD 0.5157497 0.9603963   81.76642 -0.42867590
## 22   MRK 0.2901749 0.9806675   34.68842 -0.46722907
## 23  MSFT 0.1117296 0.9951931   24.38165 -0.04802629
## 24   PFE 0.1022475 0.9931887   20.14373 -0.28261619
## 25    PG 1.0045209 0.8959072   62.58114 -0.22741460
## 26   TRV 1.1100552 0.9580407   57.22094  0.06636522
## 27   UTX 0.6241828 0.9832161   84.44774  0.44934253
## 28    VZ 0.1395358 0.9929129   36.03309  0.25720271
## 29   WMT 0.3549703 0.9531464   52.49328 -0.07107965
## 30   DIS 0.1861740 0.9958456   37.72777  0.19028154
# Train caret's "svmLinear" model for a stock's weekly `open` price, score it
# on held-out rows and predict `open` for the week-25 row(s).
#
# Args:
#   stockData: data frame of weekly rows for one stock; needs `week`, `open`,
#     `open_1` and the other predictor columns named in the formula.
#   tik: ticker label copied into the `stock` column of the result.
#
# Returns:
#   A data frame with columns `stock`, `RMSE`, `R2` (both against the held-out
#   `open`) and `wk26_open` (prediction for the week-25 row(s)).
svr_open_stock_fn <- function(stockData, tik) {
  set.seed(123)

  # Rows for week 25 are scored at the very end.
  week25_rows <- filter(stockData, week == 25)

  # nrow / 1.43 (about 70%) of the row indexes are drawn for fitting; the
  # remaining rows form the test set. Week 25 is removed from the fitting rows.
  train_idx <- sample(1:nrow(stockData), nrow(stockData) / 1.43)
  holdout <- stockData[-train_idx, ]
  fit_rows <- filter(stockData[train_idx, ], week != 25)

  # Repeated k-fold cross-validation: 10 folds, 3 repeats.
  cv_setup <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

  # Predictors are centred and scaled before the model is fitted.
  svm_stock <- train(
    open ~ open_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = fit_rows,
    method = "svmLinear",
    trControl = cv_setup,
    preProcess = c("center", "scale")
  )

  # The value of this call is not used.
  summary(svm_stock)

  # Held-out performance.
  holdout_pred <- predict(svm_stock, newdata = holdout)
  holdout_rmse <- RMSE(holdout_pred, holdout$open)
  holdout_r2 <- R2(holdout_pred, holdout$open)

  # Prediction from the week-25 row(s).
  pred_open <- predict(svm_stock, newdata = week25_rows)

  data.frame(
    stock = tik,
    RMSE = holdout_rmse,
    R2 = holdout_r2,
    wk26_open = pred_open
  )
}
# Run the linear-kernel SVR open-price model for every Dow ticker, stack the
# results and attach each ticker's beta.
# Each ticker's data lives in a global data frame named <TICKER>StockLag; its
# first row is dropped before modelling (presumably because the lagged columns
# are undefined there - confirm against where the *StockLag frames are built).
dow_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)

# Collect per-ticker results in a preallocated list and bind once, instead of
# growing the data frame with a separate rbind() per ticker.
# In the recorded run every call emitted caret's warning
# "There were missing values in resampled performance measures."
svr_open_results <- vector("list", length(dow_tickers))
for (i in seq_along(dow_tickers)) {
  # `stock` and `stockData` stay as globals, ending on the last ticker.
  stock <- dow_tickers[[i]]
  lagged <- get(paste0(stock, "StockLag"))
  stockData <- lagged[-1, , drop = FALSE]
  svr_open_results[[i]] <- svr_open_stock_fn(stockData, stock)
}
stock_svm_open_RMSE_df <- do.call(rbind, svr_open_results)

stock_svm_open_RMSE_beta_df <- stock_svm_open_RMSE_df %>%
  inner_join(stockbeta_df, by = c("stock"))

stock_svm_open_RMSE_beta_df
##    stock       RMSE        R2 wk26_open        beta
## 1    MMM 0.11819476 0.9982432  91.52528 -0.13421958
## 2    AXP 0.11327886 0.9941003  48.38340  0.03239515
## 3     AA 0.13011319 0.9747921  14.61926  0.74909840
## 4      T 0.26798950 0.9546457  30.39764 -0.22282535
## 5    BAC 0.15880046 0.9865890  10.37817 -0.33132903
## 6     BA 0.32460122 0.9776563  74.21541 -0.23741113
## 7    CAT 3.10120506 0.8876992  93.60060  0.92789831
## 8    CVX 0.26153278 0.9991382  98.71365  0.30620406
## 9   CSCO 0.42140624 0.9625653  14.74548 -0.21872018
## 10    KO 0.19728909 0.9933203  65.37944 -0.04235410
## 11    DD 0.27717571 0.9959650  49.62808  0.84145395
## 12   XOM 1.84866591 0.9464878  79.71929  0.10266945
## 13    GE 0.52123336 0.7922848  17.99457 -0.12849894
## 14   HPQ 1.40001293 0.8810331  34.48635  0.24479489
## 15    HD 0.27077757 0.9871082  34.25877  0.19832895
## 16  INTC 0.54144445 0.4254526  21.24827  0.43036653
## 17   IBM 3.52987809 0.8375230 163.33635  0.13346597
## 18   JNJ 0.93558619 0.8888535  65.94157 -0.17262055
## 19   JPM 0.22834391 0.9904501  40.26681 -0.18909367
## 20  KRFT 0.18248134 0.9908823  34.27309 -0.06118033
## 21   MCD 0.38411613 0.9808093  81.79427 -0.42867590
## 22   MRK 0.22935978 0.9890732  35.30697 -0.46722907
## 23  MSFT 0.20675445 0.9888172  23.98540 -0.04802629
## 24   PFE 0.06200967 0.9991266  20.14129 -0.28261619
## 25    PG 1.78123334 0.5652423  64.22452 -0.22741460
## 26   TRV 1.95590152 0.5667495  58.51209  0.06636522
## 27   UTX 2.38878103 0.2997049  82.80277  0.44934253
## 28    VZ 0.11777273 0.9890945  35.32093  0.25720271
## 29   WMT 0.19346038 0.9765084  52.75279 -0.07107965
## 30   DIS 0.10637997 0.9983706  37.79230  0.19028154
# Pair each ticker's SVR wk26_close with its SVR wk26_open and express the gap
# as a percentage of wk26_open.
stock_svm_return_df <- stock_svm_RMSE_beta_df %>%
  inner_join(stock_svm_open_RMSE_beta_df, by = c("stock")) %>%
  mutate(return = ((wk26_close - wk26_open) / wk26_open) * 100)

# Column chart of the returns, highest first.
hc <- stock_svm_return_df %>%
  arrange(desc(return)) %>%
  hchart("column", hcaes(x = stock, y = return))
hc %>%
  hc_add_theme(hc_theme_economist()) %>%
  hc_title(text = "Percentage Returns across various stocks for week 26 by SVR Linear")
# Train caret's "svmRadial" model for a stock's weekly `close` price, score it
# on held-out rows and predict `close` for the week-25 row(s).
#
# Args:
#   stockData: data frame of weekly rows for one stock; needs `week`, `close`,
#     `close_1` and the other predictor columns named in the formula.
#   tik: ticker label copied into the `stock` column of the result.
#
# Returns:
#   A data frame with columns `stock`, `RMSE`, `R2` (both against the held-out
#   `close`) and `wk26_close` (prediction for the week-25 row(s)).
svr_close_rad_stock_fn <- function(stockData, tik) {
  set.seed(123)

  # Rows for week 25 are scored at the very end.
  week25_rows <- filter(stockData, week == 25)

  # nrow / 1.43 (about 70%) of the row indexes are drawn for fitting; the
  # remaining rows form the test set. Week 25 is removed from the fitting rows.
  train_idx <- sample(1:nrow(stockData), nrow(stockData) / 1.43)
  holdout <- stockData[-train_idx, ]
  fit_rows <- filter(stockData[train_idx, ], week != 25)

  # Repeated k-fold cross-validation: 10 folds, 3 repeats.
  cv_setup <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

  # Predictors are centred and scaled before the model is fitted.
  svm_stock <- train(
    close ~ close_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = fit_rows,
    method = "svmRadial",
    trControl = cv_setup,
    preProcess = c("center", "scale")
  )

  # The value of this call is not used.
  summary(svm_stock)

  # Held-out performance.
  holdout_pred <- predict(svm_stock, newdata = holdout)
  holdout_rmse <- RMSE(holdout_pred, holdout$close)
  holdout_r2 <- R2(holdout_pred, holdout$close)

  # Prediction from the week-25 row(s).
  pred_close <- predict(svm_stock, newdata = week25_rows)

  data.frame(
    stock = tik,
    RMSE = holdout_rmse,
    R2 = holdout_r2,
    wk26_close = pred_close
  )
}
# Fit the radial close-price SVR for every ticker and stack the per-stock
# results. The tickers and their order match the original hand-written calls.
rad_close_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)
rad_close_results <- vector("list", length(rad_close_tickers))
for (ticker_idx in seq_along(rad_close_tickers)) {
  stock <- rad_close_tickers[[ticker_idx]]
  # Each ticker's lagged table is a global named "<TICKER>StockLag".
  stock_lag <- get(paste0(stock, "StockLag"))
  # The first row is dropped (presumably because its lagged columns are
  # missing -- confirm where the *StockLag tables are built).
  stockData <- stock_lag[-1, ]
  rad_close_results[[ticker_idx]] <- svr_close_rad_stock_fn(stockData, stock)
}
# Bind once at the end instead of growing the data frame call by call.
stock_svm_rad_RMSE_df <- do.call(rbind, rad_close_results)
## Every one of the 30 calls emitted this warning when the script was last run:
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
## There were missing values in resampled performance measures.

# Attach each stock's beta.
stock_svm_rad_RMSE_beta_df <- stock_svm_rad_RMSE_df %>% inner_join(stockbeta_df, by = c("stock"))

stock_svm_rad_RMSE_beta_df
##    stock      RMSE        R2 wk26_close        beta
## 1    MMM 2.3098261 0.2493209   91.66328 -0.13421958
## 2    AXP 0.6774418 0.9273653   48.48744  0.03239515
## 3     AA 0.5311777 0.8849904   15.91051  0.74909840
## 4      T 0.5965136 0.8635392   30.36960 -0.22282535
## 5    BAC 0.7614229 0.8404442   11.20514 -0.33132903
## 6     BA 2.1603040 0.4561310   70.49220 -0.23741113
## 7    CAT 5.0268347 0.8093156   99.53022  0.92789831
## 8    CVX 3.1155324 0.8996591   97.90406  0.30620406
## 9   CSCO 0.3462675 0.9945265   15.36172 -0.21872018
## 10    KO 0.8312368 0.7903015   64.30593 -0.04235410
## 11    DD 1.4223760 0.8140969   52.15566  0.84145395
## 12   XOM 3.0205880 0.6538505   80.01028  0.10266945
## 13    GE 0.3829774 0.9410425   18.60998 -0.12849894
## 14   HPQ 2.2120349 0.6496335   38.20461  0.24479489
## 15    HD 0.6910601 0.3221326   36.04940  0.19832895
## 16  INTC 0.6059049 0.7154331   21.11640  0.43036653
## 17   IBM 5.7330400 0.6305468  165.74619  0.13346597
## 18   JNJ 0.7309835 0.9423770   65.52934 -0.17262055
## 19   JPM 0.6676120 0.9913537   40.91818 -0.18909367
## 20  KRFT 0.3864214 0.9448114   34.34196 -0.06118033
## 21   MCD 1.3590576 0.7912213   81.69304 -0.42867590
## 22   MRK 0.6815219 0.6597023   34.40185 -0.46722907
## 23  MSFT 0.3896214 0.9720678   24.24733 -0.04802629
## 24   PFE 0.6385690 0.6794693   20.29929 -0.28261619
## 25    PG 0.6158132 0.9370559   63.14637 -0.22741460
## 26   TRV 2.5438304 0.3594661   57.93584  0.06636522
## 27   UTX 2.3415057 0.6959357   84.42623  0.44934253
## 28    VZ 0.9085642 0.6232649   35.84094  0.25720271
## 29   WMT 0.9449857 0.7335300   52.73962 -0.07107965
## 30   DIS 0.9236357 0.9037071   39.04941  0.19028154
# Fit a radial-kernel SVR (caret method "svmRadial") for one stock's weekly
# open, score it on a random hold-out, and predict from the week-25 row.
#
# Args:
#   stockData: data frame of weekly rows for one stock. It must contain `week`,
#     `open` and every predictor named in the model formula below.
#   tik: ticker symbol, copied into the `stock` column of the result.
#
# Returns:
#   A data frame with columns `stock`, `RMSE` and `R2` (both computed on the
#   hold-out rows) and `wk26_open` (the model's prediction for each week-25
#   row of `stockData`).
#
# Side effect: calls set.seed(123), so the split and the resampling are
# reproducible, but the caller's RNG state is reset.
svr_open_rad_stock_fn <- function(stockData, tik) {
  set.seed(123)

  # Random split: floor(n / 1.43) rows (roughly 70%) are used for training and
  # the remaining rows form the test set.
  n_rows <- nrow(stockData)
  train <- sample(seq_len(n_rows), floor(n_rows / 1.43))

  # The week-25 row feeds the reported forecast, so it is excluded from the
  # training rows. It is not excluded from the test rows.
  stockDataValidation <- stockData %>% filter(week == 25)
  if (nrow(stockDataValidation) == 0) {
    stop("no week-25 row found for stock ", tik, call. = FALSE)
  }
  stockDataTrain <- stockData[train, ] %>% filter(week != 25)
  stockDataTest <- stockData[-train, ]

  #------------------------------------
  # Model - train
  #------------------------------------
  # Repeated k-fold cross-validation: 10 folds, repeated 3 times.
  train_control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

  # Predictors are centred and scaled before fitting.
  svm_stock <- train(
    open ~ open_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = stockDataTrain, method = "svmRadial",
    trControl = train_control, preProcess = c("center", "scale")
  )

  #------------------------------------
  # Model - test
  #------------------------------------
  stockpredict <- predict(svm_stock, newdata = stockDataTest)
  stockRMSE <- RMSE(stockpredict, stockDataTest$open)
  stockR2 <- R2(stockpredict, stockDataTest$open)

  #------------------------------------
  # Forecast from the week-25 row
  #------------------------------------
  pred_open <- predict(svm_stock, newdata = stockDataValidation)

  data.frame(stock = tik, RMSE = stockRMSE, R2 = stockR2, wk26_open = pred_open)
}
# Fit the radial open-price SVR for every ticker and stack the per-stock
# results. The tickers and their order match the original hand-written calls.
rad_open_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)
rad_open_results <- vector("list", length(rad_open_tickers))
for (ticker_idx in seq_along(rad_open_tickers)) {
  stock <- rad_open_tickers[[ticker_idx]]
  # Each ticker's lagged table is a global named "<TICKER>StockLag".
  stock_lag <- get(paste0(stock, "StockLag"))
  # The first row is dropped (presumably because its lagged columns are
  # missing -- confirm where the *StockLag tables are built).
  stockData <- stock_lag[-1, ]
  rad_open_results[[ticker_idx]] <- svr_open_rad_stock_fn(stockData, stock)
}
# Bind once at the end instead of growing the data frame call by call.
stock_svm__rad_open_RMSE_df <- do.call(rbind, rad_open_results)
## Every one of the 30 calls emitted this warning when the script was last run:
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
## There were missing values in resampled performance measures.

# Attach each stock's beta.
# NOTE(review): this reuses the name `stock_svm_open_RMSE_beta_df`, which the
# "SVR Linear" return calculation earlier in the file also reads; from here on
# the name holds the radial-kernel table. Consider a distinct name.
stock_svm_open_RMSE_beta_df <- stock_svm__rad_open_RMSE_df %>% inner_join(stockbeta_df, by = c("stock"))

stock_svm_open_RMSE_beta_df
##    stock      RMSE        R2 wk26_open        beta
## 1    MMM 1.8250386 0.7010019  92.00644 -0.13421958
## 2    AXP 0.7980375 0.8735437  48.11660  0.03239515
## 3     AA 0.5972163 0.6531913  15.88656  0.74909840
## 4      T 0.5689411 0.7777718  30.18383 -0.22282535
## 5    BAC 0.6043063 0.8894164  11.43113 -0.33132903
## 6     BA 1.3356951 0.5193469  72.47214 -0.23741113
## 7    CAT 4.2943722 0.7886975  97.80009  0.92789831
## 8    CVX 2.8681780 0.9785458  99.03857  0.30620406
## 9   CSCO 0.8161072 0.9559383  16.08062 -0.21872018
## 10    KO 1.0447807 0.7931935  64.77545 -0.04235410
## 11    DD 1.4769285 0.9022599  51.19368  0.84145395
## 12   XOM 3.1785649 0.8244240  80.38796  0.10266945
## 13    GE 0.5865323 0.8728129  19.07402 -0.12849894
## 14   HPQ 3.3257450 0.3333301  38.04851  0.24479489
## 15    HD 1.5870997 0.1059224  36.60216  0.19832895
## 16  INTC 0.3626126 0.7151968  21.26088  0.43036653
## 17   IBM 6.7613364 0.7005999 164.83533  0.13346597
## 18   JNJ 0.8233969 0.8720155  66.18207 -0.17262055
## 19   JPM 0.9930447 0.9408369  42.03521 -0.18909367
## 20  KRFT 0.5430266 0.8648034  33.85192 -0.06118033
## 21   MCD 1.5458830 0.7540043  81.42829 -0.42867590
## 22   MRK 1.2805482 0.3542292  34.70689 -0.46722907
## 23  MSFT 0.4980569 0.8969188  24.02085 -0.04802629
## 24   PFE 0.5434059 0.7434710  20.30861 -0.28261619
## 25    PG 0.7356310 0.8706409  63.97363 -0.22741460
## 26   TRV 2.5478872 0.2897717  58.47670  0.06636522
## 27   UTX 2.6783541 0.1186493  82.81499  0.44934253
## 28    VZ 0.5921405 0.8738799  35.52798  0.25720271
## 29   WMT 0.7888649 0.5459826  52.74240 -0.07107965
## 30   DIS 1.1259752 0.8493104  39.27596  0.19028154
# Join the radial-SVR close table with the open table on the ticker and derive
# the predicted week-26 open-to-close move as a percentage of the open.
stock_svm_rad_return_df <- inner_join(
  stock_svm_rad_RMSE_beta_df,
  stock_svm_open_RMSE_beta_df,
  by = "stock"
) %>%
  mutate(return = ((wk26_close - wk26_open) / wk26_open) * 100)

# Column chart of the predicted returns, largest first.
hc <- hchart(
  arrange(stock_svm_rad_return_df, desc(return)),
  "column",
  hcaes(x = stock, y = return)
)
hc %>%
  hc_add_theme(hc_theme_economist()) %>%
  hc_title(text = "Percentage Returns across various stocks for week 26 by SVR Radial")
# Fit a regression tree (tree::tree) for one stock's weekly close, score it on
# a random hold-out, and predict from the week-25 row. Despite the `svr_`
# prefix in its name, this function does not fit an SVR.
#
# Args:
#   stockData: data frame of weekly rows for one stock. It must contain `week`,
#     `close` and every predictor named in the model formula below.
#   tik: ticker symbol, copied into the `stock` column of the result.
#
# Returns:
#   A data frame with columns `stock`, `RMSE` and `R2` (both computed on the
#   hold-out rows) and `wk26_close` (the tree's prediction for each week-25
#   row of `stockData`).
#
# Side effect: calls set.seed(123), so the split is reproducible, but the
# caller's RNG state is reset.
svr_close_DT_stock_fn <- function(stockData, tik) {
  set.seed(123)

  # Random split: floor(n / 1.43) rows (roughly 70%) are used for training and
  # the remaining rows form the test set.
  n_rows <- nrow(stockData)
  train <- sample(seq_len(n_rows), floor(n_rows / 1.43))

  # The week-25 row feeds the reported forecast, so it is excluded from the
  # training rows. It is not excluded from the test rows.
  stockDataValidation <- stockData %>% filter(week == 25)
  if (nrow(stockDataValidation) == 0) {
    stop("no week-25 row found for stock ", tik, call. = FALSE)
  }
  stockDataTrain <- stockData[train, ] %>% filter(week != 25)
  stockDataTest <- stockData[-train, ]

  #------------------------------------
  # Model - train
  #------------------------------------
  tree_stock <- tree::tree(
    close ~ close_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = stockDataTrain
  )

  #------------------------------------
  # Model - test
  #------------------------------------
  stockpredict <- predict(tree_stock, newdata = stockDataTest)
  stockRMSE <- RMSE(stockpredict, stockDataTest$close)
  stockR2 <- R2(stockpredict, stockDataTest$close)

  #------------------------------------
  # Forecast from the week-25 row
  #------------------------------------
  pred_close <- predict(tree_stock, newdata = stockDataValidation)

  data.frame(stock = tik, RMSE = stockRMSE, R2 = stockR2, wk26_close = pred_close)
}
# Fit the close-price regression tree for every ticker and stack the per-stock
# results. The tickers and their order match the original hand-written calls.
dtree_close_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)
dtree_close_results <- vector("list", length(dtree_close_tickers))
for (ticker_idx in seq_along(dtree_close_tickers)) {
  stock <- dtree_close_tickers[[ticker_idx]]
  # Each ticker's lagged table is a global named "<TICKER>StockLag".
  stock_lag <- get(paste0(stock, "StockLag"))
  # The first row is dropped (presumably because its lagged columns are
  # missing -- confirm where the *StockLag tables are built).
  stockData <- stock_lag[-1, ]
  dtree_close_results[[ticker_idx]] <- svr_close_DT_stock_fn(stockData, stock)
}
# Bind once at the end instead of growing the data frame call by call.
stock_dtree_RMSE_df <- do.call(rbind, dtree_close_results)

# Attach each stock's beta.
stock_Dtree_RMSE_beta_df <- stock_dtree_RMSE_df %>% inner_join(stockbeta_df, by = c("stock"))

stock_Dtree_RMSE_beta_df
##    stock      RMSE         R2 wk26_close        beta
## 1    MMM 1.3640127 0.77225249   89.22800 -0.13421958
## 2    AXP 1.7402782 0.21996476   49.64714  0.03239515
## 3     AA 0.4568888 0.72116531   15.77857  0.74909840
## 4      T 0.8734701 0.61358179   30.93000 -0.22282535
## 5    BAC 0.7099938 0.74272164   11.71111 -0.33132903
## 6     BA 1.2736193 0.93588130   70.69800 -0.23741113
## 7    CAT 5.3173638 0.65013991   99.06571  0.92789831
## 8    CVX 3.9167189 0.80538358   98.17750  0.30620406
## 9   CSCO 1.1231843 0.65950923   15.81800 -0.21872018
## 10    KO 0.5033617 0.94056962   65.31400 -0.04235410
## 11    DD 1.2627142 0.79631176   50.49200  0.84145395
## 12   XOM 3.0970225 0.61688286   79.96400  0.10266945
## 13    GE 0.4896483 0.90083383   18.99000 -0.12849894
## 14   HPQ 1.6905548 0.81592494   35.86000  0.24479489
## 15    HD 0.4389129 0.87120818   34.92600  0.19832895
## 16  INTC 1.0368573 0.09373524   20.80125  0.43036653
## 17   IBM 4.5712513 0.53802047  164.50400  0.13346597
## 18   JNJ 1.2886980 0.72954963   66.11714 -0.17262055
## 19   JPM 1.0395242 0.88678157   42.08167 -0.18909367
## 20  KRFT 0.5278509 0.93864582   34.22889 -0.06118033
## 21   MCD 1.2810933 0.73549862   80.97286 -0.42867590
## 22   MRK 1.4454068 0.09309472   36.20000 -0.46722907
## 23  MSFT 0.9027134 0.51246977   24.31833 -0.04802629
## 24   PFE 0.3957176 0.91900197   20.13800 -0.28261619
## 25    PG 0.4911606 0.94846701   61.97000 -0.22741460
## 26   TRV 3.9221258 0.35116913   58.04000  0.06636522
## 27   UTX 2.6940861 0.59835061   86.35250  0.44934253
## 28    VZ 0.6662863 0.88596265   35.79375  0.25720271
## 29   WMT 0.6671347 0.95123438   52.84875 -0.07107965
## 30   DIS 0.6211773 0.91267898   39.09600  0.19028154
# Fit a regression tree (tree::tree) for one stock's weekly open, score it on
# a random hold-out, and predict from the week-25 row. Despite the `svr_`
# prefix in its name, this function does not fit an SVR.
#
# Args:
#   stockData: data frame of weekly rows for one stock. It must contain `week`,
#     `open` and every predictor named in the model formula below.
#   tik: ticker symbol, copied into the `stock` column of the result.
#
# Returns:
#   A data frame with columns `stock`, `RMSE` and `R2` (both computed on the
#   hold-out rows) and `wk26_open` (the tree's prediction for each week-25
#   row of `stockData`).
#
# Side effect: calls set.seed(123), so the split is reproducible, but the
# caller's RNG state is reset.
svr_open_DT_stock_fn <- function(stockData, tik) {
  set.seed(123)

  # Random split: floor(n / 1.43) rows (roughly 70%) are used for training and
  # the remaining rows form the test set.
  n_rows <- nrow(stockData)
  train <- sample(seq_len(n_rows), floor(n_rows / 1.43))

  # The week-25 row feeds the reported forecast, so it is excluded from the
  # training rows. It is not excluded from the test rows.
  stockDataValidation <- stockData %>% filter(week == 25)
  if (nrow(stockDataValidation) == 0) {
    stop("no week-25 row found for stock ", tik, call. = FALSE)
  }
  stockDataTrain <- stockData[train, ] %>% filter(week != 25)
  stockDataTest <- stockData[-train, ]

  #------------------------------------
  # Model - train
  #------------------------------------
  tree_open_stock <- tree::tree(
    open ~ open_1 + volume + percent_change_price +
      percent_change_volume_over_last_wk + previous_weeks_volume +
      days_to_next_dividend + percent_return_next_dividend,
    data = stockDataTrain
  )

  #------------------------------------
  # Model - test
  #------------------------------------
  stockpredict <- predict(tree_open_stock, newdata = stockDataTest)
  stockRMSE <- RMSE(stockpredict, stockDataTest$open)
  stockR2 <- R2(stockpredict, stockDataTest$open)

  #------------------------------------
  # Forecast from the week-25 row
  #------------------------------------
  pred_open <- predict(tree_open_stock, newdata = stockDataValidation)

  data.frame(stock = tik, RMSE = stockRMSE, R2 = stockR2, wk26_open = pred_open)
}
# Run the open-price decision tree (svr_open_DT_stock_fn) for every stock and
# collect the per-stock metrics in one table.
#
# Each stock's input is looked up by name: the data frame "<TICKER>StockLag"
# must already exist (e.g. MMMStockLag for "MMM"). The tickers are processed
# in the order listed, which is also the row order of the result.
dtree_open_tickers <- c(
  "MMM", "AXP", "AA", "T", "BAC", "BA", "CAT", "CVX", "CSCO", "KO",
  "DD", "XOM", "GE", "HPQ", "HD", "INTC", "IBM", "JNJ", "JPM", "KRFT",
  "MCD", "MRK", "MSFT", "PFE", "PG", "TRV", "UTX", "VZ", "WMT", "DIS"
)

# Preallocate one slot per ticker instead of growing the table with rbind().
dtree_open_results <- vector("list", length(dtree_open_tickers))

# `stock` and `stockData` are assigned at top level (as before), so after the
# loop they still hold the last ticker ("DIS") and its data.
for (ticker_idx in seq_along(dtree_open_tickers)) {
  stock <- dtree_open_tickers[[ticker_idx]]
  stock_lag <- get(paste0(stock, "StockLag"))

  # Skip the first row of the lagged data (presumably because its lagged
  # columns are NA - confirm against where the *StockLag frames are built).
  # Negative indexing avoids the 2:nrow() pitfall of counting backwards when
  # a frame has a single row.
  stockData <- stock_lag[-1, , drop = FALSE]

  # Known warning: for "UTX", cor() reports "the standard deviation is zero"
  # while R2 is computed, and the resulting R2 is NA.
  dtree_open_results[[ticker_idx]] <- svr_open_DT_stock_fn(stockData, stock)
}

stock_dtree_open_RMSE_df <- do.call(rbind, dtree_open_results)

# Attach the columns of stockbeta_df to each stock's row; only stocks present
# in both tables are kept.
stock_Dtree__open_RMSE_beta_df <- stock_dtree_open_RMSE_df %>%
  inner_join(stockbeta_df, by = "stock")

stock_Dtree__open_RMSE_beta_df
##    stock      RMSE          R2 wk26_open        beta
## 1    MMM 2.0643452 0.433372010  89.74000 -0.13421958
## 2    AXP 1.2396555 0.710850398  49.72429  0.03239515
## 3     AA 0.5866951 0.522453094  16.16250  0.74909840
## 4      T 1.2937533 0.198892956  30.95556 -0.22282535
## 5    BAC 0.6888932 0.751696741  11.81625 -0.33132903
## 6     BA 2.0676851 0.037960260  70.76800 -0.23741113
## 7    CAT 3.9473770 0.871723042  97.79000  0.92789831
## 8    CVX 2.7034011 0.857684982  96.11200  0.30620406
## 9   CSCO 1.0232202 0.797148180  16.20000 -0.21872018
## 10    KO 1.5738309 0.416030578  63.67750 -0.04235410
## 11    DD 1.3046509 0.888054787  50.64167  0.84145395
## 12   XOM 4.6601760 0.104914615  85.23600  0.10266945
## 13    GE 0.6399149 0.842387356  19.47222 -0.12849894
## 14   HPQ 1.5957313 0.831523206  36.83600  0.24479489
## 15    HD 0.9926774 0.819579263  35.41800  0.19832895
## 16  INTC 0.5366394 0.514737396  20.43667  0.43036653
## 17   IBM 6.0808974 0.386377616 163.66000  0.13346597
## 18   JNJ 2.1537253 0.716364977  66.01857 -0.17262055
## 19   JPM 1.1971007 0.821442805  42.20000 -0.18909367
## 20  KRFT 0.4543083 0.880131933  34.28571 -0.06118033
## 21   MCD 2.6241116 0.247825969  80.42714 -0.42867590
## 22   MRK 1.9259772 0.002181522  33.18444 -0.46722907
## 23  MSFT 0.5226120 0.845113889  24.36400 -0.04802629
## 24   PFE 0.4126617 0.911801711  20.77833 -0.28261619
## 25    PG 1.8064435 0.513335108  65.95000 -0.22741460
## 26   TRV 2.8095352 0.036388373  58.05000  0.06636522
## 27   UTX 2.9995195          NA  82.28556  0.44934253
## 28    VZ 0.7409469 0.493517374  36.20800  0.25720271
## 29   WMT 1.0492123 0.538332209  52.67571 -0.07107965
## 30   DIS 1.0735381 0.816328766  39.67800  0.19028154
# Combine the close-price and open-price decision tree tables per stock.
# Columns that exist in both tables get the suffix .x (close table) or
# .y (open table). `return` is the percentage change from the predicted
# open to the predicted close.
stock_svr_dt_return_df <- mutate(
  inner_join(
    stock_Dtree_RMSE_beta_df,
    stock_Dtree__open_RMSE_beta_df,
    by = "stock"
  ),
  return = ((wk26_close - wk26_open) / wk26_open) * 100
)

# Column chart of the returns, highest first.
hc <- hchart(
  arrange(stock_svr_dt_return_df, desc(return)),
  "column",
  hcaes(x = stock, y = return)
)
hc_title(
  hc_add_theme(hc, hc_theme_economist()),
  text = "Percentage Returns across various stocks for week 26 by Decision Tree Regression"
)
# stock_return_df
# stock_svm_return_df
# stock_svm_rad_return_df
# stock_svr_dt_return_df


# Average one model's per-stock fit metrics into a single comparison row.
#
# Args:
#   return_df: per-stock results with columns RMSE.x / R2.x (reported as the
#     close-price metrics) and RMSE.y / R2.y (reported as the open-price
#     metrics).
#   model_label: text stored in the `Model` column.
#
# Returns:
#   A one-row data frame: Model, meanCloseRMSE, meanOpenRMSE, meanCloseR2,
#   meanOpenR2.
#
# R2 is NA for a stock whose predictions have zero variance, so the R2 means
# skip missing values for every model (previously only the decision tree's
# open R2 did). RMSE means stay strict: an NA there still surfaces as NA.
summarise_model_fit <- function(return_df, model_label) {
  fit_means <- return_df %>%
    summarise(
      meanCloseRMSE = mean(RMSE.x),
      meanOpenRMSE = mean(RMSE.y),
      meanCloseR2 = mean(R2.x, na.rm = TRUE),
      meanOpenR2 = mean(R2.y, na.rm = TRUE)
    )
  cbind(Model = model_label, fit_means)
}

# One row per model, in the order the models are compared in the charts.
lm_compare_df <- rbind(
  summarise_model_fit(stock_return_df, "Linear Regression"),
  summarise_model_fit(stock_svm_return_df, "SVR Linear Kernel"),
  summarise_model_fit(stock_svm_rad_return_df, "SVR Radial Kernel"),
  summarise_model_fit(stock_svr_dt_return_df, "Decision Tree Regression")
)



# lm_compare_df <- as.data.frame(t(as.matrix(lm_compare_df)))
# 
# model_name <- c(lm_compare_df[1,1],lm_compare_df[1,2],lm_compare_df[1,3],lm_compare_df[1,4])
# 
# lm_compare_df <- lm_compare_df[-1,]
# 
# colnames(lm_compare_df) <- model_name
# 
# mrownames <- rownames(lm_compare_df)
# 
# lm_compare_df <- cbind(Means = mrownames,lm_compare_df)
# 
# # rownames(lm_compare_df) <- c(1:4)
# 
# lm_compare_df %>% ggplot(aes(Model)) +
#   geom_bar(geom=GeomBar)
# 
# 
# # Use position=position_dodge()
# # ggplot(data=lm_compare_df, aes(x=Means, y=len, fill=supp)) +
# # geom_bar(stat="identity", position=position_dodge())
# stock_return_df %>% ggplot(mapping = aes(,return))+geom
# Apply the shared look of the model comparison charts: the Economist theme
# plus a title.
#
# Args:
#   chart: a highchart object, e.g. the result of hchart().
#   title: chart title text.
#
# Returns:
#   The themed and titled chart.
theme_comparison_chart <- function(chart, title) {
  chart %>%
    hc_add_theme(hc_theme_economist()) %>%
    hc_title(text = title)
}

# Each pipeline below ends at top level, so its chart is displayed when the
# script is run or knitted. Titles previously read "Comparision" (typo).

# Mean close-price RMSE per model
lm_compare_df %>%
  hchart("column", hcaes(x = Model, y = meanCloseRMSE)) %>%
  theme_comparison_chart("Stocks' Close Price mean RMSE Comparison")

# Mean open-price RMSE per model
lm_compare_df %>%
  hchart("column", hcaes(x = Model, y = meanOpenRMSE)) %>%
  theme_comparison_chart("Stocks' Open Price mean RMSE Comparison")

# Mean open-price R-squared per model
lm_compare_df %>%
  hchart("column", hcaes(x = Model, y = meanOpenR2)) %>%
  theme_comparison_chart("Stocks' Open Price mean R-Squared Comparison")

# Mean close-price R-squared per model
lm_compare_df %>%
  hchart("column", hcaes(x = Model, y = meanCloseR2)) %>%
  theme_comparison_chart("Stocks' Close Price mean R-Squared Comparison")