# Load the raw Dow Jones index data from the case-study folder.
# NOTE(review): the working directory is a machine-specific absolute path, so
# the script only runs unchanged where that folder exists.
setwd("C:/Users/arami/Desktop/DA 6813/CASE STUDY 3")
dow <- read.table(
  file = "dow_jones_index.data",
  header = TRUE,
  sep = ","
)
# Show the column types exactly as they were read from disk.
str(dow)
## 'data.frame':    750 obs. of  16 variables:
##  $ quarter                           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ stock                             : chr  "AA" "AA" "AA" "AA" ...
##  $ date                              : chr  "1/7/2011" "1/14/2011" "1/21/2011" "1/28/2011" ...
##  $ open                              : chr  "$15.82" "$16.71" "$16.19" "$15.87" ...
##  $ high                              : chr  "$16.72" "$16.71" "$16.38" "$16.63" ...
##  $ low                               : chr  "$15.78" "$15.64" "$15.60" "$15.82" ...
##  $ close                             : chr  "$16.42" "$15.97" "$15.79" "$16.13" ...
##  $ volume                            : int  239655616 242963398 138428495 151379173 154387761 114691279 80023895 132981863 109493077 114332562 ...
##  $ percent_change_price              : num  3.79 -4.43 -2.47 1.64 5.93 ...
##  $ percent_change_volume_over_last_wk: num  NA 1.38 -43.02 9.36 1.99 ...
##  $ previous_weeks_volume             : int  NA 239655616 242963398 138428495 151379173 154387761 114691279 80023895 132981863 109493077 ...
##  $ next_weeks_open                   : chr  "$16.71" "$16.19" "$15.87" "$16.18" ...
##  $ next_weeks_close                  : chr  "$15.97" "$15.79" "$16.13" "$17.14" ...
##  $ percent_change_next_weeks_price   : num  -4.428 -2.471 1.638 5.933 0.231 ...
##  $ days_to_next_dividend             : int  26 19 12 5 97 90 83 76 69 62 ...
##  $ percent_return_next_dividend      : num  0.183 0.188 0.19 0.186 0.175 ...
# Parse the m/d/Y date strings into Date objects.
dow$date <- lubridate::mdy(dow$date)

# The price columns are read as text containing "$"; strip it and convert
# each of them to numeric.
price_cols <- c(
  "open", "high", "low", "close",
  "next_weeks_open", "next_weeks_close"
)
dow[price_cols] <- lapply(
  dow[price_cols],
  function(txt) as.numeric(gsub("\\$", "", txt))
)

# Store volume as double instead of integer.
dow$volume <- as.numeric(dow$volume)

# Confirm the converted column types.
str(dow)
## 'data.frame':    750 obs. of  16 variables:
##  $ quarter                           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ stock                             : chr  "AA" "AA" "AA" "AA" ...
##  $ date                              : Date, format: "2011-01-07" "2011-01-14" ...
##  $ open                              : num  15.8 16.7 16.2 15.9 16.2 ...
##  $ high                              : num  16.7 16.7 16.4 16.6 17.4 ...
##  $ low                               : num  15.8 15.6 15.6 15.8 16.2 ...
##  $ close                             : num  16.4 16 15.8 16.1 17.1 ...
##  $ volume                            : num  2.40e+08 2.43e+08 1.38e+08 1.51e+08 1.54e+08 ...
##  $ percent_change_price              : num  3.79 -4.43 -2.47 1.64 5.93 ...
##  $ percent_change_volume_over_last_wk: num  NA 1.38 -43.02 9.36 1.99 ...
##  $ previous_weeks_volume             : int  NA 239655616 242963398 138428495 151379173 154387761 114691279 80023895 132981863 109493077 ...
##  $ next_weeks_open                   : num  16.7 16.2 15.9 16.2 17.3 ...
##  $ next_weeks_close                  : num  16 15.8 16.1 17.1 17.4 ...
##  $ percent_change_next_weeks_price   : num  -4.428 -2.471 1.638 5.933 0.231 ...
##  $ days_to_next_dividend             : int  26 19 12 5 97 90 83 76 69 62 ...
##  $ percent_return_next_dividend      : num  0.183 0.188 0.19 0.186 0.175 ...
# Keep every column except positions 12 and 13
# (next_weeks_open and next_weeks_close).
dow2 <- dow[, c(1:11, 14:16)]

# Quarter 1 rows form the training set, quarter 2 rows the test set.
dow.train <- dow2[dow2$quarter == 1, ]
dow.test <- dow2[dow2$quarter == 2, ]

AA Regular Linear Model

set.seed(42)
# Select the AA rows and only the columns without NAs
AAtrain <- dow.train[dow.train$stock == "AA", c(3:9, 12:13)]
AAtest <- dow.test[dow.test$stock == "AA", c(3:9, 12:13)]

# Use the date as the row label, then drop the date column itself
row.names(AAtrain) <- AAtrain$date
AAtrain$date <- NULL
row.names(AAtest) <- AAtest$date
AAtest$date <- NULL

# Full model: the response regressed on every remaining column
AAlm <- lm(percent_change_next_weeks_price ~ ., data = AAtrain)
summary(AAlm)
## 
## Call:
## lm(formula = percent_change_next_weeks_price ~ ., data = AAtrain)
## 
## Residuals:
## 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 2011-02-18 
##    -2.3878     1.2061    -2.2751     3.0591     0.1602     1.3261    -0.1014 
## 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25 
##     0.7606    -2.8699    -0.2603     2.0997    -0.7173 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)            6.152e+01  6.622e+01   0.929    0.405
## open                  -1.704e+02  2.417e+02  -0.705    0.520
## high                   3.863e+00  9.696e+00   0.398    0.711
## low                   -8.221e-02  5.356e+00  -0.015    0.988
## close                  1.631e+02  2.371e+02   0.688    0.529
## volume                -2.655e-08  3.645e-08  -0.728    0.507
## percent_change_price  -2.718e+01  3.932e+01  -0.691    0.527
## days_to_next_dividend -8.842e-03  7.007e-02  -0.126    0.906
## 
## Residual standard error: 3.053 on 4 degrees of freedom
## Multiple R-squared:  0.6403, Adjusted R-squared:  0.01089 
## F-statistic: 1.017 on 7 and 4 DF,  p-value: 0.5254
library(olsrr)
## Warning: package 'olsrr' was built under R version 4.0.3
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
# Backward elimination by p-value (prem = 0.05). AAlm now holds the olsrr
# result object; its $model element is the final lm fit.
AAlm <- ols_step_backward_p(AAlm, prem = 0.05, details = FALSE)
AAlm$model
## 
## Call:
## lm(formula = paste(response, "~", paste(preds, collapse = " + ")), 
##     data = l)
## 
## Coefficients:
## (Intercept)         open       volume  
##   7.465e+01   -4.179e+00   -3.903e-08
# Refit the retained predictors directly so AAlm is a plain lm object again
AAlm <- lm(
  percent_change_next_weeks_price ~ open + volume,
  data = AAtrain
)
summary(AAlm)
## 
## Call:
## lm(formula = percent_change_next_weeks_price ~ open + volume, 
##     data = AAtrain)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6203 -0.6679  0.0684  1.4673  3.5047 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  7.465e+01  2.478e+01   3.013   0.0147 *
## open        -4.179e+00  1.437e+00  -2.909   0.0174 *
## volume      -3.903e-08  1.522e-08  -2.564   0.0305 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.309 on 9 degrees of freedom
## Multiple R-squared:  0.5373, Adjusted R-squared:  0.4345 
## F-statistic: 5.226 on 2 and 9 DF,  p-value: 0.03117
# In-sample predictions. predict.lm() has no `data` argument (a `data = ...`
# value is silently absorbed by `...`), so ask for the fitted values
# explicitly; the numbers are the same training-set predictions.
AAtrain$train_preds <- fitted(AAlm)
# Out-of-sample predictions for the quarter-2 (test) rows.
AAtest$test_preds <- predict(AAlm, newdata = AAtest, type = "response")
library(car)
## Loading required package: carData
# Variance inflation factors for the predictors of the reduced AA model
vif(AAlm)
##     open   volume 
## 1.250613 1.250613
# Draw the first four lm diagnostic plots on one 2 x 2 page, then restore the
# previous graphics settings so later base-graphics plots are not drawn into
# a quarter-page panel.
old_par <- par(mfrow = c(2, 2))
plot(AAlm, which = 1:4)
par(old_par)

library(ggplot2)

# Training rows: actual values in red, model predictions in the default colour
ggplot(
  AAtrain,
  aes(x = row.names(AAtrain), y = percent_change_next_weeks_price)
) +
  geom_point(colour = "red") +
  geom_point(aes(y = train_preds)) +
  theme_minimal()

# Test rows: actual values in red, model predictions in the default colour
ggplot(
  AAtest,
  aes(x = row.names(AAtest), y = percent_change_next_weeks_price)
) +
  geom_point(colour = "red") +
  geom_point(aes(y = test_preds)) +
  theme_minimal()

# Mean squared and mean absolute error on the training and test rows
AAtrainmse <- with(AAtrain, mean((percent_change_next_weeks_price - train_preds)^2))
AAtrainmae <- with(AAtrain, mean(abs(percent_change_next_weeks_price - train_preds)))
AAtestmse <- with(AAtest, mean((percent_change_next_weeks_price - test_preds)^2))
AAtestmae <- with(AAtest, mean(abs(percent_change_next_weeks_price - test_preds)))

# One-row matrix collecting the error metrics; despite its name it holds the
# test errors as well as the training errors.
train_error <- cbind(AAtrainmse, AAtrainmae, AAtestmse, AAtestmae)

Linear Model Without Lag

# Row index 1..n of the AA training data, used as a time regressor
day <- seq(from = 1, to = nrow(AAtrain), by = 1)

# Regress the response on the index alone; `- 1` removes the intercept
AAlm2 <- lm(AAtrain$percent_change_next_weeks_price ~ day - 1)
summary(AAlm2)
## 
## Call:
## lm(formula = AAtrain$percent_change_next_weeks_price ~ day - 
##     1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4977 -2.3408 -0.5816  1.2232  5.6563 
## 
## Coefficients:
##     Estimate Std. Error t value Pr(>|t|)
## day  0.06923    0.11866   0.583    0.571
## 
## Residual standard error: 3.025 on 11 degrees of freedom
## Multiple R-squared:  0.03001,    Adjusted R-squared:  -0.05817 
## F-statistic: 0.3404 on 1 and 11 DF,  p-value: 0.5714
# In-sample predictions of the day-index model. predict.lm() has no `data`
# argument (it would be silently absorbed by `...`), so take the fitted
# values explicitly.
AAtrain$train_preds2 <- fitted(AAlm2)
# Actual values in red, fitted values in the default colour
ggplot(data = AAtrain, aes(y = percent_change_next_weeks_price, x = row.names(AAtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = train_preds2)) +
  theme_minimal()

Checking for lags

# Lag plots (lags 1 to 4) of the response in the AA training rows
lag.plot(AAtrain$percent_change_next_weeks_price, pch = ".", set.lags = 1:4) 

# Same lag plots for the closing price
lag.plot(AAtrain$close, pch = ".", set.lags = 1:4) 

# Same lag plots for the opening price
lag.plot(AAtrain$open, pch = ".", set.lags = 1:4) 

# Same lag plots for the current week's percent price change
lag.plot(AAtrain$percent_change_price, pch = ".", set.lags = 1:4) 

AXP Regular Linear Model

set.seed(42)
# Select the AXP rows and only the columns without NAs
AXPtrain <- dow.train[dow.train$stock == "AXP", c(3:9, 12:13)]
AXPtest <- dow.test[dow.test$stock == "AXP", c(3:9, 12:13)]

# Use the date as the row label, then drop the date column itself
row.names(AXPtrain) <- AXPtrain$date
AXPtrain$date <- NULL
row.names(AXPtest) <- AXPtest$date
AXPtest$date <- NULL

# Full model: the response regressed on every remaining column
AXPlm <- lm(percent_change_next_weeks_price ~ ., data = AXPtrain)
summary(AXPlm)
## 
## Call:
## lm(formula = percent_change_next_weeks_price ~ ., data = AXPtrain)
## 
## Residuals:
## 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 2011-02-18 
##   0.541448   0.064563  -0.044797   0.187946   0.003855  -0.501065  -0.425276 
## 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25 
##   0.164832  -1.449268   0.464004   0.164303   0.829456 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)   
## (Intercept)            1.807e+02  2.388e+01   7.564  0.00164 **
## open                   8.042e+01  2.591e+01   3.104  0.03609 * 
## high                  -5.673e+00  1.254e+00  -4.523  0.01063 * 
## low                    3.273e+00  1.067e+00   3.067  0.03739 * 
## close                 -8.197e+01  2.565e+01  -3.196  0.03302 * 
## volume                 4.846e-08  7.206e-08   0.673  0.53810   
## percent_change_price   3.664e+01  1.156e+01   3.171  0.03384 * 
## days_to_next_dividend  3.484e-02  1.370e-02   2.543  0.06379 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9778 on 4 degrees of freedom
## Multiple R-squared:  0.9627, Adjusted R-squared:  0.8974 
## F-statistic: 14.74 on 7 and 4 DF,  p-value: 0.0103
# Backward elimination by p-value (prem = 0.05); the final lm fit is stored
# in the $model element of the returned object.
AXPlm <- ols_step_backward_p(AXPlm, prem = 0.05, details = FALSE)
AXPlm$model
## 
## Call:
## lm(formula = paste(response, "~", paste(preds, collapse = " + ")), 
##     data = l)
## 
## Coefficients:
##           (Intercept)                   open                   high  
##             189.77500               82.95806               -5.40782  
##                   low                  close   percent_change_price  
##               2.84547              -84.52516               37.73375  
## days_to_next_dividend  
##               0.03644
# Refit the predictors kept by the backward elimination as a plain lm
AXPlm <- lm(
  percent_change_next_weeks_price ~ open + high + low + close +
    percent_change_price + days_to_next_dividend,
  data = AXPtrain
)
summary(AXPlm)
## 
## Call:
## lm(formula = percent_change_next_weeks_price ~ open + high + 
##     low + close + percent_change_price + days_to_next_dividend, 
##     data = AXPtrain)
## 
## Residuals:
## 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 2011-02-18 
##   0.626703  -0.412721   0.171839   0.219440   0.214900  -0.256396  -0.526855 
## 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25 
##  -0.002862  -1.495415   0.493249   0.110962   0.857157 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           189.77500   18.54834  10.231 0.000153 ***
## open                   82.95806   24.19161   3.429 0.018652 *  
## high                   -5.40782    1.12337  -4.814 0.004824 ** 
## low                     2.84547    0.80875   3.518 0.016949 *  
## close                 -84.52516   23.93431  -3.532 0.016712 *  
## percent_change_price   37.73375   10.79514   3.495 0.017369 *  
## days_to_next_dividend   0.03644    0.01273   2.862 0.035307 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9227 on 5 degrees of freedom
## Multiple R-squared:  0.9585, Adjusted R-squared:  0.9086 
## F-statistic: 19.23 on 6 and 5 DF,  p-value: 0.002607
# Variance inflation factors of the stepwise-selected AXP model
vif(AXPlm)
##                  open                  high                   low 
##           8200.952596             13.991812              6.905939 
##                 close  percent_change_price days_to_next_dividend 
##           9406.878888          14505.652148              1.333786
# Refit without percent_change_price, the predictor with the largest VIF
AXPlm <- lm(
  percent_change_next_weeks_price ~ open + high + low + close +
    days_to_next_dividend,
  data = AXPtrain
)
summary(AXPlm)
## 
## Call:
## lm(formula = percent_change_next_weeks_price ~ open + high + 
##     low + close + days_to_next_dividend, data = AXPtrain)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0485 -0.5130  0.4404  0.7632  1.3007 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)   
## (Intercept)           154.49290   26.36121   5.861  0.00109 **
## open                   -1.58602    0.80094  -1.980  0.09500 . 
## high                   -2.32627    1.17945  -1.972  0.09604 . 
## low                     1.37747    1.17083   1.176  0.28395   
## close                  -0.88196    0.83323  -1.058  0.33058   
## days_to_next_dividend   0.04485    0.02118   2.118  0.07853 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.563 on 6 degrees of freedom
## Multiple R-squared:  0.857,  Adjusted R-squared:  0.7378 
## F-statistic: 7.189 on 5 and 6 DF,  p-value: 0.0162
# In-sample predictions. predict.lm() has no `data` argument (a `data = ...`
# value is silently absorbed by `...`), so use the fitted values explicitly.
AXPtrain$train_preds <- fitted(AXPlm)
# Out-of-sample predictions for the quarter-2 (test) rows.
AXPtest$test_preds <- predict(AXPlm, newdata = AXPtest, type = "response")
# Variance inflation factors of the reduced AXP model
vif(AXPlm)
##                  open                  high                   low 
##              3.132585              5.374669              5.043664 
##                 close days_to_next_dividend 
##              3.972850              1.286171
# Diagnostic plots for the AXP fit: plot AXPlm here, not AAlm (the AA model
# from the previous section). The first four lm diagnostics share one 2 x 2
# page and the previous graphics settings are restored afterwards.
old_par <- par(mfrow = c(2, 2))
plot(AXPlm, which = 1:4)
par(old_par)

# Training rows: actual values in red, model predictions in the default colour
ggplot(
  AXPtrain,
  aes(x = row.names(AXPtrain), y = percent_change_next_weeks_price)
) +
  geom_point(colour = "red") +
  geom_point(aes(y = train_preds)) +
  theme_minimal()

# Test rows: actual values in red, model predictions in the default colour
ggplot(
  AXPtest,
  aes(x = row.names(AXPtest), y = percent_change_next_weeks_price)
) +
  geom_point(colour = "red") +
  geom_point(aes(y = test_preds)) +
  theme_minimal()

# Mean squared and mean absolute error on the training and test rows
AXPtrainmse <- with(AXPtrain, mean((percent_change_next_weeks_price - train_preds)^2))
AXPtrainmae <- with(AXPtrain, mean(abs(percent_change_next_weeks_price - train_preds)))
AXPtestmse <- with(AXPtest, mean((percent_change_next_weeks_price - test_preds)^2))
AXPtestmae <- with(AXPtest, mean(abs(percent_change_next_weeks_price - test_preds)))

# Append the four AXP metrics to the running one-row error matrix
train_error <- cbind(train_error, AXPtrainmse, AXPtrainmae, AXPtestmse, AXPtestmae)
# Intercept-free regression of the response on the row index.
# NOTE(review): `day` is the vector built from AAtrain earlier; this relies
# on AXPtrain having the same number of rows.
AXPlm2 <- lm(AXPtrain$percent_change_next_weeks_price ~ day - 1)
summary(AXPlm2)
## 
## Call:
## lm(formula = AXPtrain$percent_change_next_weeks_price ~ day - 
##     1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8347 -1.1246 -0.1756  0.9375  6.2151 
## 
## Coefficients:
##     Estimate Std. Error t value Pr(>|t|)
## day  0.02632    0.12007   0.219    0.831
## 
## Residual standard error: 3.061 on 11 degrees of freedom
## Multiple R-squared:  0.004349,   Adjusted R-squared:  -0.08617 
## F-statistic: 0.04804 on 1 and 11 DF,  p-value: 0.8305
# In-sample predictions of the day-index model. predict.lm() has no `data`
# argument (it would be silently absorbed by `...`), so take the fitted
# values explicitly.
AXPtrain$train_preds2 <- fitted(AXPlm2)
# Actual values in red, fitted values in the default colour
ggplot(data = AXPtrain, aes(y = percent_change_next_weeks_price, x = row.names(AXPtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = train_preds2)) +
  theme_minimal()

# Lag plots (lags 1 to 4) of the response in the AXP training rows
lag.plot(AXPtrain$percent_change_next_weeks_price, pch = ".", set.lags = 1:4) 

# Same lag plots for the closing price
lag.plot(AXPtrain$close, pch = ".", set.lags = 1:4) 

# Same lag plots for the opening price
lag.plot(AXPtrain$open, pch = ".", set.lags = 1:4) 

# Same lag plots for the current week's percent price change
lag.plot(AXPtrain$percent_change_price, pch = ".", set.lags = 1:4) 

BA Regular Linear Model

set.seed(42)
# Select the BA rows and only the columns without NAs
BAtrain <- dow.train[dow.train$stock == "BA", c(3:9, 12:13)]
BAtest <- dow.test[dow.test$stock == "BA", c(3:9, 12:13)]

# Use the date as the row label, then drop the date column itself
row.names(BAtrain) <- BAtrain$date
BAtrain$date <- NULL
row.names(BAtest) <- BAtest$date
BAtest$date <- NULL

# Full model: the response regressed on every remaining column
BAlm <- lm(percent_change_next_weeks_price ~ ., data = BAtrain)
summary(BAlm)
## 
## Call:
## lm(formula = percent_change_next_weeks_price ~ ., data = BAtrain)
## 
## Residuals:
## 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 2011-02-18 
##   -0.40080   -1.45053   -1.35852    1.04623    0.75875    1.12291    0.64740 
## 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25 
##   -2.16209    0.01231   -1.86918    1.57167    2.08185 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)            9.640e+01  9.468e+01   1.018    0.366
## open                   3.474e+00  3.746e+01   0.093    0.931
## high                   2.378e+00  2.335e+00   1.018    0.366
## low                   -1.135e+00  1.288e+00  -0.881    0.428
## close                 -6.002e+00  3.684e+01  -0.163    0.878
## volume                -3.849e-07  3.357e-07  -1.146    0.316
## percent_change_price   2.760e+00  2.580e+01   0.107    0.920
## days_to_next_dividend  6.674e-03  4.085e-02   0.163    0.878
## 
## Residual standard error: 2.368 on 4 degrees of freedom
## Multiple R-squared:  0.5505, Adjusted R-squared:  -0.236 
## F-statistic: 0.6999 on 7 and 4 DF,  p-value: 0.6814
# Backward elimination by p-value (prem = 0.05); the final lm fit is stored
# in the $model element of the returned object.
BAlm <- ols_step_backward_p(BAlm, prem = 0.05, details = FALSE)
BAlm$model
## 
## Call:
## lm(formula = paste(response, "~", paste(preds, collapse = " + ")), 
##     data = l)
## 
## Coefficients:
## (Intercept)        close  
##     61.2202      -0.8537
# Refit the single retained predictor so BAlm is a plain lm object again
BAlm <- lm(
  percent_change_next_weeks_price ~ close,
  data = BAtrain
)
summary(BAlm)
## 
## Call:
## lm(formula = percent_change_next_weeks_price ~ close, data = BAtrain)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2253 -0.5766  0.4247  0.9765  2.1131 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  61.2202    26.4362   2.316   0.0431 *
## close        -0.8537     0.3709  -2.302   0.0441 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.806 on 10 degrees of freedom
## Multiple R-squared:  0.3463, Adjusted R-squared:  0.2809 
## F-statistic: 5.298 on 1 and 10 DF,  p-value: 0.04413
# In-sample predictions. predict.lm() has no `data` argument (a `data = ...`
# value is silently absorbed by `...`), so use the fitted values explicitly.
BAtrain$train_preds <- fitted(BAlm)
# Out-of-sample predictions for the quarter-2 (test) rows.
BAtest$test_preds <- predict(BAlm, newdata = BAtest, type = "response")
# Training rows: actual values in red, model predictions in the default colour
ggplot(
  BAtrain,
  aes(x = row.names(BAtrain), y = percent_change_next_weeks_price)
) +
  geom_point(colour = "red") +
  geom_point(aes(y = train_preds)) +
  theme_minimal()

# Test rows: actual values in red, model predictions in the default colour
ggplot(
  BAtest,
  aes(x = row.names(BAtest), y = percent_change_next_weeks_price)
) +
  geom_point(colour = "red") +
  geom_point(aes(y = test_preds)) +
  theme_minimal()

# Mean squared and mean absolute error on the training and test rows
BAtrainmse <- with(BAtrain, mean((percent_change_next_weeks_price - train_preds)^2))
BAtrainmae <- with(BAtrain, mean(abs(percent_change_next_weeks_price - train_preds)))
BAtestmse <- with(BAtest, mean((percent_change_next_weeks_price - test_preds)^2))
BAtestmae <- with(BAtest, mean(abs(percent_change_next_weeks_price - test_preds)))

# Append the four BA metrics to the running one-row error matrix
train_error <- cbind(train_error, BAtrainmse, BAtrainmae, BAtestmse, BAtestmae)

BAC Regular Linear Model

set.seed(42)
# Select the BAC rows and only the columns without NAs
BACtrain <- dow.train[dow.train$stock == "BAC", c(3:9, 12:13)]
BACtest <- dow.test[dow.test$stock == "BAC", c(3:9, 12:13)]

# Use the date as the row label, then drop the date column itself
row.names(BACtrain) <- BACtrain$date
BACtrain$date <- NULL
row.names(BACtest) <- BACtest$date
BACtest$date <- NULL

# Full model: the response regressed on every remaining column
BAClm <- lm(percent_change_next_weeks_price ~ ., data = BACtrain)
summary(BAClm)
## 
## Call:
## lm(formula = percent_change_next_weeks_price ~ ., data = BACtrain)
## 
## Residuals:
## 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 2011-02-18 
##  -0.498049  -0.100233  -0.006617   1.644130   0.247643   0.455633  -0.634564 
## 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25 
##   0.093428   3.891850  -0.422557  -3.493422  -1.177242 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)            6.627e+01  5.191e+01   1.277    0.271
## open                  -3.311e+01  9.341e+01  -0.354    0.741
## high                   3.970e+00  1.063e+01   0.373    0.728
## low                    1.227e+01  5.891e+00   2.083    0.106
## close                  1.213e+01  9.071e+01   0.134    0.900
## volume                 7.965e-09  4.920e-09   1.619    0.181
## percent_change_price  -3.296e+00  1.321e+01  -0.250    0.815
## days_to_next_dividend -3.966e-02  3.533e-02  -1.123    0.324
## 
## Residual standard error: 2.853 on 4 degrees of freedom
## Multiple R-squared:  0.8104, Adjusted R-squared:  0.4786 
## F-statistic: 2.443 on 7 and 4 DF,  p-value: 0.2029
# Backward elimination by p-value (prem = 0.05); the final lm fit is stored
# in the $model element of the returned object.
BAClm <- ols_step_backward_p(BAClm, prem = 0.05, details = FALSE)
BAClm$model
## 
## Call:
## lm(formula = paste(response, "~", paste(preds, collapse = " + ")), 
##     data = l)
## 
## Coefficients:
##          (Intercept)                  open                   low  
##            5.280e+01            -1.664e+01             1.276e+01  
##               volume  percent_change_price  
##            9.013e-09            -1.215e+00
# Refit as a plain lm.
# NOTE(review): `close` is part of this formula although the backward
# elimination result printed just before it keeps only open, low, volume and
# percent_change_price -- confirm that adding it back is intentional.
BAClm <- lm(
  percent_change_next_weeks_price ~ open + close + low + volume +
    percent_change_price,
  data = BACtrain
)
summary(BAClm)
## 
## Call:
## lm(formula = percent_change_next_weeks_price ~ open + close + 
##     low + volume + percent_change_price, data = BACtrain)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8797 -0.5751 -0.0326  1.0836  2.6131 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           5.757e+01  4.437e+01   1.298   0.2421  
## open                  8.691e-01  6.946e+01   0.013   0.9904  
## close                -1.837e+01  7.270e+01  -0.253   0.8089  
## low                   1.333e+01  5.356e+00   2.488   0.0473 *
## volume                8.608e-09  3.958e-09   2.175   0.0726 .
## percent_change_price  1.349e+00  1.015e+01   0.133   0.8987  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.672 on 6 degrees of freedom
## Multiple R-squared:  0.7505, Adjusted R-squared:  0.5426 
## F-statistic:  3.61 on 5 and 6 DF,  p-value: 0.07477
# Variance inflation factors of the five-predictor BAC model
vif(BAClm)
##                 open                close                  low 
##           997.892964          2108.264982             6.404622 
##               volume percent_change_price 
##             1.454521          2627.307409
# Refit without percent_change_price, the predictor with the largest VIF
BAClm <- lm(
  percent_change_next_weeks_price ~ open + close + low + volume,
  data = BACtrain
)
summary(BAClm)
## 
## Call:
## lm(formula = percent_change_next_weeks_price ~ open + close + 
##     low + volume, data = BACtrain)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9390 -0.5759 -0.0083  1.1697  2.5086 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  5.510e+01  3.735e+01   1.475   0.1836  
## open        -8.350e+00  2.719e+00  -3.071   0.0180 *
## close       -8.724e+00  2.956e+00  -2.951   0.0214 *
## low          1.306e+01  4.590e+00   2.844   0.0249 *
## volume       8.823e-09  3.349e-09   2.634   0.0337 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.478 on 7 degrees of freedom
## Multiple R-squared:  0.7498, Adjusted R-squared:  0.6068 
## F-statistic: 5.244 on 4 and 7 DF,  p-value: 0.02839
# Variance inflation factors after dropping percent_change_price
vif(BAClm)
##     open    close      low   volume 
## 1.778670 4.054507 5.470889 1.211390
# In-sample predictions. predict.lm() has no `data` argument (a `data = ...`
# value is silently absorbed by `...`), so use the fitted values explicitly.
BACtrain$train_preds <- fitted(BAClm)
# Out-of-sample predictions for the quarter-2 (test) rows.
BACtest$test_preds <- predict(BAClm, newdata = BACtest, type = "response")
# Training rows: actual values in red, model predictions in the default colour
ggplot(
  BACtrain,
  aes(x = row.names(BACtrain), y = percent_change_next_weeks_price)
) +
  geom_point(colour = "red") +
  geom_point(aes(y = train_preds)) +
  theme_minimal()

# Test rows: actual values in red, model predictions in the default colour
ggplot(
  BACtest,
  aes(x = row.names(BACtest), y = percent_change_next_weeks_price)
) +
  geom_point(colour = "red") +
  geom_point(aes(y = test_preds)) +
  theme_minimal()

# Mean squared and mean absolute error on the training and test rows
BACtrainmse <- with(BACtrain, mean((percent_change_next_weeks_price - train_preds)^2))
BACtrainmae <- with(BACtrain, mean(abs(percent_change_next_weeks_price - train_preds)))
BACtestmse <- with(BACtest, mean((percent_change_next_weeks_price - test_preds)^2))
BACtestmae <- with(BACtest, mean(abs(percent_change_next_weeks_price - test_preds)))

# Append the four BAC metrics to the running one-row error matrix
train_error <- cbind(train_error, BACtrainmse, BACtrainmae, BACtestmse, BACtestmae)

XOM Regular Linear Model

set.seed(42)
# Select the XOM rows and only the columns without NAs
XOMtrain <- dow.train[dow.train$stock == "XOM", c(3:9, 12:13)]
XOMtest <- dow.test[dow.test$stock == "XOM", c(3:9, 12:13)]

# Use the date as the row label, then drop the date column itself
row.names(XOMtrain) <- XOMtrain$date
XOMtrain$date <- NULL
row.names(XOMtest) <- XOMtest$date
XOMtest$date <- NULL

# Full model: the response regressed on every remaining column
XOMlm <- lm(percent_change_next_weeks_price ~ ., data = XOMtrain)
summary(XOMlm)
## 
## Call:
## lm(formula = percent_change_next_weeks_price ~ ., data = XOMtrain)
## 
## Residuals:
## 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 2011-02-18 
##     0.8232    -0.2011    -2.6797     0.4425     0.7345     1.4395    -0.4639 
## 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25 
##    -0.7120    -0.5397    -0.1419     0.4795     0.8191 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)            9.893e+01  3.820e+01   2.590   0.0607 .
## open                  -2.560e+01  1.930e+01  -1.327   0.2553  
## high                   1.202e+00  1.872e+00   0.642   0.5556  
## low                   -4.425e-01  8.976e-01  -0.493   0.6479  
## close                  2.372e+01  1.788e+01   1.326   0.2554  
## volume                -6.114e-08  5.812e-08  -1.052   0.3522  
## percent_change_price  -2.069e+01  1.523e+01  -1.358   0.2459  
## days_to_next_dividend -3.303e-02  4.563e-02  -0.724   0.5093  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.778 on 4 degrees of freedom
## Multiple R-squared:  0.7973, Adjusted R-squared:  0.4426 
## F-statistic: 2.248 on 7 and 4 DF,  p-value: 0.2264
# Backward elimination by p-value (prem = 0.05); the final lm fit is stored
# in the $model element of the returned object.
XOMlm <- ols_step_backward_p(XOMlm, prem = 0.05, details = FALSE)
XOMlm$model
## 
## Call:
## lm(formula = paste(response, "~", paste(preds, collapse = " + ")), 
##     data = l)
## 
## Coefficients:
## (Intercept)         open  
##     29.8502      -0.3595
# Refit the single retained predictor so XOMlm is a plain lm object again
XOMlm <- lm(
  percent_change_next_weeks_price ~ open,
  data = XOMtrain
)
summary(XOMlm)
## 
## Call:
## lm(formula = percent_change_next_weeks_price ~ open, data = XOMtrain)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3141 -1.4107  0.0342  0.8313  3.1983 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  29.8502    12.2318   2.440   0.0348 *
## open         -0.3595     0.1510  -2.381   0.0385 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.995 on 10 degrees of freedom
## Multiple R-squared:  0.3618, Adjusted R-squared:  0.298 
## F-statistic: 5.669 on 1 and 10 DF,  p-value: 0.03855
# In-sample predictions. predict.lm() has no `data` argument (a `data = ...`
# value is silently absorbed by `...`), so use the fitted values explicitly.
XOMtrain$train_preds <- fitted(XOMlm)
# Out-of-sample predictions for the quarter-2 (test) rows.
XOMtest$test_preds <- predict(XOMlm, newdata = XOMtest, type = "response")
# Training rows: actual values in red, model predictions in the default colour
ggplot(
  XOMtrain,
  aes(x = row.names(XOMtrain), y = percent_change_next_weeks_price)
) +
  geom_point(colour = "red") +
  geom_point(aes(y = train_preds)) +
  theme_minimal()

# Test rows: actual values in red, model predictions in the default colour
ggplot(
  XOMtest,
  aes(x = row.names(XOMtest), y = percent_change_next_weeks_price)
) +
  geom_point(colour = "red") +
  geom_point(aes(y = test_preds)) +
  theme_minimal()

# Mean squared and mean absolute error on the training and test rows
XOMtrainmse <- with(XOMtrain, mean((percent_change_next_weeks_price - train_preds)^2))
XOMtrainmae <- with(XOMtrain, mean(abs(percent_change_next_weeks_price - train_preds)))
XOMtestmse <- with(XOMtest, mean((percent_change_next_weeks_price - test_preds)^2))
XOMtestmae <- with(XOMtest, mean(abs(percent_change_next_weeks_price - test_preds)))

# Append the four XOM metrics to the running one-row error matrix
train_error <- cbind(train_error, XOMtrainmse, XOMtrainmae, XOMtestmse, XOMtestmae)

AA Regression Tree

library(tree)
## Warning: package 'tree' was built under R version 4.0.4
set.seed(42)

# Single regression tree on the first eight AA columns, which leaves out the
# prediction columns appended to AAtrain earlier
tree.AA <- tree(
  formula = percent_change_next_weeks_price ~ .,
  data = AAtrain[, c(1:8)]
)
summary(tree.AA)
## 
## Regression tree:
## tree(formula = percent_change_next_weeks_price ~ ., data = AAtrain[, 
##     c(1:8)])
## Variables actually used in tree construction:
## [1] "open"
## Number of terminal nodes:  2 
## Residual mean deviance:  6.918 = 69.18 / 10 
## Distribution of residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -5.9570 -0.7440  0.1273  0.0000  0.7268  4.4050
# Draw the fitted AA tree, then add text labels to it
plot(tree.AA)
text(tree.AA, pretty = 0)

AXP Regression Tree

set.seed(42)

# Single regression tree on the first eight AXP columns, which leaves out the
# prediction columns appended to AXPtrain earlier
tree.AXP <- tree(
  formula = percent_change_next_weeks_price ~ .,
  data = AXPtrain[, c(1:8)]
)
summary(tree.AXP)
## 
## Regression tree:
## tree(formula = percent_change_next_weeks_price ~ ., data = AXPtrain[, 
##     c(1:8)])
## Variables actually used in tree construction:
## [1] "high"
## Number of terminal nodes:  2 
## Residual mean deviance:  4.84 = 48.4 / 10 
## Distribution of residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.9270 -1.5220 -0.3143  0.0000  1.5160  3.9290
# Draw the fitted AXP tree, then add text labels to it
plot(tree.AXP)
text(tree.AXP, pretty = 0)

BA Regression Tree

set.seed(42)

# Single regression tree on the first eight BA columns, which leaves out the
# prediction column appended to BAtrain earlier
tree.BA <- tree(
  formula = percent_change_next_weeks_price ~ .,
  data = BAtrain[, c(1:8)]
)
summary(tree.BA)
## 
## Regression tree:
## tree(formula = percent_change_next_weeks_price ~ ., data = BAtrain[, 
##     c(1:8)])
## Variables actually used in tree construction:
## [1] "close"
## Number of terminal nodes:  2 
## Residual mean deviance:  2.473 = 24.73 / 10 
## Distribution of residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.3610 -1.1180  0.3441  0.0000  1.0500  2.2420
# Draw the fitted BA tree, then add text labels to it
plot(tree.BA)
text(tree.BA, pretty = 0)

BAC Regression Tree

set.seed(42)

# Single regression tree on the first eight BAC columns, which leaves out the
# prediction column appended to BACtrain earlier
tree.BAC <- tree(
  formula = percent_change_next_weeks_price ~ .,
  data = BACtrain[, c(1:8)]
)
summary(tree.BAC)
## 
## Regression tree:
## tree(formula = percent_change_next_weeks_price ~ ., data = BACtrain[, 
##     c(1:8)])
## Variables actually used in tree construction:
## [1] "low"
## Number of terminal nodes:  2 
## Residual mean deviance:  13.22 = 132.2 / 10 
## Distribution of residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -7.1380 -2.0050  0.5191  0.0000  1.6260  6.5400
# Draw the fitted BAC tree, then add text labels to it
plot(tree.BAC)
text(tree.BAC, pretty = 0)

XOM Regression Tree

set.seed(42)

# Single regression tree on the first eight XOM columns, which leaves out the
# prediction column appended to XOMtrain earlier
tree.XOM <- tree(
  formula = percent_change_next_weeks_price ~ .,
  data = XOMtrain[, c(1:8)]
)
summary(tree.XOM)
## 
## Regression tree:
## tree(formula = percent_change_next_weeks_price ~ ., data = XOMtrain[, 
##     c(1:8)])
## Variables actually used in tree construction:
## [1] "close"
## Number of terminal nodes:  2 
## Residual mean deviance:  3.823 = 38.23 / 10 
## Distribution of residuals:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -3.4150 -0.7656  0.1722  0.0000  1.4280  2.2470
# Draw the fitted XOM tree, then add text labels to it
plot(tree.XOM)
text(tree.XOM, pretty = 0)

AA

Linear SVM

library(e1071)

set.seed(1)
form1 <- percent_change_next_weeks_price ~ .
# Grid search over cost and gamma for a linear-kernel SVM on the first eight
# AA columns.
# NOTE(review): e1071 documents gamma as unused by the linear kernel, so the
# gamma axis of this grid presumably has no effect on the fit -- confirm.
tuned.svm <- tune.svm(
  form1,
  data = AAtrain[, c(1:8)],
  kernel = "linear",
  gamma = seq(.01, .1, by = .01),
  cost = seq(.1, 1, by = .1)
)
tuned.svm$best.parameters
##    gamma cost
## 11  0.01  0.2
# Fit the linear SVM with the parameter values selected by the grid search
AAsvm <- svm(
  percent_change_next_weeks_price ~ .,
  data = AAtrain[, c(1:8)],
  kernel = "linear",
  gamma = tuned.svm$best.parameters$gamma,
  cost = tuned.svm$best.parameters$cost
)
summary(AAsvm)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AAtrain[, 
##     c(1:8)], kernel = "linear", gamma = tuned.svm$best.parameters$gamma, 
##     cost = tuned.svm$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  linear 
##        cost:  0.2 
##       gamma:  0.01 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  11
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
AAtrain$svmtestpredict <- predict(AAsvm, AAtrain)
AAtest$svmtestpredict <- predict(AAsvm, AAtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(AAtrain, aes(
  x = factor(row.names(AAtrain), levels = row.names(AAtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  xlab("row.names(AAtrain)") +
  theme_minimal()

ggplot(AAtest, aes(
  x = factor(row.names(AAtest), levels = row.names(AAtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  xlab("row.names(AAtest)") +
  theme_minimal()

# Train/test mean squared error; these two columns start the svmtrain_error
# table that later sections append to.
AAtrainmselin <- mean((AAtrain$percent_change_next_weeks_price - AAtrain$svmtestpredict)^2)
AAtestmselin <- mean((AAtest$percent_change_next_weeks_price - AAtest$svmtestpredict)^2)
svmtrain_error <- cbind(AAtrainmselin, AAtestmselin)

Polynomial SVM

# AA, polynomial kernel: grid-search gamma and cost with tune.svm() on the
# first eight columns of AAtrain, then refit with the selected pair.
set.seed(1)
tuned.svm2 <- tune.svm(
  form1, data = AAtrain[, c(1:8)], kernel = "polynomial",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm2$best.parameters
##    gamma cost
## 89  0.09  0.9
AAsvm2 <- svm(
  percent_change_next_weeks_price ~ ., data = AAtrain[, c(1:8)],
  kernel = "polynomial",
  gamma = tuned.svm2$best.parameters$gamma,
  cost = tuned.svm2$best.parameters$cost
)
summary(AAsvm2)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AAtrain[, 
##     c(1:8)], kernel = "polynomial", gamma = tuned.svm2$best.parameters$gamma, 
##     cost = tuned.svm2$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  polynomial 
##        cost:  0.9 
##      degree:  3 
##       gamma:  0.09 
##      coef.0:  0 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  12
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
AAtrain$svmtestpredict2 <- predict(AAsvm2, AAtrain)
AAtest$svmtestpredict2 <- predict(AAsvm2, AAtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(AAtrain, aes(
  x = factor(row.names(AAtrain), levels = row.names(AAtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  xlab("row.names(AAtrain)") +
  theme_minimal()

ggplot(AAtest, aes(
  x = factor(row.names(AAtest), levels = row.names(AAtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  xlab("row.names(AAtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
AAtrainmsepoly <- mean((AAtrain$percent_change_next_weeks_price - AAtrain$svmtestpredict2)^2)
AAtestmsepoly <- mean((AAtest$percent_change_next_weeks_price - AAtest$svmtestpredict2)^2)
svmtrain_error <- cbind(svmtrain_error, AAtrainmsepoly, AAtestmsepoly)

Radial Basis SVM

# AA, radial kernel: grid-search gamma and cost with tune.svm() on the first
# eight columns of AAtrain, then refit with the selected pair.
# NOTE(review): the selected cost (1) is the upper edge of the grid; a wider
# grid may be worth trying.
set.seed(1)
tuned.svm3 <- tune.svm(
  form1, data = AAtrain[, c(1:8)], kernel = "radial",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm3$best.parameters
##    gamma cost
## 97  0.07    1
AAsvm3 <- svm(
  percent_change_next_weeks_price ~ ., data = AAtrain[, c(1:8)],
  kernel = "radial",
  gamma = tuned.svm3$best.parameters$gamma,
  cost = tuned.svm3$best.parameters$cost
)
summary(AAsvm3)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AAtrain[, 
##     c(1:8)], kernel = "radial", gamma = tuned.svm3$best.parameters$gamma, 
##     cost = tuned.svm3$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.07 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  11
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
AAtrain$svmtestpredict3 <- predict(AAsvm3, AAtrain)
AAtest$svmtestpredict3 <- predict(AAsvm3, AAtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(AAtrain, aes(
  x = factor(row.names(AAtrain), levels = row.names(AAtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  xlab("row.names(AAtrain)") +
  theme_minimal()

ggplot(AAtest, aes(
  x = factor(row.names(AAtest), levels = row.names(AAtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  xlab("row.names(AAtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
AAtrainmserad <- mean((AAtrain$percent_change_next_weeks_price - AAtrain$svmtestpredict3)^2)
AAtestmserad <- mean((AAtest$percent_change_next_weeks_price - AAtest$svmtestpredict3)^2)
svmtrain_error <- cbind(svmtrain_error, AAtrainmserad, AAtestmserad)

Sigmoid SVM

# AA, sigmoid kernel: grid-search gamma and cost with tune.svm() on the first
# eight columns of AAtrain, then refit with the selected pair.
# NOTE(review): both selected values (gamma 0.1, cost 1) are the upper edge
# of the grid; a wider grid may be worth trying.
set.seed(1)
tuned.svm4 <- tune.svm(
  form1, data = AAtrain[, c(1:8)], kernel = "sigmoid",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm4$best.parameters
##     gamma cost
## 100   0.1    1
AAsvm4 <- svm(
  percent_change_next_weeks_price ~ ., data = AAtrain[, c(1:8)],
  kernel = "sigmoid",
  gamma = tuned.svm4$best.parameters$gamma,
  cost = tuned.svm4$best.parameters$cost
)
summary(AAsvm4)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AAtrain[, 
##     c(1:8)], kernel = "sigmoid", gamma = tuned.svm4$best.parameters$gamma, 
##     cost = tuned.svm4$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  sigmoid 
##        cost:  1 
##       gamma:  0.1 
##      coef.0:  0 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  11
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
AAtrain$svmtestpredict4 <- predict(AAsvm4, AAtrain)
AAtest$svmtestpredict4 <- predict(AAsvm4, AAtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(AAtrain, aes(
  x = factor(row.names(AAtrain), levels = row.names(AAtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  xlab("row.names(AAtrain)") +
  theme_minimal()

ggplot(AAtest, aes(
  x = factor(row.names(AAtest), levels = row.names(AAtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  xlab("row.names(AAtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
AAtrainmsesig <- mean((AAtrain$percent_change_next_weeks_price - AAtrain$svmtestpredict4)^2)
AAtestmsesig <- mean((AAtest$percent_change_next_weeks_price - AAtest$svmtestpredict4)^2)
svmtrain_error <- cbind(svmtrain_error, AAtrainmsesig, AAtestmsesig)

AXP

Linear SVM

# AXP, linear kernel: grid-search with tune.svm() on the first eight columns
# of AXPtrain, then refit with the selected parameters.
# NOTE(review): e1071 documents gamma as unused by the linear kernel, and the
# recorded result picks the first gamma in the grid (0.01); the gamma grid
# looks redundant here.
set.seed(1)
form1 <- percent_change_next_weeks_price ~ .
tuned.svm <- tune.svm(
  form1, data = AXPtrain[, c(1:8)], kernel = "linear",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm$best.parameters
##    gamma cost
## 11  0.01  0.2
AXPsvm <- svm(
  percent_change_next_weeks_price ~ ., data = AXPtrain[, c(1:8)],
  kernel = "linear",
  gamma = tuned.svm$best.parameters$gamma,
  cost = tuned.svm$best.parameters$cost
)
summary(AXPsvm)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AXPtrain[, 
##     c(1:8)], kernel = "linear", gamma = tuned.svm$best.parameters$gamma, 
##     cost = tuned.svm$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  linear 
##        cost:  0.2 
##       gamma:  0.01 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  11
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
AXPtrain$svmtestpredict <- predict(AXPsvm, AXPtrain)
AXPtest$svmtestpredict <- predict(AXPsvm, AXPtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(AXPtrain, aes(
  x = factor(row.names(AXPtrain), levels = row.names(AXPtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  xlab("row.names(AXPtrain)") +
  theme_minimal()

ggplot(AXPtest, aes(
  x = factor(row.names(AXPtest), levels = row.names(AXPtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  xlab("row.names(AXPtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
AXPtrainmselin <- mean((AXPtrain$percent_change_next_weeks_price - AXPtrain$svmtestpredict)^2)
AXPtestmselin <- mean((AXPtest$percent_change_next_weeks_price - AXPtest$svmtestpredict)^2)
svmtrain_error <- cbind(svmtrain_error, AXPtrainmselin, AXPtestmselin)

Polynomial SVM

# AXP, polynomial kernel: grid-search gamma and cost with tune.svm() on the
# first eight columns of AXPtrain, then refit with the selected pair.
set.seed(1)
tuned.svm2 <- tune.svm(
  form1, data = AXPtrain[, c(1:8)], kernel = "polynomial",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm2$best.parameters
##    gamma cost
## 88  0.08  0.9
AXPsvm2 <- svm(
  percent_change_next_weeks_price ~ ., data = AXPtrain[, c(1:8)],
  kernel = "polynomial",
  gamma = tuned.svm2$best.parameters$gamma,
  cost = tuned.svm2$best.parameters$cost
)
summary(AXPsvm2)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AXPtrain[, 
##     c(1:8)], kernel = "polynomial", gamma = tuned.svm2$best.parameters$gamma, 
##     cost = tuned.svm2$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  polynomial 
##        cost:  0.9 
##      degree:  3 
##       gamma:  0.08 
##      coef.0:  0 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  11
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
AXPtrain$svmtestpredict2 <- predict(AXPsvm2, AXPtrain)
AXPtest$svmtestpredict2 <- predict(AXPsvm2, AXPtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(AXPtrain, aes(
  x = factor(row.names(AXPtrain), levels = row.names(AXPtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  xlab("row.names(AXPtrain)") +
  theme_minimal()

ggplot(AXPtest, aes(
  x = factor(row.names(AXPtest), levels = row.names(AXPtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  xlab("row.names(AXPtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
AXPtrainmsepoly <- mean((AXPtrain$percent_change_next_weeks_price - AXPtrain$svmtestpredict2)^2)
AXPtestmsepoly <- mean((AXPtest$percent_change_next_weeks_price - AXPtest$svmtestpredict2)^2)
svmtrain_error <- cbind(svmtrain_error, AXPtrainmsepoly, AXPtestmsepoly)

Radial Basis SVM

# AXP, radial kernel: grid-search gamma and cost with tune.svm() on the first
# eight columns of AXPtrain, then refit with the selected pair.
# NOTE(review): the selected cost (1) is the upper edge of the grid; a wider
# grid may be worth trying.
set.seed(1)
tuned.svm3 <- tune.svm(
  form1, data = AXPtrain[, c(1:8)], kernel = "radial",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm3$best.parameters
##    gamma cost
## 95  0.05    1
AXPsvm3 <- svm(
  percent_change_next_weeks_price ~ ., data = AXPtrain[, c(1:8)],
  kernel = "radial",
  gamma = tuned.svm3$best.parameters$gamma,
  cost = tuned.svm3$best.parameters$cost
)
summary(AXPsvm3)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AXPtrain[, 
##     c(1:8)], kernel = "radial", gamma = tuned.svm3$best.parameters$gamma, 
##     cost = tuned.svm3$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.05 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  11
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
AXPtrain$svmtestpredict3 <- predict(AXPsvm3, AXPtrain)
AXPtest$svmtestpredict3 <- predict(AXPsvm3, AXPtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(AXPtrain, aes(
  x = factor(row.names(AXPtrain), levels = row.names(AXPtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  xlab("row.names(AXPtrain)") +
  theme_minimal()

ggplot(AXPtest, aes(
  x = factor(row.names(AXPtest), levels = row.names(AXPtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  xlab("row.names(AXPtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
AXPtrainmserad <- mean((AXPtrain$percent_change_next_weeks_price - AXPtrain$svmtestpredict3)^2)
AXPtestmserad <- mean((AXPtest$percent_change_next_weeks_price - AXPtest$svmtestpredict3)^2)
svmtrain_error <- cbind(svmtrain_error, AXPtrainmserad, AXPtestmserad)

Sigmoid SVM

# AXP, sigmoid kernel: grid-search gamma and cost with tune.svm() on the
# first eight columns of AXPtrain, then refit with the selected pair.
# NOTE(review): the selected cost (1) is the upper edge of the grid; a wider
# grid may be worth trying.
set.seed(1)
tuned.svm4 <- tune.svm(
  form1, data = AXPtrain[, c(1:8)], kernel = "sigmoid",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm4$best.parameters
##    gamma cost
## 96  0.06    1
AXPsvm4 <- svm(
  percent_change_next_weeks_price ~ ., data = AXPtrain[, c(1:8)],
  kernel = "sigmoid",
  gamma = tuned.svm4$best.parameters$gamma,
  cost = tuned.svm4$best.parameters$cost
)
summary(AXPsvm4)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AXPtrain[, 
##     c(1:8)], kernel = "sigmoid", gamma = tuned.svm4$best.parameters$gamma, 
##     cost = tuned.svm4$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  sigmoid 
##        cost:  1 
##       gamma:  0.06 
##      coef.0:  0 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  12
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
AXPtrain$svmtestpredict4 <- predict(AXPsvm4, AXPtrain)
AXPtest$svmtestpredict4 <- predict(AXPsvm4, AXPtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(AXPtrain, aes(
  x = factor(row.names(AXPtrain), levels = row.names(AXPtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  xlab("row.names(AXPtrain)") +
  theme_minimal()

ggplot(AXPtest, aes(
  x = factor(row.names(AXPtest), levels = row.names(AXPtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  xlab("row.names(AXPtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
AXPtrainmsesig <- mean((AXPtrain$percent_change_next_weeks_price - AXPtrain$svmtestpredict4)^2)
AXPtestmsesig <- mean((AXPtest$percent_change_next_weeks_price - AXPtest$svmtestpredict4)^2)
svmtrain_error <- cbind(svmtrain_error, AXPtrainmsesig, AXPtestmsesig)

BA

Linear SVM

# BA, linear kernel: grid-search with tune.svm() on the first eight columns
# of BAtrain, then refit with the selected parameters.
# NOTE(review): e1071 documents gamma as unused by the linear kernel, and the
# recorded result picks the first gamma in the grid (0.01); the gamma grid
# looks redundant here. The selected cost (0.1) is the lower edge of the grid.
set.seed(1)
form1 <- percent_change_next_weeks_price ~ .
tuned.svm <- tune.svm(
  form1, data = BAtrain[, c(1:8)], kernel = "linear",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm$best.parameters
##   gamma cost
## 1  0.01  0.1
BAsvm <- svm(
  percent_change_next_weeks_price ~ ., data = BAtrain[, c(1:8)],
  kernel = "linear",
  gamma = tuned.svm$best.parameters$gamma,
  cost = tuned.svm$best.parameters$cost
)
summary(BAsvm)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BAtrain[, 
##     c(1:8)], kernel = "linear", gamma = tuned.svm$best.parameters$gamma, 
##     cost = tuned.svm$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  linear 
##        cost:  0.1 
##       gamma:  0.01 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  11
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
BAtrain$svmtestpredict <- predict(BAsvm, BAtrain)
BAtest$svmtestpredict <- predict(BAsvm, BAtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(BAtrain, aes(
  x = factor(row.names(BAtrain), levels = row.names(BAtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  xlab("row.names(BAtrain)") +
  theme_minimal()

ggplot(BAtest, aes(
  x = factor(row.names(BAtest), levels = row.names(BAtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  xlab("row.names(BAtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
BAtrainmselin <- mean((BAtrain$percent_change_next_weeks_price - BAtrain$svmtestpredict)^2)
BAtestmselin <- mean((BAtest$percent_change_next_weeks_price - BAtest$svmtestpredict)^2)
svmtrain_error <- cbind(svmtrain_error, BAtrainmselin, BAtestmselin)

Polynomial SVM

# BA, polynomial kernel: grid-search gamma and cost with tune.svm() on the
# first eight columns of BAtrain, then refit with the selected pair.
# NOTE(review): the selected cost (1) is the upper edge of the grid; a wider
# grid may be worth trying.
set.seed(1)
tuned.svm2 <- tune.svm(
  form1, data = BAtrain[, c(1:8)], kernel = "polynomial",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm2$best.parameters
##    gamma cost
## 92  0.02    1
BAsvm2 <- svm(
  percent_change_next_weeks_price ~ ., data = BAtrain[, c(1:8)],
  kernel = "polynomial",
  gamma = tuned.svm2$best.parameters$gamma,
  cost = tuned.svm2$best.parameters$cost
)
summary(BAsvm2)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BAtrain[, 
##     c(1:8)], kernel = "polynomial", gamma = tuned.svm2$best.parameters$gamma, 
##     cost = tuned.svm2$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  polynomial 
##        cost:  1 
##      degree:  3 
##       gamma:  0.02 
##      coef.0:  0 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  10
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
BAtrain$svmtestpredict2 <- predict(BAsvm2, BAtrain)
BAtest$svmtestpredict2 <- predict(BAsvm2, BAtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(BAtrain, aes(
  x = factor(row.names(BAtrain), levels = row.names(BAtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  xlab("row.names(BAtrain)") +
  theme_minimal()

ggplot(BAtest, aes(
  x = factor(row.names(BAtest), levels = row.names(BAtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  xlab("row.names(BAtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
BAtrainmsepoly <- mean((BAtrain$percent_change_next_weeks_price - BAtrain$svmtestpredict2)^2)
BAtestmsepoly <- mean((BAtest$percent_change_next_weeks_price - BAtest$svmtestpredict2)^2)
svmtrain_error <- cbind(svmtrain_error, BAtrainmsepoly, BAtestmsepoly)

Radial Basis SVM

# BA, radial kernel: grid-search gamma and cost with tune.svm() on the first
# eight columns of BAtrain, then refit with the selected pair.
# NOTE(review): both selected values (gamma 0.1, cost 1) are the upper edge
# of the grid; a wider grid may be worth trying.
set.seed(1)
tuned.svm3 <- tune.svm(
  form1, data = BAtrain[, c(1:8)], kernel = "radial",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm3$best.parameters
##     gamma cost
## 100   0.1    1
BAsvm3 <- svm(
  percent_change_next_weeks_price ~ ., data = BAtrain[, c(1:8)],
  kernel = "radial",
  gamma = tuned.svm3$best.parameters$gamma,
  cost = tuned.svm3$best.parameters$cost
)
summary(BAsvm3)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BAtrain[, 
##     c(1:8)], kernel = "radial", gamma = tuned.svm3$best.parameters$gamma, 
##     cost = tuned.svm3$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  1 
##       gamma:  0.1 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  11
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
BAtrain$svmtestpredict3 <- predict(BAsvm3, BAtrain)
BAtest$svmtestpredict3 <- predict(BAsvm3, BAtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(BAtrain, aes(
  x = factor(row.names(BAtrain), levels = row.names(BAtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  xlab("row.names(BAtrain)") +
  theme_minimal()

ggplot(BAtest, aes(
  x = factor(row.names(BAtest), levels = row.names(BAtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  xlab("row.names(BAtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
BAtrainmserad <- mean((BAtrain$percent_change_next_weeks_price - BAtrain$svmtestpredict3)^2)
BAtestmserad <- mean((BAtest$percent_change_next_weeks_price - BAtest$svmtestpredict3)^2)
svmtrain_error <- cbind(svmtrain_error, BAtrainmserad, BAtestmserad)

Sigmoid SVM

# BA, sigmoid kernel: grid-search gamma and cost with tune.svm() on the first
# eight columns of BAtrain, then refit with the selected pair.
set.seed(1)
tuned.svm4 <- tune.svm(
  form1, data = BAtrain[, c(1:8)], kernel = "sigmoid",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm4$best.parameters
##    gamma cost
## 16  0.06  0.2
BAsvm4 <- svm(
  percent_change_next_weeks_price ~ ., data = BAtrain[, c(1:8)],
  kernel = "sigmoid",
  gamma = tuned.svm4$best.parameters$gamma,
  cost = tuned.svm4$best.parameters$cost
)
summary(BAsvm4)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BAtrain[, 
##     c(1:8)], kernel = "sigmoid", gamma = tuned.svm4$best.parameters$gamma, 
##     cost = tuned.svm4$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  sigmoid 
##        cost:  0.2 
##       gamma:  0.06 
##      coef.0:  0 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  12
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
BAtrain$svmtestpredict4 <- predict(BAsvm4, BAtrain)
BAtest$svmtestpredict4 <- predict(BAsvm4, BAtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(BAtrain, aes(
  x = factor(row.names(BAtrain), levels = row.names(BAtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  xlab("row.names(BAtrain)") +
  theme_minimal()

ggplot(BAtest, aes(
  x = factor(row.names(BAtest), levels = row.names(BAtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  xlab("row.names(BAtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
BAtrainmsesig <- mean((BAtrain$percent_change_next_weeks_price - BAtrain$svmtestpredict4)^2)
BAtestmsesig <- mean((BAtest$percent_change_next_weeks_price - BAtest$svmtestpredict4)^2)
svmtrain_error <- cbind(svmtrain_error, BAtrainmsesig, BAtestmsesig)

BAC

Linear SVM

# BAC, linear kernel: grid-search with tune.svm() on the first eight columns
# of BACtrain, then refit with the selected parameters.
# NOTE(review): e1071 documents gamma as unused by the linear kernel, and the
# recorded result picks the first gamma in the grid (0.01); the gamma grid
# looks redundant here. The selected cost (1) is the upper edge of the grid.
set.seed(1)
form1 <- percent_change_next_weeks_price ~ .
tuned.svm <- tune.svm(
  form1, data = BACtrain[, c(1:8)], kernel = "linear",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm$best.parameters
##    gamma cost
## 91  0.01    1
BACsvm <- svm(
  percent_change_next_weeks_price ~ ., data = BACtrain[, c(1:8)],
  kernel = "linear",
  gamma = tuned.svm$best.parameters$gamma,
  cost = tuned.svm$best.parameters$cost
)
summary(BACsvm)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BACtrain[, 
##     c(1:8)], kernel = "linear", gamma = tuned.svm$best.parameters$gamma, 
##     cost = tuned.svm$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  linear 
##        cost:  1 
##       gamma:  0.01 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  9
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
BACtrain$svmtestpredict <- predict(BACsvm, BACtrain)
BACtest$svmtestpredict <- predict(BACsvm, BACtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(BACtrain, aes(
  x = factor(row.names(BACtrain), levels = row.names(BACtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  xlab("row.names(BACtrain)") +
  theme_minimal()

ggplot(BACtest, aes(
  x = factor(row.names(BACtest), levels = row.names(BACtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  xlab("row.names(BACtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
BACtrainmselin <- mean((BACtrain$percent_change_next_weeks_price - BACtrain$svmtestpredict)^2)
BACtestmselin <- mean((BACtest$percent_change_next_weeks_price - BACtest$svmtestpredict)^2)
svmtrain_error <- cbind(svmtrain_error, BACtrainmselin, BACtestmselin)

Polynomial SVM

# BAC, polynomial kernel: grid-search gamma and cost with tune.svm() on the
# first eight columns of BACtrain, then refit with the selected pair.
# NOTE(review): both selected values (gamma 0.1, cost 1) are the upper edge
# of the grid; a wider grid may be worth trying.
set.seed(1)
tuned.svm2 <- tune.svm(
  form1, data = BACtrain[, c(1:8)], kernel = "polynomial",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm2$best.parameters
##     gamma cost
## 100   0.1    1
BACsvm2 <- svm(
  percent_change_next_weeks_price ~ ., data = BACtrain[, c(1:8)],
  kernel = "polynomial",
  gamma = tuned.svm2$best.parameters$gamma,
  cost = tuned.svm2$best.parameters$cost
)
summary(BACsvm2)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BACtrain[, 
##     c(1:8)], kernel = "polynomial", gamma = tuned.svm2$best.parameters$gamma, 
##     cost = tuned.svm2$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  polynomial 
##        cost:  1 
##      degree:  3 
##       gamma:  0.1 
##      coef.0:  0 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  10
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
BACtrain$svmtestpredict2 <- predict(BACsvm2, BACtrain)
BACtest$svmtestpredict2 <- predict(BACsvm2, BACtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(BACtrain, aes(
  x = factor(row.names(BACtrain), levels = row.names(BACtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  xlab("row.names(BACtrain)") +
  theme_minimal()

ggplot(BACtest, aes(
  x = factor(row.names(BACtest), levels = row.names(BACtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  xlab("row.names(BACtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
BACtrainmsepoly <- mean((BACtrain$percent_change_next_weeks_price - BACtrain$svmtestpredict2)^2)
BACtestmsepoly <- mean((BACtest$percent_change_next_weeks_price - BACtest$svmtestpredict2)^2)
svmtrain_error <- cbind(svmtrain_error, BACtrainmsepoly, BACtestmsepoly)

Radial Basis SVM

# BAC, radial kernel: grid-search gamma and cost with tune.svm() on the first
# eight columns of BACtrain, then refit with the selected pair.
# NOTE(review): both selected values (gamma 0.01, cost 0.1) are the lower
# edge of the grid; a wider grid may be worth trying.
set.seed(1)
tuned.svm3 <- tune.svm(
  form1, data = BACtrain[, c(1:8)], kernel = "radial",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm3$best.parameters
##   gamma cost
## 1  0.01  0.1
BACsvm3 <- svm(
  percent_change_next_weeks_price ~ ., data = BACtrain[, c(1:8)],
  kernel = "radial",
  gamma = tuned.svm3$best.parameters$gamma,
  cost = tuned.svm3$best.parameters$cost
)
summary(BACsvm3)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BACtrain[, 
##     c(1:8)], kernel = "radial", gamma = tuned.svm3$best.parameters$gamma, 
##     cost = tuned.svm3$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  0.1 
##       gamma:  0.01 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  10
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
BACtrain$svmtestpredict3 <- predict(BACsvm3, BACtrain)
BACtest$svmtestpredict3 <- predict(BACsvm3, BACtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(BACtrain, aes(
  x = factor(row.names(BACtrain), levels = row.names(BACtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  xlab("row.names(BACtrain)") +
  theme_minimal()

ggplot(BACtest, aes(
  x = factor(row.names(BACtest), levels = row.names(BACtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  xlab("row.names(BACtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
BACtrainmserad <- mean((BACtrain$percent_change_next_weeks_price - BACtrain$svmtestpredict3)^2)
BACtestmserad <- mean((BACtest$percent_change_next_weeks_price - BACtest$svmtestpredict3)^2)
svmtrain_error <- cbind(svmtrain_error, BACtrainmserad, BACtestmserad)

Sigmoid SVM

# BAC, sigmoid kernel: grid-search gamma and cost with tune.svm() on the
# first eight columns of BACtrain, then refit with the selected pair.
# NOTE(review): both selected values (gamma 0.01, cost 0.1) are the lower
# edge of the grid; a wider grid may be worth trying.
set.seed(1)
tuned.svm4 <- tune.svm(
  form1, data = BACtrain[, c(1:8)], kernel = "sigmoid",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm4$best.parameters
##   gamma cost
## 1  0.01  0.1
BACsvm4 <- svm(
  percent_change_next_weeks_price ~ ., data = BACtrain[, c(1:8)],
  kernel = "sigmoid",
  gamma = tuned.svm4$best.parameters$gamma,
  cost = tuned.svm4$best.parameters$cost
)
summary(BACsvm4)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BACtrain[, 
##     c(1:8)], kernel = "sigmoid", gamma = tuned.svm4$best.parameters$gamma, 
##     cost = tuned.svm4$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  sigmoid 
##        cost:  0.1 
##       gamma:  0.01 
##      coef.0:  0 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  10
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
BACtrain$svmtestpredict4 <- predict(BACsvm4, BACtrain)
BACtest$svmtestpredict4 <- predict(BACsvm4, BACtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(BACtrain, aes(
  x = factor(row.names(BACtrain), levels = row.names(BACtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  xlab("row.names(BACtrain)") +
  theme_minimal()

ggplot(BACtest, aes(
  x = factor(row.names(BACtest), levels = row.names(BACtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  xlab("row.names(BACtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
BACtrainmsesig <- mean((BACtrain$percent_change_next_weeks_price - BACtrain$svmtestpredict4)^2)
BACtestmsesig <- mean((BACtest$percent_change_next_weeks_price - BACtest$svmtestpredict4)^2)
svmtrain_error <- cbind(svmtrain_error, BACtrainmsesig, BACtestmsesig)

XOM

Linear SVM

# XOM, linear kernel: grid-search with tune.svm() on the first eight columns
# of XOMtrain, then refit with the selected parameters.
# NOTE(review): e1071 documents gamma as unused by the linear kernel, and the
# recorded result picks the first gamma in the grid (0.01); the gamma grid
# looks redundant here. The selected cost (0.1) is the lower edge of the grid.
set.seed(1)
form1 <- percent_change_next_weeks_price ~ .
tuned.svm <- tune.svm(
  form1, data = XOMtrain[, c(1:8)], kernel = "linear",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm$best.parameters
##   gamma cost
## 1  0.01  0.1
XOMsvm <- svm(
  percent_change_next_weeks_price ~ ., data = XOMtrain[, c(1:8)],
  kernel = "linear",
  gamma = tuned.svm$best.parameters$gamma,
  cost = tuned.svm$best.parameters$cost
)
summary(XOMsvm)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = XOMtrain[, 
##     c(1:8)], kernel = "linear", gamma = tuned.svm$best.parameters$gamma, 
##     cost = tuned.svm$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  linear 
##        cost:  0.1 
##       gamma:  0.01 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  12
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
XOMtrain$svmtestpredict <- predict(XOMsvm, XOMtrain)
XOMtest$svmtestpredict <- predict(XOMsvm, XOMtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(XOMtrain, aes(
  x = factor(row.names(XOMtrain), levels = row.names(XOMtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  xlab("row.names(XOMtrain)") +
  theme_minimal()

ggplot(XOMtest, aes(
  x = factor(row.names(XOMtest), levels = row.names(XOMtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  xlab("row.names(XOMtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
XOMtrainmselin <- mean((XOMtrain$percent_change_next_weeks_price - XOMtrain$svmtestpredict)^2)
XOMtestmselin <- mean((XOMtest$percent_change_next_weeks_price - XOMtest$svmtestpredict)^2)
svmtrain_error <- cbind(svmtrain_error, XOMtrainmselin, XOMtestmselin)

Polynomial SVM

# XOM, polynomial kernel: grid-search gamma and cost with tune.svm() on the
# first eight columns of XOMtrain, then refit with the selected pair.
# NOTE(review): both selected values (gamma 0.1, cost 1) are the upper edge
# of the grid; a wider grid may be worth trying.
set.seed(1)
tuned.svm2 <- tune.svm(
  form1, data = XOMtrain[, c(1:8)], kernel = "polynomial",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm2$best.parameters
##     gamma cost
## 100   0.1    1
XOMsvm2 <- svm(
  percent_change_next_weeks_price ~ ., data = XOMtrain[, c(1:8)],
  kernel = "polynomial",
  gamma = tuned.svm2$best.parameters$gamma,
  cost = tuned.svm2$best.parameters$cost
)
summary(XOMsvm2)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = XOMtrain[, 
##     c(1:8)], kernel = "polynomial", gamma = tuned.svm2$best.parameters$gamma, 
##     cost = tuned.svm2$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  polynomial 
##        cost:  1 
##      degree:  3 
##       gamma:  0.1 
##      coef.0:  0 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  11
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
XOMtrain$svmtestpredict2 <- predict(XOMsvm2, XOMtrain)
XOMtest$svmtestpredict2 <- predict(XOMsvm2, XOMtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(XOMtrain, aes(
  x = factor(row.names(XOMtrain), levels = row.names(XOMtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  xlab("row.names(XOMtrain)") +
  theme_minimal()

ggplot(XOMtest, aes(
  x = factor(row.names(XOMtest), levels = row.names(XOMtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  xlab("row.names(XOMtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
XOMtrainmsepoly <- mean((XOMtrain$percent_change_next_weeks_price - XOMtrain$svmtestpredict2)^2)
XOMtestmsepoly <- mean((XOMtest$percent_change_next_weeks_price - XOMtest$svmtestpredict2)^2)
svmtrain_error <- cbind(svmtrain_error, XOMtrainmsepoly, XOMtestmsepoly)

Radial Basis SVM

# XOM, radial kernel: grid-search gamma and cost with tune.svm() on the first
# eight columns of XOMtrain, then refit with the selected pair.
# NOTE(review): the selected gamma (0.01) is the lower edge of the grid; a
# wider grid may be worth trying.
set.seed(1)
tuned.svm3 <- tune.svm(
  form1, data = XOMtrain[, c(1:8)], kernel = "radial",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm3$best.parameters
##    gamma cost
## 31  0.01  0.4
XOMsvm3 <- svm(
  percent_change_next_weeks_price ~ ., data = XOMtrain[, c(1:8)],
  kernel = "radial",
  gamma = tuned.svm3$best.parameters$gamma,
  cost = tuned.svm3$best.parameters$cost
)
summary(XOMsvm3)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = XOMtrain[, 
##     c(1:8)], kernel = "radial", gamma = tuned.svm3$best.parameters$gamma, 
##     cost = tuned.svm3$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  0.4 
##       gamma:  0.01 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  11
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
XOMtrain$svmtestpredict3 <- predict(XOMsvm3, XOMtrain)
XOMtest$svmtestpredict3 <- predict(XOMsvm3, XOMtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(XOMtrain, aes(
  x = factor(row.names(XOMtrain), levels = row.names(XOMtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  xlab("row.names(XOMtrain)") +
  theme_minimal()

ggplot(XOMtest, aes(
  x = factor(row.names(XOMtest), levels = row.names(XOMtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  xlab("row.names(XOMtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
XOMtrainmserad <- mean((XOMtrain$percent_change_next_weeks_price - XOMtrain$svmtestpredict3)^2)
XOMtestmserad <- mean((XOMtest$percent_change_next_weeks_price - XOMtest$svmtestpredict3)^2)
svmtrain_error <- cbind(svmtrain_error, XOMtrainmserad, XOMtestmserad)

Sigmoid SVM

# XOM, sigmoid kernel: grid-search gamma and cost with tune.svm() on the
# first eight columns of XOMtrain, then refit with the selected pair.
# NOTE(review): the selected gamma (0.01) is the lower edge of the grid; a
# wider grid may be worth trying.
set.seed(1)
tuned.svm4 <- tune.svm(
  form1, data = XOMtrain[, c(1:8)], kernel = "sigmoid",
  gamma = seq(.01, .1, by = .01), cost = seq(.1, 1, by = .1)
)
tuned.svm4$best.parameters
##    gamma cost
## 51  0.01  0.6
XOMsvm4 <- svm(
  percent_change_next_weeks_price ~ ., data = XOMtrain[, c(1:8)],
  kernel = "sigmoid",
  gamma = tuned.svm4$best.parameters$gamma,
  cost = tuned.svm4$best.parameters$cost
)
summary(XOMsvm4)
## 
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = XOMtrain[, 
##     c(1:8)], kernel = "sigmoid", gamma = tuned.svm4$best.parameters$gamma, 
##     cost = tuned.svm4$best.parameters$cost)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  sigmoid 
##        cost:  0.6 
##       gamma:  0.01 
##      coef.0:  0 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  11
# Predictions on both splits. predict.svm() has no `type` argument (the old
# `type = "response"` was silently absorbed by `...`), so it is dropped;
# the predicted values are unchanged.
XOMtrain$svmtestpredict4 <- predict(XOMsvm4, XOMtrain)
XOMtest$svmtestpredict4 <- predict(XOMsvm4, XOMtest)

# Actual (red) vs. predicted (black). row.names() is character, which ggplot2
# sorts alphabetically ("1", "10", "2", ...); a factor with levels in row
# order keeps the points in data order. xlab() keeps the original axis title.
ggplot(XOMtrain, aes(
  x = factor(row.names(XOMtrain), levels = row.names(XOMtrain)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  xlab("row.names(XOMtrain)") +
  theme_minimal()

ggplot(XOMtest, aes(
  x = factor(row.names(XOMtest), levels = row.names(XOMtest)),
  y = percent_change_next_weeks_price
)) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  xlab("row.names(XOMtest)") +
  theme_minimal()

# Train/test mean squared error, appended to the running svmtrain_error table.
XOMtrainmsesig <- mean((XOMtrain$percent_change_next_weeks_price - XOMtrain$svmtestpredict4)^2)
XOMtestmsesig <- mean((XOMtest$percent_change_next_weeks_price - XOMtest$svmtestpredict4)^2)
svmtrain_error <- cbind(svmtrain_error, XOMtrainmsesig, XOMtestmsesig)