# Point the session at the case-study folder so the relative data path below resolves.
setwd("C:/Users/arami/Desktop/DA 6813/CASE STUDY 3")
# Read the comma-separated file; the first row supplies the column names.
dow <- read.table(file = "dow_jones_index.data", header = TRUE, sep = ",")
# Show the column types exactly as they were read from disk.
str(dow)
## 'data.frame': 750 obs. of 16 variables:
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ stock : chr "AA" "AA" "AA" "AA" ...
## $ date : chr "1/7/2011" "1/14/2011" "1/21/2011" "1/28/2011" ...
## $ open : chr "$15.82" "$16.71" "$16.19" "$15.87" ...
## $ high : chr "$16.72" "$16.71" "$16.38" "$16.63" ...
## $ low : chr "$15.78" "$15.64" "$15.60" "$15.82" ...
## $ close : chr "$16.42" "$15.97" "$15.79" "$16.13" ...
## $ volume : int 239655616 242963398 138428495 151379173 154387761 114691279 80023895 132981863 109493077 114332562 ...
## $ percent_change_price : num 3.79 -4.43 -2.47 1.64 5.93 ...
## $ percent_change_volume_over_last_wk: num NA 1.38 -43.02 9.36 1.99 ...
## $ previous_weeks_volume : int NA 239655616 242963398 138428495 151379173 154387761 114691279 80023895 132981863 109493077 ...
## $ next_weeks_open : chr "$16.71" "$16.19" "$15.87" "$16.18" ...
## $ next_weeks_close : chr "$15.97" "$15.79" "$16.13" "$17.14" ...
## $ percent_change_next_weeks_price : num -4.428 -2.471 1.638 5.933 0.231 ...
## $ days_to_next_dividend : int 26 19 12 5 97 90 83 76 69 62 ...
## $ percent_return_next_dividend : num 0.183 0.188 0.19 0.186 0.175 ...
# Parse the month/day/year strings into Date values.
dow$date <- lubridate::mdy(dow$date)
# The price columns were read as text with a leading "$": strip it and convert
# every one of them to numeric in a single pass.
price_cols <- c("open", "high", "low", "close", "next_weeks_open", "next_weeks_close")
dow[price_cols] <- lapply(
  dow[price_cols],
  function(price_text) as.numeric(gsub("\\$", "", price_text))
)
# Store volume as double rather than integer.
dow$volume <- as.numeric(dow$volume)
# Confirm the new column types.
str(dow)
## 'data.frame': 750 obs. of 16 variables:
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ stock : chr "AA" "AA" "AA" "AA" ...
## $ date : Date, format: "2011-01-07" "2011-01-14" ...
## $ open : num 15.8 16.7 16.2 15.9 16.2 ...
## $ high : num 16.7 16.7 16.4 16.6 17.4 ...
## $ low : num 15.8 15.6 15.6 15.8 16.2 ...
## $ close : num 16.4 16 15.8 16.1 17.1 ...
## $ volume : num 2.40e+08 2.43e+08 1.38e+08 1.51e+08 1.54e+08 ...
## $ percent_change_price : num 3.79 -4.43 -2.47 1.64 5.93 ...
## $ percent_change_volume_over_last_wk: num NA 1.38 -43.02 9.36 1.99 ...
## $ previous_weeks_volume : int NA 239655616 242963398 138428495 151379173 154387761 114691279 80023895 132981863 109493077 ...
## $ next_weeks_open : num 16.7 16.2 15.9 16.2 17.3 ...
## $ next_weeks_close : num 16 15.8 16.1 17.1 17.4 ...
## $ percent_change_next_weeks_price : num -4.428 -2.471 1.638 5.933 0.231 ...
## $ days_to_next_dividend : int 26 19 12 5 97 90 83 76 69 62 ...
## $ percent_return_next_dividend : num 0.183 0.188 0.19 0.186 0.175 ...
# Keep columns 1-11 and 14-16; columns 12 and 13 (next_weeks_open and
# next_weeks_close in the str() listing above) are left out.
dow2 <- dow[, c(1:11, 14:16)]
# Quarter 1 is the training set, quarter 2 the test set.
dow.train <- dow2[dow2$quarter == 1, ]
dow.test <- dow2[dow2$quarter == 2, ]
AA Regular Linear Model
set.seed(42)
# AA rows only, restricted to the columns without NAs: date, the price/volume
# predictors, the response, and days_to_next_dividend.
AAtrain <- dow.train[dow.train$stock == "AA", c(3:9, 12, 13)]
AAtest <- dow.test[dow.test$stock == "AA", c(3:9, 12, 13)]
# Label each row with its date, then remove the date column so that it is not
# picked up as a predictor by the `~ .` formula.
row.names(AAtrain) <- AAtrain$date
AAtrain$date <- NULL
row.names(AAtest) <- AAtest$date
AAtest$date <- NULL
# Full model: every remaining column predicts next week's percent price change.
AAlm <- lm(formula = percent_change_next_weeks_price ~ ., data = AAtrain)
summary(AAlm)
##
## Call:
## lm(formula = percent_change_next_weeks_price ~ ., data = AAtrain)
##
## Residuals:
## 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 2011-02-18
## -2.3878 1.2061 -2.2751 3.0591 0.1602 1.3261 -0.1014
## 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25
## 0.7606 -2.8699 -0.2603 2.0997 -0.7173
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.152e+01 6.622e+01 0.929 0.405
## open -1.704e+02 2.417e+02 -0.705 0.520
## high 3.863e+00 9.696e+00 0.398 0.711
## low -8.221e-02 5.356e+00 -0.015 0.988
## close 1.631e+02 2.371e+02 0.688 0.529
## volume -2.655e-08 3.645e-08 -0.728 0.507
## percent_change_price -2.718e+01 3.932e+01 -0.691 0.527
## days_to_next_dividend -8.842e-03 7.007e-02 -0.126 0.906
##
## Residual standard error: 3.053 on 4 degrees of freedom
## Multiple R-squared: 0.6403, Adjusted R-squared: 0.01089
## F-statistic: 1.017 on 7 and 4 DF, p-value: 0.5254
library(olsrr)
## Warning: package 'olsrr' was built under R version 4.0.3
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
##
## rivers
# Backward elimination by p-value (threshold passed as prem = 0.05), run quietly.
# `FALSE` is spelled out because `F` is an ordinary variable that can be reassigned.
AAlm <- ols_step_backward_p(AAlm, prem = 0.05, details = FALSE)
# Print the lm object retained by the selection procedure.
AAlm$model
##
## Call:
## lm(formula = paste(response, "~", paste(preds, collapse = " + ")),
## data = l)
##
## Coefficients:
## (Intercept) open volume
## 7.465e+01 -4.179e+00 -3.903e-08
# Refit the two-predictor model printed by the selection step as a plain lm
# object, so that summary(), predict() and vif() operate on it directly.
AAlm <- lm(
  formula = percent_change_next_weeks_price ~ open + volume,
  data = AAtrain
)
summary(AAlm)
##
## Call:
## lm(formula = percent_change_next_weeks_price ~ open + volume,
## data = AAtrain)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6203 -0.6679 0.0684 1.4673 3.5047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.465e+01 2.478e+01 3.013 0.0147 *
## open -4.179e+00 1.437e+00 -2.909 0.0174 *
## volume -3.903e-08 1.522e-08 -2.564 0.0305 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.309 on 9 degrees of freedom
## Multiple R-squared: 0.5373, Adjusted R-squared: 0.4345
## F-statistic: 5.226 on 2 and 9 DF, p-value: 0.03117
# Predict for both quarters. predict.lm() reads new observations from `newdata`;
# any other argument name is absorbed by `...` and silently ignored.
AAtrain$train_preds <- predict(AAlm, newdata = AAtrain, type = "response")
AAtest$test_preds <- predict(AAlm, newdata = AAtest, type = "response")
library(car)
## Loading required package: carData
# Variance inflation factors of the two retained predictors.
vif(AAlm)
## open volume
## 1.250613 1.250613
# Draw the first four lm diagnostic plots in a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(AAlm, which = 1:4)

library(ggplot2)
# Training weeks: observed response in red, model predictions in the default colour.
ggplot(AAtrain, aes(x = row.names(AAtrain), y = percent_change_next_weeks_price)) +
  geom_point(colour = "red") +
  geom_point(aes(y = train_preds)) +
  theme_minimal()

# Same comparison for the test weeks.
ggplot(AAtest, aes(x = row.names(AAtest), y = percent_change_next_weeks_price)) +
  geom_point(colour = "red") +
  geom_point(aes(y = test_preds)) +
  theme_minimal()

# Mean squared error and mean absolute error on each set.
AAtrainmse <- mean((AAtrain$percent_change_next_weeks_price - AAtrain$train_preds)^2)
AAtrainmae <- mean(abs(AAtrain$percent_change_next_weeks_price - AAtrain$train_preds))
AAtestmse <- mean((AAtest$percent_change_next_weeks_price - AAtest$test_preds)^2)
AAtestmae <- mean(abs(AAtest$percent_change_next_weeks_price - AAtest$test_preds))
# Start the one-row results matrix; despite its name it also holds the test errors.
train_error <- cbind(AAtrainmse, AAtrainmae, AAtestmse, AAtestmae)
Linear Model Without Lag
# Week counter 1, 2, ..., one value per training row.
day <- seq(1, nrow(AAtrain), 1)
# Regress the response on the week counter alone, with the intercept removed (`- 1`).
AAlm2 <- lm(AAtrain$percent_change_next_weeks_price ~ day - 1)
summary(AAlm2)
##
## Call:
## lm(formula = AAtrain$percent_change_next_weeks_price ~ day -
## 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4977 -2.3408 -0.5816 1.2232 5.6563
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## day 0.06923 0.11866 0.583 0.571
##
## Residual standard error: 3.025 on 11 degrees of freedom
## Multiple R-squared: 0.03001, Adjusted R-squared: -0.05817
## F-statistic: 0.3404 on 1 and 11 DF, p-value: 0.5714
# AAlm2 was fitted from free-standing vectors rather than a data frame, so the
# fitted values for the training weeks are obtained by calling predict() without
# new data. predict.lm() has no `data` argument; passing one is silently ignored.
AAtrain$train_preds2 <- predict(AAlm2, type = "response")
# Observed response (red) against the time-index fit (default colour).
ggplot(data = AAtrain, aes(y = percent_change_next_weeks_price, x = row.names(AAtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = train_preds2)) +
  theme_minimal()

Checking for lags
# Lag plots at lags 1 through 4 for the response and three predictors of the AA
# training set, each drawn with "." as the plotting character.
# Response: next week's percent price change.
lag.plot(AAtrain$percent_change_next_weeks_price, pch = ".", set.lags = 1:4)

# Closing price.
lag.plot(AAtrain$close, pch = ".", set.lags = 1:4)

# Opening price.
lag.plot(AAtrain$open, pch = ".", set.lags = 1:4)

# Current week's percent price change.
lag.plot(AAtrain$percent_change_price, pch = ".", set.lags = 1:4)

AXP Regular Linear Model
set.seed(42)
# AXP rows only, restricted to the columns without NAs.
AXPtrain <- dow.train[dow.train$stock == "AXP", c(3:9, 12, 13)]
AXPtest <- dow.test[dow.test$stock == "AXP", c(3:9, 12, 13)]
# Dates become the row labels; the date column itself is then dropped so the
# `~ .` formula below does not treat it as a predictor.
row.names(AXPtrain) <- AXPtrain$date
AXPtrain$date <- NULL
row.names(AXPtest) <- AXPtest$date
AXPtest$date <- NULL
# Full model on every remaining column.
AXPlm <- lm(formula = percent_change_next_weeks_price ~ ., data = AXPtrain)
summary(AXPlm)
##
## Call:
## lm(formula = percent_change_next_weeks_price ~ ., data = AXPtrain)
##
## Residuals:
## 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 2011-02-18
## 0.541448 0.064563 -0.044797 0.187946 0.003855 -0.501065 -0.425276
## 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25
## 0.164832 -1.449268 0.464004 0.164303 0.829456
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.807e+02 2.388e+01 7.564 0.00164 **
## open 8.042e+01 2.591e+01 3.104 0.03609 *
## high -5.673e+00 1.254e+00 -4.523 0.01063 *
## low 3.273e+00 1.067e+00 3.067 0.03739 *
## close -8.197e+01 2.565e+01 -3.196 0.03302 *
## volume 4.846e-08 7.206e-08 0.673 0.53810
## percent_change_price 3.664e+01 1.156e+01 3.171 0.03384 *
## days_to_next_dividend 3.484e-02 1.370e-02 2.543 0.06379 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9778 on 4 degrees of freedom
## Multiple R-squared: 0.9627, Adjusted R-squared: 0.8974
## F-statistic: 14.74 on 7 and 4 DF, p-value: 0.0103
# Backward elimination by p-value (threshold passed as prem = 0.05), run quietly.
# `FALSE` is spelled out because `F` is an ordinary variable that can be reassigned.
AXPlm <- ols_step_backward_p(AXPlm, prem = 0.05, details = FALSE)
# Print the lm object retained by the selection procedure.
AXPlm$model
##
## Call:
## lm(formula = paste(response, "~", paste(preds, collapse = " + ")),
## data = l)
##
## Coefficients:
## (Intercept) open high
## 189.77500 82.95806 -5.40782
## low close percent_change_price
## 2.84547 -84.52516 37.73375
## days_to_next_dividend
## 0.03644
# Refit the six-predictor model printed by the selection step as a plain lm object.
AXPlm <- lm(
  formula = percent_change_next_weeks_price ~ open + high + low + close +
    percent_change_price + days_to_next_dividend,
  data = AXPtrain
)
summary(AXPlm)
##
## Call:
## lm(formula = percent_change_next_weeks_price ~ open + high +
## low + close + percent_change_price + days_to_next_dividend,
## data = AXPtrain)
##
## Residuals:
## 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 2011-02-18
## 0.626703 -0.412721 0.171839 0.219440 0.214900 -0.256396 -0.526855
## 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25
## -0.002862 -1.495415 0.493249 0.110962 0.857157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 189.77500 18.54834 10.231 0.000153 ***
## open 82.95806 24.19161 3.429 0.018652 *
## high -5.40782 1.12337 -4.814 0.004824 **
## low 2.84547 0.80875 3.518 0.016949 *
## close -84.52516 23.93431 -3.532 0.016712 *
## percent_change_price 37.73375 10.79514 3.495 0.017369 *
## days_to_next_dividend 0.03644 0.01273 2.862 0.035307 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9227 on 5 degrees of freedom
## Multiple R-squared: 0.9585, Adjusted R-squared: 0.9086
## F-statistic: 19.23 on 6 and 5 DF, p-value: 0.002607
# Check the selected model for collinearity.
vif(AXPlm)
## open high low
## 8200.952596 13.991812 6.905939
## close percent_change_price days_to_next_dividend
## 9406.878888 14505.652148 1.333786
# Refit without percent_change_price, the predictor with the largest VIF above.
AXPlm <- lm(
  formula = percent_change_next_weeks_price ~ open + high + low + close +
    days_to_next_dividend,
  data = AXPtrain
)
summary(AXPlm)
##
## Call:
## lm(formula = percent_change_next_weeks_price ~ open + high +
## low + close + days_to_next_dividend, data = AXPtrain)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0485 -0.5130 0.4404 0.7632 1.3007
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 154.49290 26.36121 5.861 0.00109 **
## open -1.58602 0.80094 -1.980 0.09500 .
## high -2.32627 1.17945 -1.972 0.09604 .
## low 1.37747 1.17083 1.176 0.28395
## close -0.88196 0.83323 -1.058 0.33058
## days_to_next_dividend 0.04485 0.02118 2.118 0.07853 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.563 on 6 degrees of freedom
## Multiple R-squared: 0.857, Adjusted R-squared: 0.7378
## F-statistic: 7.189 on 5 and 6 DF, p-value: 0.0162
# Predict for both quarters. predict.lm() reads new observations from `newdata`;
# any other argument name is absorbed by `...` and silently ignored.
AXPtrain$train_preds <- predict(AXPlm, newdata = AXPtrain, type = "response")
AXPtest$test_preds <- predict(AXPlm, newdata = AXPtest, type = "response")
# Variance inflation factors after dropping percent_change_price.
vif(AXPlm)
## open high low
## 3.132585 5.374669 5.043664
## close days_to_next_dividend
## 3.972850 1.286171
# Draw the first four diagnostic plots for the AXP model (not the AA model)
# in a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(AXPlm, which = 1:4)

# Training weeks: observed response in red, model predictions in the default colour.
ggplot(AXPtrain, aes(x = row.names(AXPtrain), y = percent_change_next_weeks_price)) +
  geom_point(colour = "red") +
  geom_point(aes(y = train_preds)) +
  theme_minimal()

# Same comparison for the test weeks.
ggplot(AXPtest, aes(x = row.names(AXPtest), y = percent_change_next_weeks_price)) +
  geom_point(colour = "red") +
  geom_point(aes(y = test_preds)) +
  theme_minimal()

# Mean squared error and mean absolute error on each set.
AXPtrainmse <- mean((AXPtrain$percent_change_next_weeks_price - AXPtrain$train_preds)^2)
AXPtrainmae <- mean(abs(AXPtrain$percent_change_next_weeks_price - AXPtrain$train_preds))
AXPtestmse <- mean((AXPtest$percent_change_next_weeks_price - AXPtest$test_preds)^2)
AXPtestmae <- mean(abs(AXPtest$percent_change_next_weeks_price - AXPtest$test_preds))
# Append the four AXP columns to the running results matrix.
train_error <- cbind(train_error, AXPtrainmse, AXPtrainmae, AXPtestmse, AXPtestmae)
# Build the week counter from AXPtrain itself so the regressor always has one
# value per AXP training row, instead of relying on a counter sized for another stock.
day <- seq_len(nrow(AXPtrain))
# Regress the response on the week counter alone, with the intercept removed (`- 1`).
AXPlm2 <- lm(AXPtrain$percent_change_next_weeks_price ~ day - 1)
summary(AXPlm2)
##
## Call:
## lm(formula = AXPtrain$percent_change_next_weeks_price ~ day -
## 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8347 -1.1246 -0.1756 0.9375 6.2151
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## day 0.02632 0.12007 0.219 0.831
##
## Residual standard error: 3.061 on 11 degrees of freedom
## Multiple R-squared: 0.004349, Adjusted R-squared: -0.08617
## F-statistic: 0.04804 on 1 and 11 DF, p-value: 0.8305
# AXPlm2 was fitted from free-standing vectors rather than a data frame, so the
# fitted values for the training weeks are obtained by calling predict() without
# new data. predict.lm() has no `data` argument; passing one is silently ignored.
AXPtrain$train_preds2 <- predict(AXPlm2, type = "response")
# Observed response (red) against the time-index fit (default colour).
ggplot(data = AXPtrain, aes(y = percent_change_next_weeks_price, x = row.names(AXPtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = train_preds2)) +
  theme_minimal()

# Lag plots at lags 1 through 4 for the response and three predictors of the AXP
# training set, each drawn with "." as the plotting character.
# Response: next week's percent price change.
lag.plot(AXPtrain$percent_change_next_weeks_price, pch = ".", set.lags = 1:4)

# Closing price.
lag.plot(AXPtrain$close, pch = ".", set.lags = 1:4)

# Opening price.
lag.plot(AXPtrain$open, pch = ".", set.lags = 1:4)

# Current week's percent price change.
lag.plot(AXPtrain$percent_change_price, pch = ".", set.lags = 1:4)

BA Regular Linear Model
set.seed(42)
# BA rows only, restricted to the columns without NAs.
BAtrain <- dow.train[dow.train$stock == "BA", c(3:9, 12, 13)]
BAtest <- dow.test[dow.test$stock == "BA", c(3:9, 12, 13)]
# Dates become the row labels; the date column itself is then dropped so the
# `~ .` formula below does not treat it as a predictor.
row.names(BAtrain) <- BAtrain$date
BAtrain$date <- NULL
row.names(BAtest) <- BAtest$date
BAtest$date <- NULL
# Full model on every remaining column.
BAlm <- lm(formula = percent_change_next_weeks_price ~ ., data = BAtrain)
summary(BAlm)
##
## Call:
## lm(formula = percent_change_next_weeks_price ~ ., data = BAtrain)
##
## Residuals:
## 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 2011-02-18
## -0.40080 -1.45053 -1.35852 1.04623 0.75875 1.12291 0.64740
## 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25
## -2.16209 0.01231 -1.86918 1.57167 2.08185
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.640e+01 9.468e+01 1.018 0.366
## open 3.474e+00 3.746e+01 0.093 0.931
## high 2.378e+00 2.335e+00 1.018 0.366
## low -1.135e+00 1.288e+00 -0.881 0.428
## close -6.002e+00 3.684e+01 -0.163 0.878
## volume -3.849e-07 3.357e-07 -1.146 0.316
## percent_change_price 2.760e+00 2.580e+01 0.107 0.920
## days_to_next_dividend 6.674e-03 4.085e-02 0.163 0.878
##
## Residual standard error: 2.368 on 4 degrees of freedom
## Multiple R-squared: 0.5505, Adjusted R-squared: -0.236
## F-statistic: 0.6999 on 7 and 4 DF, p-value: 0.6814
# Backward elimination by p-value (threshold passed as prem = 0.05), run quietly.
# `FALSE` is spelled out because `F` is an ordinary variable that can be reassigned.
BAlm <- ols_step_backward_p(BAlm, prem = 0.05, details = FALSE)
# Print the lm object retained by the selection procedure.
BAlm$model
##
## Call:
## lm(formula = paste(response, "~", paste(preds, collapse = " + ")),
## data = l)
##
## Coefficients:
## (Intercept) close
## 61.2202 -0.8537
# Refit the single-predictor model printed by the selection step as a plain lm object.
BAlm <- lm(formula = percent_change_next_weeks_price ~ close, data = BAtrain)
summary(BAlm)
##
## Call:
## lm(formula = percent_change_next_weeks_price ~ close, data = BAtrain)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2253 -0.5766 0.4247 0.9765 2.1131
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.2202 26.4362 2.316 0.0431 *
## close -0.8537 0.3709 -2.302 0.0441 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.806 on 10 degrees of freedom
## Multiple R-squared: 0.3463, Adjusted R-squared: 0.2809
## F-statistic: 5.298 on 1 and 10 DF, p-value: 0.04413
# Predict for both quarters. predict.lm() reads new observations from `newdata`;
# any other argument name is absorbed by `...` and silently ignored.
BAtrain$train_preds <- predict(BAlm, newdata = BAtrain, type = "response")
BAtest$test_preds <- predict(BAlm, newdata = BAtest, type = "response")
# Training weeks: observed response in red, model predictions in the default colour.
ggplot(data = BAtrain, aes(y = percent_change_next_weeks_price, x = row.names(BAtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = train_preds)) +
  theme_minimal()

# Same comparison for the test weeks.
ggplot(data = BAtest, aes(y = percent_change_next_weeks_price, x = row.names(BAtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = test_preds)) +
  theme_minimal()

# Mean squared error and mean absolute error on each set.
BAtrainmse <- mean((BAtrain$percent_change_next_weeks_price - BAtrain$train_preds)^2)
BAtrainmae <- mean(abs(BAtrain$percent_change_next_weeks_price - BAtrain$train_preds))
BAtestmse <- mean((BAtest$percent_change_next_weeks_price - BAtest$test_preds)^2)
BAtestmae <- mean(abs(BAtest$percent_change_next_weeks_price - BAtest$test_preds))
# Append the four BA columns to the running results matrix.
train_error <- cbind(train_error, BAtrainmse, BAtrainmae, BAtestmse, BAtestmae)
BAC Regular Linear Model
set.seed(42)
# BAC rows only, restricted to the columns without NAs.
BACtrain <- dow.train[dow.train$stock == "BAC", c(3:9, 12, 13)]
BACtest <- dow.test[dow.test$stock == "BAC", c(3:9, 12, 13)]
# Dates become the row labels; the date column itself is then dropped so the
# `~ .` formula below does not treat it as a predictor.
row.names(BACtrain) <- BACtrain$date
BACtrain$date <- NULL
row.names(BACtest) <- BACtest$date
BACtest$date <- NULL
# Full model on every remaining column.
BAClm <- lm(formula = percent_change_next_weeks_price ~ ., data = BACtrain)
summary(BAClm)
##
## Call:
## lm(formula = percent_change_next_weeks_price ~ ., data = BACtrain)
##
## Residuals:
## 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 2011-02-18
## -0.498049 -0.100233 -0.006617 1.644130 0.247643 0.455633 -0.634564
## 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25
## 0.093428 3.891850 -0.422557 -3.493422 -1.177242
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.627e+01 5.191e+01 1.277 0.271
## open -3.311e+01 9.341e+01 -0.354 0.741
## high 3.970e+00 1.063e+01 0.373 0.728
## low 1.227e+01 5.891e+00 2.083 0.106
## close 1.213e+01 9.071e+01 0.134 0.900
## volume 7.965e-09 4.920e-09 1.619 0.181
## percent_change_price -3.296e+00 1.321e+01 -0.250 0.815
## days_to_next_dividend -3.966e-02 3.533e-02 -1.123 0.324
##
## Residual standard error: 2.853 on 4 degrees of freedom
## Multiple R-squared: 0.8104, Adjusted R-squared: 0.4786
## F-statistic: 2.443 on 7 and 4 DF, p-value: 0.2029
# Backward elimination by p-value (threshold passed as prem = 0.05), run quietly.
# `FALSE` is spelled out because `F` is an ordinary variable that can be reassigned.
BAClm <- ols_step_backward_p(BAClm, prem = 0.05, details = FALSE)
# Print the lm object retained by the selection procedure.
BAClm$model
##
## Call:
## lm(formula = paste(response, "~", paste(preds, collapse = " + ")),
## data = l)
##
## Coefficients:
## (Intercept) open low
## 5.280e+01 -1.664e+01 1.276e+01
## volume percent_change_price
## 9.013e-09 -1.215e+00
# Refit as a plain lm object.
# NOTE(review): the selection output printed above lists open, low, volume and
# percent_change_price only; `close` is added here on top of those. Confirm
# that including it is intentional.
BAClm <- lm(
  formula = percent_change_next_weeks_price ~ open + close + low + volume +
    percent_change_price,
  data = BACtrain
)
summary(BAClm)
##
## Call:
## lm(formula = percent_change_next_weeks_price ~ open + close +
## low + volume + percent_change_price, data = BACtrain)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8797 -0.5751 -0.0326 1.0836 2.6131
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.757e+01 4.437e+01 1.298 0.2421
## open 8.691e-01 6.946e+01 0.013 0.9904
## close -1.837e+01 7.270e+01 -0.253 0.8089
## low 1.333e+01 5.356e+00 2.488 0.0473 *
## volume 8.608e-09 3.958e-09 2.175 0.0726 .
## percent_change_price 1.349e+00 1.015e+01 0.133 0.8987
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.672 on 6 degrees of freedom
## Multiple R-squared: 0.7505, Adjusted R-squared: 0.5426
## F-statistic: 3.61 on 5 and 6 DF, p-value: 0.07477
# Check the five-predictor model for collinearity.
vif(BAClm)
## open close low
## 997.892964 2108.264982 6.404622
## volume percent_change_price
## 1.454521 2627.307409
# Refit without percent_change_price, the predictor with the largest VIF above.
BAClm <- lm(
  formula = percent_change_next_weeks_price ~ open + close + low + volume,
  data = BACtrain
)
summary(BAClm)
##
## Call:
## lm(formula = percent_change_next_weeks_price ~ open + close +
## low + volume, data = BACtrain)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9390 -0.5759 -0.0083 1.1697 2.5086
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.510e+01 3.735e+01 1.475 0.1836
## open -8.350e+00 2.719e+00 -3.071 0.0180 *
## close -8.724e+00 2.956e+00 -2.951 0.0214 *
## low 1.306e+01 4.590e+00 2.844 0.0249 *
## volume 8.823e-09 3.349e-09 2.634 0.0337 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.478 on 7 degrees of freedom
## Multiple R-squared: 0.7498, Adjusted R-squared: 0.6068
## F-statistic: 5.244 on 4 and 7 DF, p-value: 0.02839
# Variance inflation factors after dropping percent_change_price.
vif(BAClm)
## open close low volume
## 1.778670 4.054507 5.470889 1.211390
# Predict for both quarters. predict.lm() reads new observations from `newdata`;
# any other argument name is absorbed by `...` and silently ignored.
BACtrain$train_preds <- predict(BAClm, newdata = BACtrain, type = "response")
BACtest$test_preds <- predict(BAClm, newdata = BACtest, type = "response")
# Training weeks: observed response in red, model predictions in the default colour.
ggplot(data = BACtrain, aes(y = percent_change_next_weeks_price, x = row.names(BACtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = train_preds)) +
  theme_minimal()

# Same comparison for the test weeks.
ggplot(data = BACtest, aes(y = percent_change_next_weeks_price, x = row.names(BACtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = test_preds)) +
  theme_minimal()

# Mean squared error and mean absolute error on each set.
BACtrainmse <- mean((BACtrain$percent_change_next_weeks_price - BACtrain$train_preds)^2)
BACtrainmae <- mean(abs(BACtrain$percent_change_next_weeks_price - BACtrain$train_preds))
BACtestmse <- mean((BACtest$percent_change_next_weeks_price - BACtest$test_preds)^2)
BACtestmae <- mean(abs(BACtest$percent_change_next_weeks_price - BACtest$test_preds))
# Append the four BAC columns to the running results matrix.
train_error <- cbind(train_error, BACtrainmse, BACtrainmae, BACtestmse, BACtestmae)
XOM Regular Linear Model
set.seed(42)
# XOM rows only, restricted to the columns without NAs.
XOMtrain <- dow.train[dow.train$stock == "XOM", c(3:9, 12, 13)]
XOMtest <- dow.test[dow.test$stock == "XOM", c(3:9, 12, 13)]
# Dates become the row labels; the date column itself is then dropped so the
# `~ .` formula below does not treat it as a predictor.
row.names(XOMtrain) <- XOMtrain$date
XOMtrain$date <- NULL
row.names(XOMtest) <- XOMtest$date
XOMtest$date <- NULL
# Full model on every remaining column.
XOMlm <- lm(formula = percent_change_next_weeks_price ~ ., data = XOMtrain)
summary(XOMlm)
##
## Call:
## lm(formula = percent_change_next_weeks_price ~ ., data = XOMtrain)
##
## Residuals:
## 2011-01-07 2011-01-14 2011-01-21 2011-01-28 2011-02-04 2011-02-11 2011-02-18
## 0.8232 -0.2011 -2.6797 0.4425 0.7345 1.4395 -0.4639
## 2011-02-25 2011-03-04 2011-03-11 2011-03-18 2011-03-25
## -0.7120 -0.5397 -0.1419 0.4795 0.8191
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.893e+01 3.820e+01 2.590 0.0607 .
## open -2.560e+01 1.930e+01 -1.327 0.2553
## high 1.202e+00 1.872e+00 0.642 0.5556
## low -4.425e-01 8.976e-01 -0.493 0.6479
## close 2.372e+01 1.788e+01 1.326 0.2554
## volume -6.114e-08 5.812e-08 -1.052 0.3522
## percent_change_price -2.069e+01 1.523e+01 -1.358 0.2459
## days_to_next_dividend -3.303e-02 4.563e-02 -0.724 0.5093
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.778 on 4 degrees of freedom
## Multiple R-squared: 0.7973, Adjusted R-squared: 0.4426
## F-statistic: 2.248 on 7 and 4 DF, p-value: 0.2264
# Backward elimination by p-value (threshold passed as prem = 0.05), run quietly.
# `FALSE` is spelled out because `F` is an ordinary variable that can be reassigned.
XOMlm <- ols_step_backward_p(XOMlm, prem = 0.05, details = FALSE)
# Print the lm object retained by the selection procedure.
XOMlm$model
##
## Call:
## lm(formula = paste(response, "~", paste(preds, collapse = " + ")),
## data = l)
##
## Coefficients:
## (Intercept) open
## 29.8502 -0.3595
# Refit the single-predictor model printed by the selection step as a plain lm object.
XOMlm <- lm(formula = percent_change_next_weeks_price ~ open, data = XOMtrain)
summary(XOMlm)
##
## Call:
## lm(formula = percent_change_next_weeks_price ~ open, data = XOMtrain)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3141 -1.4107 0.0342 0.8313 3.1983
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.8502 12.2318 2.440 0.0348 *
## open -0.3595 0.1510 -2.381 0.0385 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.995 on 10 degrees of freedom
## Multiple R-squared: 0.3618, Adjusted R-squared: 0.298
## F-statistic: 5.669 on 1 and 10 DF, p-value: 0.03855
# Predict for both quarters. predict.lm() reads new observations from `newdata`;
# any other argument name is absorbed by `...` and silently ignored.
XOMtrain$train_preds <- predict(XOMlm, newdata = XOMtrain, type = "response")
XOMtest$test_preds <- predict(XOMlm, newdata = XOMtest, type = "response")
# Training weeks: observed response in red, model predictions in the default colour.
ggplot(data = XOMtrain, aes(y = percent_change_next_weeks_price, x = row.names(XOMtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = train_preds)) +
  theme_minimal()

# Same comparison for the test weeks.
ggplot(data = XOMtest, aes(y = percent_change_next_weeks_price, x = row.names(XOMtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = test_preds)) +
  theme_minimal()

# Mean squared error and mean absolute error on each set.
XOMtrainmse <- mean((XOMtrain$percent_change_next_weeks_price - XOMtrain$train_preds)^2)
XOMtrainmae <- mean(abs(XOMtrain$percent_change_next_weeks_price - XOMtrain$train_preds))
XOMtestmse <- mean((XOMtest$percent_change_next_weeks_price - XOMtest$test_preds)^2)
XOMtestmae <- mean(abs(XOMtest$percent_change_next_weeks_price - XOMtest$test_preds))
# Append the four XOM columns to the running results matrix.
train_error <- cbind(train_error, XOMtrainmse, XOMtrainmae, XOMtestmse, XOMtestmae)
AA Regression Tree (single tree fitted with tree(), not a random forest)
library(tree)
## Warning: package 'tree' was built under R version 4.0.4
set.seed(42)
# Fit one regression tree on the first eight columns of AAtrain, which leaves
# out any prediction columns appended to the data frame after the original
# predictors and response.
tree.AA <- tree(
  formula = percent_change_next_weeks_price ~ .,
  data = AAtrain[, c(1:8)]
)
summary(tree.AA)
##
## Regression tree:
## tree(formula = percent_change_next_weeks_price ~ ., data = AAtrain[,
## c(1:8)])
## Variables actually used in tree construction:
## [1] "open"
## Number of terminal nodes: 2
## Residual mean deviance: 6.918 = 69.18 / 10
## Distribution of residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -5.9570 -0.7440 0.1273 0.0000 0.7268 4.4050
# Draw the fitted AA tree, then add text labels to the drawn tree.
plot(tree.AA)
text(tree.AA, pretty = 0)

AXP Regression Tree (single tree fitted with tree(), not a random forest)
set.seed(42)
# Fit one regression tree on the first eight columns of AXPtrain, which leaves
# out any prediction columns appended to the data frame after the original
# predictors and response.
tree.AXP <- tree(
  formula = percent_change_next_weeks_price ~ .,
  data = AXPtrain[, c(1:8)]
)
summary(tree.AXP)
##
## Regression tree:
## tree(formula = percent_change_next_weeks_price ~ ., data = AXPtrain[,
## c(1:8)])
## Variables actually used in tree construction:
## [1] "high"
## Number of terminal nodes: 2
## Residual mean deviance: 4.84 = 48.4 / 10
## Distribution of residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.9270 -1.5220 -0.3143 0.0000 1.5160 3.9290
# Draw the fitted AXP tree, then add text labels to the drawn tree.
plot(tree.AXP)
text(tree.AXP, pretty = 0)

BA Regression Tree (single tree fitted with tree(), not a random forest)
set.seed(42)
# Fit one regression tree on the first eight columns of BAtrain, which leaves
# out any prediction columns appended to the data frame after the original
# predictors and response.
tree.BA <- tree(
  formula = percent_change_next_weeks_price ~ .,
  data = BAtrain[, c(1:8)]
)
summary(tree.BA)
##
## Regression tree:
## tree(formula = percent_change_next_weeks_price ~ ., data = BAtrain[,
## c(1:8)])
## Variables actually used in tree construction:
## [1] "close"
## Number of terminal nodes: 2
## Residual mean deviance: 2.473 = 24.73 / 10
## Distribution of residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.3610 -1.1180 0.3441 0.0000 1.0500 2.2420
# Draw the fitted BA tree, then add text labels to the drawn tree.
plot(tree.BA)
text(tree.BA, pretty = 0)

BAC Regression Tree (single tree fitted with tree(), not a random forest)
set.seed(42)
# Fit one regression tree on the first eight columns of BACtrain, which leaves
# out any prediction columns appended to the data frame after the original
# predictors and response.
tree.BAC <- tree(
  formula = percent_change_next_weeks_price ~ .,
  data = BACtrain[, c(1:8)]
)
summary(tree.BAC)
##
## Regression tree:
## tree(formula = percent_change_next_weeks_price ~ ., data = BACtrain[,
## c(1:8)])
## Variables actually used in tree construction:
## [1] "low"
## Number of terminal nodes: 2
## Residual mean deviance: 13.22 = 132.2 / 10
## Distribution of residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -7.1380 -2.0050 0.5191 0.0000 1.6260 6.5400
# Draw the fitted BAC tree, then add text labels to the drawn tree.
plot(tree.BAC)
text(tree.BAC, pretty = 0)

XOM Regression Tree (single tree fitted with tree(), not a random forest)
set.seed(42)
# Fit one regression tree on the first eight columns of XOMtrain, which leaves
# out any prediction columns appended to the data frame after the original
# predictors and response.
tree.XOM <- tree(
  formula = percent_change_next_weeks_price ~ .,
  data = XOMtrain[, c(1:8)]
)
summary(tree.XOM)
##
## Regression tree:
## tree(formula = percent_change_next_weeks_price ~ ., data = XOMtrain[,
## c(1:8)])
## Variables actually used in tree construction:
## [1] "close"
## Number of terminal nodes: 2
## Residual mean deviance: 3.823 = 38.23 / 10
## Distribution of residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.4150 -0.7656 0.1722 0.0000 1.4280 2.2470
# Draw the fitted XOM tree, then add text labels to the drawn tree.
plot(tree.XOM)
text(tree.XOM, pretty = 0)

AA
Linear SVM
library(e1071)
set.seed(1)
# Shared formula: every other supplied column predicts next week's percent change.
form1 <- percent_change_next_weeks_price ~ .
# Grid search over gamma (0.01 to 0.1) and cost (0.1 to 1) for a linear-kernel SVM.
# NOTE(review): e1071 documents gamma as unused by the linear kernel, so this
# grid likely repeats the same fit for every gamma value; confirm before trimming.
tuned.svm <- tune.svm(
  form1,
  data = AAtrain[, c(1:8)],
  kernel = "linear",
  gamma = seq(0.01, 0.1, by = 0.01),
  cost = seq(0.1, 1, by = 0.1)
)
tuned.svm$best.parameters
## gamma cost
## 11 0.01 0.2
# Fit the final linear SVM with the best gamma/cost pair found above.
AAsvm <- svm(
  percent_change_next_weeks_price ~ .,
  data = AAtrain[, c(1:8)],
  kernel = "linear",
  gamma = tuned.svm$best.parameters$gamma,
  cost = tuned.svm$best.parameters$cost
)
summary(AAsvm)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AAtrain[,
## c(1:8)], kernel = "linear", gamma = tuned.svm$best.parameters$gamma,
## cost = tuned.svm$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: linear
## cost: 0.2
## gamma: 0.01
## epsilon: 0.1
##
##
## Number of Support Vectors: 11
# Linear-SVM predictions for both sets; the column is named svmtestpredict in
# the training frame as well as in the test frame.
AAtrain$svmtestpredict <- predict(AAsvm, newdata = AAtrain, type = "response")
AAtest$svmtestpredict <- predict(AAsvm, newdata = AAtest, type = "response")
# Training weeks: observed response in red, SVM predictions in the default colour.
ggplot(AAtrain, aes(x = row.names(AAtrain), y = percent_change_next_weeks_price)) +
  geom_point(colour = "red") +
  geom_point(aes(y = svmtestpredict)) +
  theme_minimal()

# Same comparison for the test weeks.
ggplot(AAtest, aes(x = row.names(AAtest), y = percent_change_next_weeks_price)) +
  geom_point(colour = "red") +
  geom_point(aes(y = svmtestpredict)) +
  theme_minimal()

# Mean squared error on each set; these two values start the SVM results matrix.
AAtrainmselin <- mean((AAtrain$percent_change_next_weeks_price - AAtrain$svmtestpredict)^2)
AAtestmselin <- mean((AAtest$percent_change_next_weeks_price - AAtest$svmtestpredict)^2)
svmtrain_error <- cbind(AAtrainmselin, AAtestmselin)
Polynomial SVM
set.seed(1)
# Grid search over gamma (0.01 to 0.1) and cost (0.1 to 1) for a polynomial-kernel SVM.
tuned.svm2 <- tune.svm(
  form1,
  data = AAtrain[, c(1:8)],
  kernel = "polynomial",
  gamma = seq(0.01, 0.1, by = 0.01),
  cost = seq(0.1, 1, by = 0.1)
)
tuned.svm2$best.parameters
## gamma cost
## 89 0.09 0.9
# Fit the final polynomial SVM with the best gamma/cost pair found above.
AAsvm2 <- svm(
  percent_change_next_weeks_price ~ .,
  data = AAtrain[, c(1:8)],
  kernel = "polynomial",
  gamma = tuned.svm2$best.parameters$gamma,
  cost = tuned.svm2$best.parameters$cost
)
summary(AAsvm2)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AAtrain[,
## c(1:8)], kernel = "polynomial", gamma = tuned.svm2$best.parameters$gamma,
## cost = tuned.svm2$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: polynomial
## cost: 0.9
## degree: 3
## gamma: 0.09
## coef.0: 0
## epsilon: 0.1
##
##
## Number of Support Vectors: 12
# Polynomial-SVM predictions for both sets.
AAtrain$svmtestpredict2 <- predict(AAsvm2, newdata = AAtrain, type = "response")
AAtest$svmtestpredict2 <- predict(AAsvm2, newdata = AAtest, type = "response")
# Training weeks: observed response in red, SVM predictions in the default colour.
ggplot(AAtrain, aes(x = row.names(AAtrain), y = percent_change_next_weeks_price)) +
  geom_point(colour = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  theme_minimal()

# Same comparison for the test weeks.
ggplot(AAtest, aes(x = row.names(AAtest), y = percent_change_next_weeks_price)) +
  geom_point(colour = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  theme_minimal()

# Mean squared error on each set, appended to the SVM results matrix.
AAtrainmsepoly <- mean((AAtrain$percent_change_next_weeks_price - AAtrain$svmtestpredict2)^2)
AAtestmsepoly <- mean((AAtest$percent_change_next_weeks_price - AAtest$svmtestpredict2)^2)
svmtrain_error <- cbind(svmtrain_error, AAtrainmsepoly, AAtestmsepoly)
Radial Basis SVM
set.seed(1)
# Grid search over gamma (0.01 to 0.1) and cost (0.1 to 1) for a radial-kernel SVM.
tuned.svm3 <- tune.svm(
  form1,
  data = AAtrain[, c(1:8)],
  kernel = "radial",
  gamma = seq(0.01, 0.1, by = 0.01),
  cost = seq(0.1, 1, by = 0.1)
)
tuned.svm3$best.parameters
## gamma cost
## 97 0.07 1
# Fit the final radial SVM with the best gamma/cost pair found above.
AAsvm3 <- svm(
  percent_change_next_weeks_price ~ .,
  data = AAtrain[, c(1:8)],
  kernel = "radial",
  gamma = tuned.svm3$best.parameters$gamma,
  cost = tuned.svm3$best.parameters$cost
)
summary(AAsvm3)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AAtrain[,
## c(1:8)], kernel = "radial", gamma = tuned.svm3$best.parameters$gamma,
## cost = tuned.svm3$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 1
## gamma: 0.07
## epsilon: 0.1
##
##
## Number of Support Vectors: 11
# Radial-SVM predictions for both sets.
AAtrain$svmtestpredict3 <- predict(AAsvm3, newdata = AAtrain, type = "response")
AAtest$svmtestpredict3 <- predict(AAsvm3, newdata = AAtest, type = "response")
# Training weeks: observed response in red, SVM predictions in the default colour.
ggplot(AAtrain, aes(x = row.names(AAtrain), y = percent_change_next_weeks_price)) +
  geom_point(colour = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  theme_minimal()

# Same comparison for the test weeks.
ggplot(AAtest, aes(x = row.names(AAtest), y = percent_change_next_weeks_price)) +
  geom_point(colour = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  theme_minimal()

# Mean squared error on each set, appended to the SVM results matrix.
AAtrainmserad <- mean((AAtrain$percent_change_next_weeks_price - AAtrain$svmtestpredict3)^2)
AAtestmserad <- mean((AAtest$percent_change_next_weeks_price - AAtest$svmtestpredict3)^2)
svmtrain_error <- cbind(svmtrain_error, AAtrainmserad, AAtestmserad)
Sigmoid SVM
# AA, sigmoid kernel: grid-search gamma and cost with tune.svm(), then refit
# the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm4 = tune.svm(form1, data = AAtrain[, c(1:8)], kernel = "sigmoid",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm4$best.parameters
## gamma cost
## 100 0.1 1
AAsvm4 = svm(percent_change_next_weeks_price ~ ., data = AAtrain[, c(1:8)],
             kernel = "sigmoid",
             gamma = tuned.svm4$best.parameters$gamma,
             cost = tuned.svm4$best.parameters$cost)
summary(AAsvm4)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AAtrain[,
## c(1:8)], kernel = "sigmoid", gamma = tuned.svm4$best.parameters$gamma,
## cost = tuned.svm4$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: sigmoid
## cost: 1
## gamma: 0.1
## coef.0: 0
## epsilon: 0.1
##
##
## Number of Support Vectors: 11
# Predict next-week percent change with the sigmoid-kernel model on both splits.
AAtrain$svmtestpredict4 = predict(AAsvm4, AAtrain, type = "response")
AAtest$svmtestpredict4 = predict(AAsvm4, AAtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = AAtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(AAtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  scale_x_discrete(limits = row.names(AAtrain)) +
  theme_minimal()

ggplot(data = AAtest,
       aes(y = percent_change_next_weeks_price, x = row.names(AAtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  scale_x_discrete(limits = row.names(AAtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
AAtrainmsesig = mean((AAtrain$percent_change_next_weeks_price - AAtrain$svmtestpredict4)^2)
svmtrain_error = cbind(svmtrain_error, AAtrainmsesig)
AAtestmsesig = mean((AAtest$percent_change_next_weeks_price - AAtest$svmtestpredict4)^2)
svmtrain_error = cbind(svmtrain_error, AAtestmsesig)
AXP
Linear SVM
# AXP, linear kernel: grid-search with tune.svm(), then refit the SVM on the
# first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
form1 = percent_change_next_weeks_price ~ .  # response vs. all other columns
# NOTE(review): e1071 documents gamma as unused by the linear kernel, so the
# gamma grid here only repeats the same fits -- confirm before trimming it.
tuned.svm = tune.svm(form1, data = AXPtrain[, c(1:8)], kernel = "linear",
                     gamma = seq(0.01, 0.1, by = 0.01),
                     cost = seq(0.1, 1, by = 0.1))
tuned.svm$best.parameters
## gamma cost
## 11 0.01 0.2
AXPsvm = svm(percent_change_next_weeks_price ~ ., data = AXPtrain[, c(1:8)],
             kernel = "linear",
             gamma = tuned.svm$best.parameters$gamma,
             cost = tuned.svm$best.parameters$cost)
summary(AXPsvm)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AXPtrain[,
## c(1:8)], kernel = "linear", gamma = tuned.svm$best.parameters$gamma,
## cost = tuned.svm$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: linear
## cost: 0.2
## gamma: 0.01
## epsilon: 0.1
##
##
## Number of Support Vectors: 11
# Predict next-week percent change with the linear-kernel model on both splits.
AXPtrain$svmtestpredict = predict(AXPsvm, AXPtrain, type = "response")
AXPtest$svmtestpredict = predict(AXPsvm, AXPtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = AXPtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(AXPtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  scale_x_discrete(limits = row.names(AXPtrain)) +
  theme_minimal()

ggplot(data = AXPtest,
       aes(y = percent_change_next_weeks_price, x = row.names(AXPtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  scale_x_discrete(limits = row.names(AXPtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
AXPtrainmselin = mean((AXPtrain$percent_change_next_weeks_price - AXPtrain$svmtestpredict)^2)
svmtrain_error = cbind(svmtrain_error, AXPtrainmselin)
AXPtestmselin = mean((AXPtest$percent_change_next_weeks_price - AXPtest$svmtestpredict)^2)
svmtrain_error = cbind(svmtrain_error, AXPtestmselin)
Polynomial SVM
# AXP, polynomial kernel: grid-search gamma and cost with tune.svm(), then
# refit the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm2 = tune.svm(form1, data = AXPtrain[, c(1:8)], kernel = "polynomial",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm2$best.parameters
## gamma cost
## 88 0.08 0.9
AXPsvm2 = svm(percent_change_next_weeks_price ~ ., data = AXPtrain[, c(1:8)],
              kernel = "polynomial",
              gamma = tuned.svm2$best.parameters$gamma,
              cost = tuned.svm2$best.parameters$cost)
summary(AXPsvm2)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AXPtrain[,
## c(1:8)], kernel = "polynomial", gamma = tuned.svm2$best.parameters$gamma,
## cost = tuned.svm2$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: polynomial
## cost: 0.9
## degree: 3
## gamma: 0.08
## coef.0: 0
## epsilon: 0.1
##
##
## Number of Support Vectors: 11
# Predict next-week percent change with the polynomial-kernel model on both splits.
AXPtrain$svmtestpredict2 = predict(AXPsvm2, AXPtrain, type = "response")
AXPtest$svmtestpredict2 = predict(AXPsvm2, AXPtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = AXPtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(AXPtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  scale_x_discrete(limits = row.names(AXPtrain)) +
  theme_minimal()

ggplot(data = AXPtest,
       aes(y = percent_change_next_weeks_price, x = row.names(AXPtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  scale_x_discrete(limits = row.names(AXPtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
AXPtrainmsepoly = mean((AXPtrain$percent_change_next_weeks_price - AXPtrain$svmtestpredict2)^2)
svmtrain_error = cbind(svmtrain_error, AXPtrainmsepoly)
AXPtestmsepoly = mean((AXPtest$percent_change_next_weeks_price - AXPtest$svmtestpredict2)^2)
svmtrain_error = cbind(svmtrain_error, AXPtestmsepoly)
Radial Basis SVM
# AXP, radial kernel: grid-search gamma and cost with tune.svm(), then refit
# the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm3 = tune.svm(form1, data = AXPtrain[, c(1:8)], kernel = "radial",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm3$best.parameters
## gamma cost
## 95 0.05 1
AXPsvm3 = svm(percent_change_next_weeks_price ~ ., data = AXPtrain[, c(1:8)],
              kernel = "radial",
              gamma = tuned.svm3$best.parameters$gamma,
              cost = tuned.svm3$best.parameters$cost)
summary(AXPsvm3)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AXPtrain[,
## c(1:8)], kernel = "radial", gamma = tuned.svm3$best.parameters$gamma,
## cost = tuned.svm3$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 1
## gamma: 0.05
## epsilon: 0.1
##
##
## Number of Support Vectors: 11
# Predict next-week percent change with the radial-kernel model on both splits.
AXPtrain$svmtestpredict3 = predict(AXPsvm3, AXPtrain, type = "response")
AXPtest$svmtestpredict3 = predict(AXPsvm3, AXPtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = AXPtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(AXPtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  scale_x_discrete(limits = row.names(AXPtrain)) +
  theme_minimal()

ggplot(data = AXPtest,
       aes(y = percent_change_next_weeks_price, x = row.names(AXPtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  scale_x_discrete(limits = row.names(AXPtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
AXPtrainmserad = mean((AXPtrain$percent_change_next_weeks_price - AXPtrain$svmtestpredict3)^2)
svmtrain_error = cbind(svmtrain_error, AXPtrainmserad)
AXPtestmserad = mean((AXPtest$percent_change_next_weeks_price - AXPtest$svmtestpredict3)^2)
svmtrain_error = cbind(svmtrain_error, AXPtestmserad)
Sigmoid SVM
# AXP, sigmoid kernel: grid-search gamma and cost with tune.svm(), then refit
# the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm4 = tune.svm(form1, data = AXPtrain[, c(1:8)], kernel = "sigmoid",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm4$best.parameters
## gamma cost
## 96 0.06 1
AXPsvm4 = svm(percent_change_next_weeks_price ~ ., data = AXPtrain[, c(1:8)],
              kernel = "sigmoid",
              gamma = tuned.svm4$best.parameters$gamma,
              cost = tuned.svm4$best.parameters$cost)
summary(AXPsvm4)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = AXPtrain[,
## c(1:8)], kernel = "sigmoid", gamma = tuned.svm4$best.parameters$gamma,
## cost = tuned.svm4$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: sigmoid
## cost: 1
## gamma: 0.06
## coef.0: 0
## epsilon: 0.1
##
##
## Number of Support Vectors: 12
# Predict next-week percent change with the sigmoid-kernel model on both splits.
AXPtrain$svmtestpredict4 = predict(AXPsvm4, AXPtrain, type = "response")
AXPtest$svmtestpredict4 = predict(AXPsvm4, AXPtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = AXPtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(AXPtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  scale_x_discrete(limits = row.names(AXPtrain)) +
  theme_minimal()

ggplot(data = AXPtest,
       aes(y = percent_change_next_weeks_price, x = row.names(AXPtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  scale_x_discrete(limits = row.names(AXPtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
AXPtrainmsesig = mean((AXPtrain$percent_change_next_weeks_price - AXPtrain$svmtestpredict4)^2)
svmtrain_error = cbind(svmtrain_error, AXPtrainmsesig)
AXPtestmsesig = mean((AXPtest$percent_change_next_weeks_price - AXPtest$svmtestpredict4)^2)
svmtrain_error = cbind(svmtrain_error, AXPtestmsesig)
BA
Linear SVM
# BA, linear kernel: grid-search with tune.svm(), then refit the SVM on the
# first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
form1 = percent_change_next_weeks_price ~ .  # response vs. all other columns
# NOTE(review): e1071 documents gamma as unused by the linear kernel, so the
# gamma grid here only repeats the same fits -- confirm before trimming it.
tuned.svm = tune.svm(form1, data = BAtrain[, c(1:8)], kernel = "linear",
                     gamma = seq(0.01, 0.1, by = 0.01),
                     cost = seq(0.1, 1, by = 0.1))
tuned.svm$best.parameters
## gamma cost
## 1 0.01 0.1
BAsvm = svm(percent_change_next_weeks_price ~ ., data = BAtrain[, c(1:8)],
            kernel = "linear",
            gamma = tuned.svm$best.parameters$gamma,
            cost = tuned.svm$best.parameters$cost)
summary(BAsvm)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BAtrain[,
## c(1:8)], kernel = "linear", gamma = tuned.svm$best.parameters$gamma,
## cost = tuned.svm$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: linear
## cost: 0.1
## gamma: 0.01
## epsilon: 0.1
##
##
## Number of Support Vectors: 11
# Predict next-week percent change with the linear-kernel model on both splits.
BAtrain$svmtestpredict = predict(BAsvm, BAtrain, type = "response")
BAtest$svmtestpredict = predict(BAsvm, BAtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = BAtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(BAtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  scale_x_discrete(limits = row.names(BAtrain)) +
  theme_minimal()

ggplot(data = BAtest,
       aes(y = percent_change_next_weeks_price, x = row.names(BAtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  scale_x_discrete(limits = row.names(BAtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
BAtrainmselin = mean((BAtrain$percent_change_next_weeks_price - BAtrain$svmtestpredict)^2)
svmtrain_error = cbind(svmtrain_error, BAtrainmselin)
BAtestmselin = mean((BAtest$percent_change_next_weeks_price - BAtest$svmtestpredict)^2)
svmtrain_error = cbind(svmtrain_error, BAtestmselin)
Polynomial SVM
# BA, polynomial kernel: grid-search gamma and cost with tune.svm(), then
# refit the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm2 = tune.svm(form1, data = BAtrain[, c(1:8)], kernel = "polynomial",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm2$best.parameters
## gamma cost
## 92 0.02 1
BAsvm2 = svm(percent_change_next_weeks_price ~ ., data = BAtrain[, c(1:8)],
             kernel = "polynomial",
             gamma = tuned.svm2$best.parameters$gamma,
             cost = tuned.svm2$best.parameters$cost)
summary(BAsvm2)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BAtrain[,
## c(1:8)], kernel = "polynomial", gamma = tuned.svm2$best.parameters$gamma,
## cost = tuned.svm2$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: polynomial
## cost: 1
## degree: 3
## gamma: 0.02
## coef.0: 0
## epsilon: 0.1
##
##
## Number of Support Vectors: 10
# Predict next-week percent change with the polynomial-kernel model on both splits.
BAtrain$svmtestpredict2 = predict(BAsvm2, BAtrain, type = "response")
BAtest$svmtestpredict2 = predict(BAsvm2, BAtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = BAtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(BAtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  scale_x_discrete(limits = row.names(BAtrain)) +
  theme_minimal()

ggplot(data = BAtest,
       aes(y = percent_change_next_weeks_price, x = row.names(BAtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  scale_x_discrete(limits = row.names(BAtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
BAtrainmsepoly = mean((BAtrain$percent_change_next_weeks_price - BAtrain$svmtestpredict2)^2)
svmtrain_error = cbind(svmtrain_error, BAtrainmsepoly)
BAtestmsepoly = mean((BAtest$percent_change_next_weeks_price - BAtest$svmtestpredict2)^2)
svmtrain_error = cbind(svmtrain_error, BAtestmsepoly)
Radial Basis SVM
# BA, radial kernel: grid-search gamma and cost with tune.svm(), then refit
# the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm3 = tune.svm(form1, data = BAtrain[, c(1:8)], kernel = "radial",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm3$best.parameters
## gamma cost
## 100 0.1 1
BAsvm3 = svm(percent_change_next_weeks_price ~ ., data = BAtrain[, c(1:8)],
             kernel = "radial",
             gamma = tuned.svm3$best.parameters$gamma,
             cost = tuned.svm3$best.parameters$cost)
summary(BAsvm3)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BAtrain[,
## c(1:8)], kernel = "radial", gamma = tuned.svm3$best.parameters$gamma,
## cost = tuned.svm3$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 1
## gamma: 0.1
## epsilon: 0.1
##
##
## Number of Support Vectors: 11
# Predict next-week percent change with the radial-kernel model on both splits.
BAtrain$svmtestpredict3 = predict(BAsvm3, BAtrain, type = "response")
BAtest$svmtestpredict3 = predict(BAsvm3, BAtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = BAtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(BAtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  scale_x_discrete(limits = row.names(BAtrain)) +
  theme_minimal()

ggplot(data = BAtest,
       aes(y = percent_change_next_weeks_price, x = row.names(BAtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  scale_x_discrete(limits = row.names(BAtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
BAtrainmserad = mean((BAtrain$percent_change_next_weeks_price - BAtrain$svmtestpredict3)^2)
svmtrain_error = cbind(svmtrain_error, BAtrainmserad)
BAtestmserad = mean((BAtest$percent_change_next_weeks_price - BAtest$svmtestpredict3)^2)
svmtrain_error = cbind(svmtrain_error, BAtestmserad)
Sigmoid SVM
# BA, sigmoid kernel: grid-search gamma and cost with tune.svm(), then refit
# the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm4 = tune.svm(form1, data = BAtrain[, c(1:8)], kernel = "sigmoid",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm4$best.parameters
## gamma cost
## 16 0.06 0.2
BAsvm4 = svm(percent_change_next_weeks_price ~ ., data = BAtrain[, c(1:8)],
             kernel = "sigmoid",
             gamma = tuned.svm4$best.parameters$gamma,
             cost = tuned.svm4$best.parameters$cost)
summary(BAsvm4)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BAtrain[,
## c(1:8)], kernel = "sigmoid", gamma = tuned.svm4$best.parameters$gamma,
## cost = tuned.svm4$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: sigmoid
## cost: 0.2
## gamma: 0.06
## coef.0: 0
## epsilon: 0.1
##
##
## Number of Support Vectors: 12
# Predict next-week percent change with the sigmoid-kernel model on both splits.
BAtrain$svmtestpredict4 = predict(BAsvm4, BAtrain, type = "response")
BAtest$svmtestpredict4 = predict(BAsvm4, BAtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = BAtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(BAtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  scale_x_discrete(limits = row.names(BAtrain)) +
  theme_minimal()

ggplot(data = BAtest,
       aes(y = percent_change_next_weeks_price, x = row.names(BAtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  scale_x_discrete(limits = row.names(BAtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
BAtrainmsesig = mean((BAtrain$percent_change_next_weeks_price - BAtrain$svmtestpredict4)^2)
svmtrain_error = cbind(svmtrain_error, BAtrainmsesig)
BAtestmsesig = mean((BAtest$percent_change_next_weeks_price - BAtest$svmtestpredict4)^2)
svmtrain_error = cbind(svmtrain_error, BAtestmsesig)
BAC
Linear SVM
# BAC, linear kernel: grid-search with tune.svm(), then refit the SVM on the
# first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
form1 = percent_change_next_weeks_price ~ .  # response vs. all other columns
# NOTE(review): e1071 documents gamma as unused by the linear kernel, so the
# gamma grid here only repeats the same fits -- confirm before trimming it.
tuned.svm = tune.svm(form1, data = BACtrain[, c(1:8)], kernel = "linear",
                     gamma = seq(0.01, 0.1, by = 0.01),
                     cost = seq(0.1, 1, by = 0.1))
tuned.svm$best.parameters
## gamma cost
## 91 0.01 1
BACsvm = svm(percent_change_next_weeks_price ~ ., data = BACtrain[, c(1:8)],
             kernel = "linear",
             gamma = tuned.svm$best.parameters$gamma,
             cost = tuned.svm$best.parameters$cost)
summary(BACsvm)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BACtrain[,
## c(1:8)], kernel = "linear", gamma = tuned.svm$best.parameters$gamma,
## cost = tuned.svm$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: linear
## cost: 1
## gamma: 0.01
## epsilon: 0.1
##
##
## Number of Support Vectors: 9
# Predict next-week percent change with the linear-kernel model on both splits.
BACtrain$svmtestpredict = predict(BACsvm, BACtrain, type = "response")
BACtest$svmtestpredict = predict(BACsvm, BACtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = BACtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(BACtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  scale_x_discrete(limits = row.names(BACtrain)) +
  theme_minimal()

ggplot(data = BACtest,
       aes(y = percent_change_next_weeks_price, x = row.names(BACtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  scale_x_discrete(limits = row.names(BACtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
BACtrainmselin = mean((BACtrain$percent_change_next_weeks_price - BACtrain$svmtestpredict)^2)
svmtrain_error = cbind(svmtrain_error, BACtrainmselin)
BACtestmselin = mean((BACtest$percent_change_next_weeks_price - BACtest$svmtestpredict)^2)
svmtrain_error = cbind(svmtrain_error, BACtestmselin)
Polynomial SVM
# BAC, polynomial kernel: grid-search gamma and cost with tune.svm(), then
# refit the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm2 = tune.svm(form1, data = BACtrain[, c(1:8)], kernel = "polynomial",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm2$best.parameters
## gamma cost
## 100 0.1 1
BACsvm2 = svm(percent_change_next_weeks_price ~ ., data = BACtrain[, c(1:8)],
              kernel = "polynomial",
              gamma = tuned.svm2$best.parameters$gamma,
              cost = tuned.svm2$best.parameters$cost)
summary(BACsvm2)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BACtrain[,
## c(1:8)], kernel = "polynomial", gamma = tuned.svm2$best.parameters$gamma,
## cost = tuned.svm2$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: polynomial
## cost: 1
## degree: 3
## gamma: 0.1
## coef.0: 0
## epsilon: 0.1
##
##
## Number of Support Vectors: 10
# Predict next-week percent change with the polynomial-kernel model on both splits.
BACtrain$svmtestpredict2 = predict(BACsvm2, BACtrain, type = "response")
BACtest$svmtestpredict2 = predict(BACsvm2, BACtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = BACtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(BACtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  scale_x_discrete(limits = row.names(BACtrain)) +
  theme_minimal()

ggplot(data = BACtest,
       aes(y = percent_change_next_weeks_price, x = row.names(BACtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  scale_x_discrete(limits = row.names(BACtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
BACtrainmsepoly = mean((BACtrain$percent_change_next_weeks_price - BACtrain$svmtestpredict2)^2)
svmtrain_error = cbind(svmtrain_error, BACtrainmsepoly)
BACtestmsepoly = mean((BACtest$percent_change_next_weeks_price - BACtest$svmtestpredict2)^2)
svmtrain_error = cbind(svmtrain_error, BACtestmsepoly)
Radial Basis SVM
# BAC, radial kernel: grid-search gamma and cost with tune.svm(), then refit
# the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm3 = tune.svm(form1, data = BACtrain[, c(1:8)], kernel = "radial",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm3$best.parameters
## gamma cost
## 1 0.01 0.1
BACsvm3 = svm(percent_change_next_weeks_price ~ ., data = BACtrain[, c(1:8)],
              kernel = "radial",
              gamma = tuned.svm3$best.parameters$gamma,
              cost = tuned.svm3$best.parameters$cost)
summary(BACsvm3)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BACtrain[,
## c(1:8)], kernel = "radial", gamma = tuned.svm3$best.parameters$gamma,
## cost = tuned.svm3$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 0.1
## gamma: 0.01
## epsilon: 0.1
##
##
## Number of Support Vectors: 10
# Predict next-week percent change with the radial-kernel model on both splits.
BACtrain$svmtestpredict3 = predict(BACsvm3, BACtrain, type = "response")
BACtest$svmtestpredict3 = predict(BACsvm3, BACtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = BACtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(BACtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  scale_x_discrete(limits = row.names(BACtrain)) +
  theme_minimal()

ggplot(data = BACtest,
       aes(y = percent_change_next_weeks_price, x = row.names(BACtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  scale_x_discrete(limits = row.names(BACtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
BACtrainmserad = mean((BACtrain$percent_change_next_weeks_price - BACtrain$svmtestpredict3)^2)
svmtrain_error = cbind(svmtrain_error, BACtrainmserad)
BACtestmserad = mean((BACtest$percent_change_next_weeks_price - BACtest$svmtestpredict3)^2)
svmtrain_error = cbind(svmtrain_error, BACtestmserad)
Sigmoid SVM
# BAC, sigmoid kernel: grid-search gamma and cost with tune.svm(), then refit
# the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm4 = tune.svm(form1, data = BACtrain[, c(1:8)], kernel = "sigmoid",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm4$best.parameters
## gamma cost
## 1 0.01 0.1
BACsvm4 = svm(percent_change_next_weeks_price ~ ., data = BACtrain[, c(1:8)],
              kernel = "sigmoid",
              gamma = tuned.svm4$best.parameters$gamma,
              cost = tuned.svm4$best.parameters$cost)
summary(BACsvm4)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = BACtrain[,
## c(1:8)], kernel = "sigmoid", gamma = tuned.svm4$best.parameters$gamma,
## cost = tuned.svm4$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: sigmoid
## cost: 0.1
## gamma: 0.01
## coef.0: 0
## epsilon: 0.1
##
##
## Number of Support Vectors: 10
# Predict next-week percent change with the sigmoid-kernel model on both splits.
BACtrain$svmtestpredict4 = predict(BACsvm4, BACtrain, type = "response")
BACtest$svmtestpredict4 = predict(BACsvm4, BACtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = BACtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(BACtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  scale_x_discrete(limits = row.names(BACtrain)) +
  theme_minimal()

ggplot(data = BACtest,
       aes(y = percent_change_next_weeks_price, x = row.names(BACtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  scale_x_discrete(limits = row.names(BACtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
BACtrainmsesig = mean((BACtrain$percent_change_next_weeks_price - BACtrain$svmtestpredict4)^2)
svmtrain_error = cbind(svmtrain_error, BACtrainmsesig)
BACtestmsesig = mean((BACtest$percent_change_next_weeks_price - BACtest$svmtestpredict4)^2)
svmtrain_error = cbind(svmtrain_error, BACtestmsesig)
XOM
Linear SVM
# XOM, linear kernel: grid-search with tune.svm(), then refit the SVM on the
# first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
form1 = percent_change_next_weeks_price ~ .  # response vs. all other columns
# NOTE(review): e1071 documents gamma as unused by the linear kernel, so the
# gamma grid here only repeats the same fits -- confirm before trimming it.
tuned.svm = tune.svm(form1, data = XOMtrain[, c(1:8)], kernel = "linear",
                     gamma = seq(0.01, 0.1, by = 0.01),
                     cost = seq(0.1, 1, by = 0.1))
tuned.svm$best.parameters
## gamma cost
## 1 0.01 0.1
XOMsvm = svm(percent_change_next_weeks_price ~ ., data = XOMtrain[, c(1:8)],
             kernel = "linear",
             gamma = tuned.svm$best.parameters$gamma,
             cost = tuned.svm$best.parameters$cost)
summary(XOMsvm)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = XOMtrain[,
## c(1:8)], kernel = "linear", gamma = tuned.svm$best.parameters$gamma,
## cost = tuned.svm$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: linear
## cost: 0.1
## gamma: 0.01
## epsilon: 0.1
##
##
## Number of Support Vectors: 12
# Predict next-week percent change with the linear-kernel model on both splits.
XOMtrain$svmtestpredict = predict(XOMsvm, XOMtrain, type = "response")
XOMtest$svmtestpredict = predict(XOMsvm, XOMtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = XOMtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(XOMtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  scale_x_discrete(limits = row.names(XOMtrain)) +
  theme_minimal()

ggplot(data = XOMtest,
       aes(y = percent_change_next_weeks_price, x = row.names(XOMtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict)) +
  scale_x_discrete(limits = row.names(XOMtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
XOMtrainmselin = mean((XOMtrain$percent_change_next_weeks_price - XOMtrain$svmtestpredict)^2)
svmtrain_error = cbind(svmtrain_error, XOMtrainmselin)
XOMtestmselin = mean((XOMtest$percent_change_next_weeks_price - XOMtest$svmtestpredict)^2)
svmtrain_error = cbind(svmtrain_error, XOMtestmselin)
Polynomial SVM
# XOM, polynomial kernel: grid-search gamma and cost with tune.svm(), then
# refit the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm2 = tune.svm(form1, data = XOMtrain[, c(1:8)], kernel = "polynomial",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm2$best.parameters
## gamma cost
## 100 0.1 1
XOMsvm2 = svm(percent_change_next_weeks_price ~ ., data = XOMtrain[, c(1:8)],
              kernel = "polynomial",
              gamma = tuned.svm2$best.parameters$gamma,
              cost = tuned.svm2$best.parameters$cost)
summary(XOMsvm2)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = XOMtrain[,
## c(1:8)], kernel = "polynomial", gamma = tuned.svm2$best.parameters$gamma,
## cost = tuned.svm2$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: polynomial
## cost: 1
## degree: 3
## gamma: 0.1
## coef.0: 0
## epsilon: 0.1
##
##
## Number of Support Vectors: 11
# Predict next-week percent change with the polynomial-kernel model on both splits.
XOMtrain$svmtestpredict2 = predict(XOMsvm2, XOMtrain, type = "response")
XOMtest$svmtestpredict2 = predict(XOMsvm2, XOMtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = XOMtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(XOMtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  scale_x_discrete(limits = row.names(XOMtrain)) +
  theme_minimal()

ggplot(data = XOMtest,
       aes(y = percent_change_next_weeks_price, x = row.names(XOMtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict2)) +
  scale_x_discrete(limits = row.names(XOMtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
XOMtrainmsepoly = mean((XOMtrain$percent_change_next_weeks_price - XOMtrain$svmtestpredict2)^2)
svmtrain_error = cbind(svmtrain_error, XOMtrainmsepoly)
XOMtestmsepoly = mean((XOMtest$percent_change_next_weeks_price - XOMtest$svmtestpredict2)^2)
svmtrain_error = cbind(svmtrain_error, XOMtestmsepoly)
Radial Basis SVM
# XOM, radial kernel: grid-search gamma and cost with tune.svm(), then refit
# the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm3 = tune.svm(form1, data = XOMtrain[, c(1:8)], kernel = "radial",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm3$best.parameters
## gamma cost
## 31 0.01 0.4
XOMsvm3 = svm(percent_change_next_weeks_price ~ ., data = XOMtrain[, c(1:8)],
              kernel = "radial",
              gamma = tuned.svm3$best.parameters$gamma,
              cost = tuned.svm3$best.parameters$cost)
summary(XOMsvm3)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = XOMtrain[,
## c(1:8)], kernel = "radial", gamma = tuned.svm3$best.parameters$gamma,
## cost = tuned.svm3$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 0.4
## gamma: 0.01
## epsilon: 0.1
##
##
## Number of Support Vectors: 11
# Predict next-week percent change with the radial-kernel model on both splits.
XOMtrain$svmtestpredict3 = predict(XOMsvm3, XOMtrain, type = "response")
XOMtest$svmtestpredict3 = predict(XOMsvm3, XOMtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = XOMtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(XOMtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  scale_x_discrete(limits = row.names(XOMtrain)) +
  theme_minimal()

ggplot(data = XOMtest,
       aes(y = percent_change_next_weeks_price, x = row.names(XOMtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict3)) +
  scale_x_discrete(limits = row.names(XOMtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
XOMtrainmserad = mean((XOMtrain$percent_change_next_weeks_price - XOMtrain$svmtestpredict3)^2)
svmtrain_error = cbind(svmtrain_error, XOMtrainmserad)
XOMtestmserad = mean((XOMtest$percent_change_next_weeks_price - XOMtest$svmtestpredict3)^2)
svmtrain_error = cbind(svmtrain_error, XOMtestmserad)
Sigmoid SVM
# XOM, sigmoid kernel: grid-search gamma and cost with tune.svm(), then refit
# the SVM on the first eight training columns using the selected values.
set.seed(1)  # fixed seed so the tuning run is repeatable
tuned.svm4 = tune.svm(form1, data = XOMtrain[, c(1:8)], kernel = "sigmoid",
                      gamma = seq(0.01, 0.1, by = 0.01),
                      cost = seq(0.1, 1, by = 0.1))
tuned.svm4$best.parameters
## gamma cost
## 51 0.01 0.6
XOMsvm4 = svm(percent_change_next_weeks_price ~ ., data = XOMtrain[, c(1:8)],
              kernel = "sigmoid",
              gamma = tuned.svm4$best.parameters$gamma,
              cost = tuned.svm4$best.parameters$cost)
summary(XOMsvm4)
##
## Call:
## svm(formula = percent_change_next_weeks_price ~ ., data = XOMtrain[,
## c(1:8)], kernel = "sigmoid", gamma = tuned.svm4$best.parameters$gamma,
## cost = tuned.svm4$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: sigmoid
## cost: 0.6
## gamma: 0.01
## coef.0: 0
## epsilon: 0.1
##
##
## Number of Support Vectors: 11
# Predict next-week percent change with the sigmoid-kernel model on both splits.
XOMtrain$svmtestpredict4 = predict(XOMsvm4, XOMtrain, type = "response")
XOMtest$svmtestpredict4 = predict(XOMsvm4, XOMtest, type = "response")

# Actual (red) vs. predicted (black) values per observation. Row names are
# character, so the default discrete axis sorts them alphabetically
# ("1", "10", "2", ...); explicit limits keep the rows in data-frame order.
ggplot(data = XOMtrain,
       aes(y = percent_change_next_weeks_price, x = row.names(XOMtrain))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  scale_x_discrete(limits = row.names(XOMtrain)) +
  theme_minimal()

ggplot(data = XOMtest,
       aes(y = percent_change_next_weeks_price, x = row.names(XOMtest))) +
  geom_point(col = "red") +
  geom_point(aes(y = svmtestpredict4)) +
  scale_x_discrete(limits = row.names(XOMtest)) +
  theme_minimal()

# Mean squared error on each split, appended as columns to the running table.
# NOTE(review): the test MSE also lands in `svmtrain_error` -- confirm intended.
XOMtrainmsesig = mean((XOMtrain$percent_change_next_weeks_price - XOMtrain$svmtestpredict4)^2)
svmtrain_error = cbind(svmtrain_error, XOMtrainmsesig)
XOMtestmsesig = mean((XOMtest$percent_change_next_weeks_price - XOMtest$svmtestpredict4)^2)
svmtrain_error = cbind(svmtrain_error, XOMtestmsesig)