# Attach the tidyverse first: the pipe, select() and walk() used below
# come from it.
library(tidyverse)

# Remaining packages used by this analysis (data, modelling, ROC, reporting).
packages <- c('ISLR', 'broom', 'knitr', 'kableExtra', 'caret', 'pROC')

# library() stops with an error when a package is not installed, whereas
# require() only returns FALSE and lets the script fail later with an
# unrelated-looking error.
walk(packages, library, character.only = TRUE, quietly = TRUE)

Sækja dataset og splitta

Prófa að fitta á stock market dataset.

# Load the Weekly data set into the workspace.
data(Weekly)

# Drop the columns that are not used for modelling.
Weekly <- select(Weekly, -c(Year, Volume, Today))

# First split: hold out 20% of the rows (partitioned on Direction) as the
# final test set; the rest is used for fitting.
set.seed(1)
inTest <- createDataPartition(y = Weekly$Direction, p = 0.2, list = FALSE)

training <- Weekly[-inTest, ]
testing <- Weekly[inTest, ]

# Second split of the training rows: 30% become `blending` (used to fit the
# final stacked model), the remaining rows become `ensemble` (used to fit
# the base models).
set.seed(1)
inVal <- createDataPartition(y = training$Direction, p = 0.3, list = FALSE)

ensemble <- training[-inVal, ]
blending <- training[inVal, ]

Nota sama resampling á öll models

# Resampling configuration shared by every train() call in this document:
# 5-fold cross-validation, class probabilities enabled, no per-iteration log.
myControl <- trainControl(
    method = 'cv',
    number = 5,
    classProbs = TRUE,
    verboseIter = FALSE
)

Models

Nota boosted trees, random forest og naive bayes sem models.

# Base learners, all fitted on the `ensemble` rows with the shared resampling
# configuration. The seed is reset before each fit so every model starts from
# the same random-number state. `Direction` is the outcome; all remaining
# columns are predictors.

# Boosted trees
set.seed(1)
model_gbm <- train(
    x = ensemble[, names(ensemble) != 'Direction'],
    y = ensemble$Direction,
    method = 'gbm',
    trControl = myControl,
    verbose = FALSE
)

# Random forest (ranger implementation)
set.seed(1)
model_rf <- train(
    x = ensemble[, names(ensemble) != 'Direction'],
    y = ensemble$Direction,
    method = 'ranger',
    trControl = myControl
)

# Naive Bayes
set.seed(1)
model_bayes <- train(
    x = ensemble[, names(ensemble) != 'Direction'],
    y = ensemble$Direction,
    method = 'naive_bayes',
    trControl = myControl
)

Bæta spám aftur í datasets

# Append each base model's predicted probability of the first class ("Down")
# as a new column. Only the first five columns are passed as newdata, so the
# probability columns added along the way are never fed back into the base
# models.

# Blending set: these columns become inputs of the final model.
blending$gbmProb <- predict(model_gbm, newdata = blending[, 1:5],
                            type = 'prob')$Down
blending$rfProb <- predict(model_rf, newdata = blending[, 1:5],
                           type = 'prob')$Down
blending$bayesProb <- predict(model_bayes, newdata = blending[, 1:5],
                              type = 'prob')$Down

# Test set: the same columns, needed when the final model predicts on it.
testing$gbmProb <- predict(model_gbm, newdata = testing[, 1:5],
                           type = 'prob')$Down
testing$rfProb <- predict(model_rf, newdata = testing[, 1:5],
                          type = 'prob')$Down
testing$bayesProb <- predict(model_bayes, newdata = testing[, 1:5],
                             type = 'prob')$Down

Lokamodel: boosted trees

# Final (stacked) model: boosted trees fitted on the blending set. Column 6
# is the outcome and is passed as y; every other column of `blending`
# (including the base-model probability columns) is a predictor.
set.seed(1)
ensemble_model <- train(x = blending[, -6], y = blending[, 6],
                        method = 'gbm',
                        trControl = myControl,
                        # Spelled out as FALSE: `F` is an ordinary variable
                        # that can be reassigned, FALSE is a reserved word.
                        verbose = FALSE)

# Class probabilities on the held-out test set (used for the ROC curve).
preds <- predict(ensemble_model, testing, type = 'prob')

# Accuracy and Kappa of the predicted classes against the true Direction.
postResample(predict(ensemble_model, testing), testing$Direction)

##   Accuracy      Kappa 
## 0.53669725 0.05135717

# ROC curve of the stacked model on the test set, scored with the predicted
# probability of "Down".
# `levels` and `direction` are given explicitly instead of being
# auto-detected: "Down" is the control level, "Up" the case level, and
# controls are expected to have the HIGHER predictor value (direction '>').
# Auto-detection picks whichever direction yields AUC >= 0.5, which can hide
# a worse-than-chance model; fixing it keeps the reported AUC honest.
ensemble.roc <- roc(response = testing$Direction, predictor = preds$Down,
                    levels = c('Down', 'Up'), direction = '>')
print(ensemble.roc)

## 
## Call:
## roc.default(response = testing$Direction, predictor = preds$Down)
## 
## Data: preds$Down in 97 controls (testing$Direction Down) > 121 cases (testing$Direction Up).
## Area under the curve: 0.555

# Draw the ROC curve of the stacked model.
plot(x = ensemble.roc)

EnsembleTest

Brynjólfur Gauti Jónsson

2/28/2018

Sækja dataset og splitta

Nota sama resampling á öll models

Models

Bæta spám aftur í datasets

Lokamodel: boosted trees