1 importing R packages

library(readxl)
library(skimr)
library(VIM)
library(mice)
library(caret)
library(rpart.plot)

2 open data

# Read the training workbook and rename its columns positionally to the
# eight short names used by the model formulas below (PP is the response).
data <- read_xlsx(path = "看跌期权定价机器学习模型-updated.xlsx")
names(data) <- c(
  "PP", "UP", "LNTV", "sigma",
  "rf", "rp", "K", "T"
)

3 data cleaning

3.1 descriptive statistics

# Per-column summary: missing counts, mean/sd, quantiles and a mini histogram.
skimr::skim(data)
Data summary
Name data
Number of rows 1783
Number of columns 8
_______________________
Column type frequency:
numeric 8
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
PP 0 1 0.39 0.41 0.00 0.10 0.21 0.49 1.50 ▇▂▁▂▁
UP 0 1 4.98 0.14 4.80 4.88 4.93 5.08 5.33 ▇▅▃▂▂
LNTV 0 1 4.23 2.05 0.00 3.18 4.74 5.60 9.49 ▃▂▇▃▁
sigma 0 1 0.15 0.03 0.10 0.13 0.15 0.17 0.22 ▆▆▇▂▆
rf 0 1 0.02 0.00 0.02 0.02 0.02 0.02 0.03 ▁▂▇▇▂
rp 0 1 0.04 0.03 -0.02 0.02 0.04 0.05 0.10 ▂▇▇▅▂
K 0 1 5.09 0.57 4.34 4.64 4.93 5.25 6.25 ▇▇▅▁▆
T 0 1 0.37 0.17 0.04 0.24 0.38 0.51 0.66 ▅▃▇▆▆

3.2 visualize missing values

# Plot the missing-value pattern of `data`. `prop = FALSE` asks for absolute
# counts rather than proportions and `numbers = TRUE` prints the figures on
# the plot (see ?VIM::aggr). TRUE/FALSE are spelled out because `T` and `F`
# are ordinary variables that can be reassigned.
aggr(data, numbers = TRUE, prop = FALSE)

3.3 missing value imputation

# Impute missing values with mice's random-forest method ("rf"): m = 5
# imputed data sets, 50 iterations, fixed seed for reproducibility.
# complete() is called with its defaults to obtain one filled-in data frame.
# The argument is spelled out as `method`; the previous `meth` only worked
# through partial argument matching.
# NOTE(review): the skim() summary recorded above reports n_missing = 0 for
# every column, so this step may currently be a no-op -- confirm.
data <- complete(
  mice(data, m = 5, maxit = 50, method = "rf", seed = 500)
)

4 Machine Learning Models

4.1 Data Splitting

# Split the rows 80/20 into training and test sets. The seed makes the
# partition reproducible; `list = FALSE` returns the row indices as a matrix
# instead of a list so they can be used directly for subsetting.
# (`FALSE` is spelled out because `F` is a reassignable variable.)
set.seed(100)
index <- createDataPartition(data$PP, p = 0.8, list = FALSE)

# NOTE: the data frame `train` shares its name with caret's train() function.
# Later calls such as train(PP ~ ., ...) still resolve to the function,
# because R skips non-function objects when looking up a called name.
train <- data[index, ]
train
test <- data[-index, ]
test

4.2 Random forest

4.2.1 Tuned random forest

# Tune a 50-tree random forest over mtry = 1..5 using 5-fold cross-validation
# on the training set.
set.seed(100)
tuneGrid <- expand.grid(mtry = seq_len(5))
rfModel <- train(
  PP ~ .,
  data = train,
  method = "rf",
  tuneGrid = tuneGrid,
  trControl = trainControl(method = "cv", number = 5),
  ntree = 50
)

4.2.2 RMSE

# Score the tuned random forest on the held-out rows and print its RMSE.
predictions <- predict(rfModel, newdata = test)
rf_rmse <- RMSE(pred = predictions, obs = test[["PP"]])
rf_rmse
## [1] 0.01744847

4.3 GBM

4.3.1 Tuned GBM

# Tune a gradient-boosting model with 5-fold cross-validation.
#
# The original grid fixed n.trees = 50 together with shrinkage = 0.01. With
# such a small learning rate, 50 boosting steps barely move the ensemble away
# from its constant starting prediction, so the model badly underfits: the
# recorded test RMSE is 0.26 against sd(PP) = 0.41 in the skim summary. The
# other models score 0.02-0.05, so the comparison was not a fair one.
#
# The grid below is a superset of the original (every original candidate is
# still searched) and adds larger ensembles and a faster learning rate.
# NOTE: RMSE values recorded in this document for GBM come from the original
# 50-tree grid; re-run to refresh them and the model comparison.
set.seed(100)
tuneGrid <- expand.grid(
  n.trees = c(50, 500, 1000, 2000),
  interaction.depth = c(1, 2, 3),
  shrinkage = c(0.01, 0.1),
  n.minobsinnode = c(5, 10, 15)
)
gbmModel <- train(
  PP ~ .,
  data = train,
  method = "gbm",
  trControl = trainControl(method = "cv", number = 5),
  tuneGrid = tuneGrid
)

4.3.2 RMSE

# Score the tuned GBM on the held-out rows and print its RMSE.
predictions <- predict(gbmModel, newdata = test)
gbm_rmse <- RMSE(pred = predictions, obs = test[["PP"]])
gbm_rmse
## [1] 0.2618784

4.4 Xgboost

4.4.1 Tuned Xgboost

# Tune an xgboost tree model with 5-fold cross-validation. The grid varies
# the number of boosting rounds, the tree depth and the per-tree column
# sampling fraction; the remaining parameters are held at a single value.
set.seed(100)
tuneGrid <- expand.grid(
  nrounds = c(100, 200),
  max_depth = c(10, 15),
  colsample_bytree = seq(from = 0.5, to = 0.9, length.out = 5),
  eta = 0.1,
  gamma = 0,
  min_child_weight = 1,
  subsample = 1
)
xgboostModel <- train(
  PP ~ .,
  data = train,
  method = "xgbTree",
  tuneGrid = tuneGrid,
  trControl = trainControl(method = "cv", number = 5)
)
## [10:34:00] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:01] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:01] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:02] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:02] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:03] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:03] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:04] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:04] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:05] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:06] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:06] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:07] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:07] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:07] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:08] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:09] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:09] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:10] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:10] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:11] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:11] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:12] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:12] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:13] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:13] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:14] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:15] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:16] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:16] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:17] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:18] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:19] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:20] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:21] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:21] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:22] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:23] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:24] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:25] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:26] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:27] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:27] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:28] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:29] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:30] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:30] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:31] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:32] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:33] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.

4.4.2 RMSE

# Score the tuned xgboost model on the held-out rows and print its RMSE.
predictions <- predict(xgboostModel, newdata = test)
xgboost_rmse <- RMSE(pred = predictions, obs = test[["PP"]])
xgboost_rmse
## [1] 0.03230221

4.5 Tree

4.5.1 Tuned Tree

# Tune a single regression tree (rpart) with 5-fold cross-validation over the
# complexity parameter cp = 0.001, 0.002, ..., 0.1.
# Assignments use `<-` like the rest of the file (previously `=`).
set.seed(100)
tuneGrid <- expand.grid(cp = seq(from = 0.001, to = 0.1, by = 0.001))
treeModel <- train(
  PP ~ .,
  data = train,
  method = "rpart",
  trControl = trainControl(method = "cv", number = 5),
  tuneGrid = tuneGrid
)

4.5.2 RMSE

# Score the tuned regression tree on the held-out rows and print its RMSE.
predictions <- predict(treeModel, newdata = test)
tree_rmse <- RMSE(pred = predictions, obs = test[["PP"]])
tree_rmse
## [1] 0.05365652

4.6 Model comparison

# Print the held-out RMSE of each fitted model one after another (random
# forest, GBM, xgboost, single tree). Each `## [1] ...` line is the recorded
# console output of the statement directly above it.
rf_rmse
## [1] 0.01744847
gbm_rmse
## [1] 0.2618784
xgboost_rmse
## [1] 0.03230221
tree_rmse
## [1] 0.05365652

随机森林模型的RMSE最低,性能最佳

5 Back testing

# Refit the random forest on the full data set (same mtry grid, 5-fold CV,
# 50 trees), replacing the earlier `rfModel`.
set.seed(100)
tuneGrid <- expand.grid(mtry = seq_len(5))
rfModel <- train(
  PP ~ .,
  data = data,
  method = "rf",
  tuneGrid = tuneGrid,
  trControl = trainControl(method = "cv", number = 5),
  ntree = 50
)

# Read the workbook of new observations, rename its columns positionally to
# the seven predictor names, predict PP and attach it as a new column.
df <- read_xlsx(path = "预测数据1.xlsx")
names(df) <- c("UP", "LNTV", "sigma", "rf", "rp", "K", "T")
pred <- predict(rfModel, newdata = df)
df[["PP"]] <- pred
df