# 1 importing R package
library(readxl)
library(skimr)
library(VIM)
library(mice)
library(caret)
library(rpart.plot)

# 2 open data
# Read the modelling data set from the Excel workbook.
data <- read_xlsx("看跌期权定价机器学习模型-updated.xlsx")
# The columns are renamed purely by position, so the sheet must contain
# exactly the eight columns listed below; fail loudly rather than silently
# mislabelling a column.
# NOTE(review): the column order of the workbook cannot be checked from
# this script -- confirm it matches the vector below.
stopifnot(ncol(data) == 8L)
names(data) <- c("PP", "UP", "LNTV", "sigma", "rf", "rp", "K", "T")

# 3 data cleaning
# 3.1 descriptive statistics
# Print per-column summary statistics (missing counts, moments, quantiles).
skim(data)
## | Name | data |
## | Number of rows | 1783 |
## | Number of columns | 8 |
## | _______________________ | |
## | Column type frequency: | |
## | numeric | 8 |
## | ________________________ | |
## | Group variables | None |
## Variable type: numeric
## | skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
## |---|---|---|---|---|---|---|---|---|---|---|
## | PP | 0 | 1 | 0.39 | 0.41 | 0.00 | 0.10 | 0.21 | 0.49 | 1.50 | ▇▂▁▂▁ |
## | UP | 0 | 1 | 4.98 | 0.14 | 4.80 | 4.88 | 4.93 | 5.08 | 5.33 | ▇▅▃▂▂ |
## | LNTV | 0 | 1 | 4.23 | 2.05 | 0.00 | 3.18 | 4.74 | 5.60 | 9.49 | ▃▂▇▃▁ |
## | sigma | 0 | 1 | 0.15 | 0.03 | 0.10 | 0.13 | 0.15 | 0.17 | 0.22 | ▆▆▇▂▆ |
## | rf | 0 | 1 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.03 | ▁▂▇▇▂ |
## | rp | 0 | 1 | 0.04 | 0.03 | -0.02 | 0.02 | 0.04 | 0.05 | 0.10 | ▂▇▇▅▂ |
## | K | 0 | 1 | 5.09 | 0.57 | 4.34 | 4.64 | 4.93 | 5.25 | 6.25 | ▇▇▅▁▆ |
## | T | 0 | 1 | 0.37 | 0.17 | 0.04 | 0.24 | 0.38 | 0.51 | 0.66 | ▅▃▇▆▆ |
# 3.2 visualize missing value
# Plot the missing-value pattern; `numbers = TRUE` annotates the
# combinations and `prop = FALSE` reports counts instead of proportions.
aggr(data, numbers = TRUE, prop = FALSE)

# 3.3 missing value imputation
# Multiple imputation with random-forest imputers (5 imputations, 50
# iterations, fixed seed). `method` is spelled out in full instead of
# relying on partial argument matching (`meth`).
# NOTE(review): complete() is called without an `action`, so only one of
# the five imputed data sets is kept -- confirm that is intended.
# NOTE(review): the skim summary reports n_missing = 0 for every column, so
# there appears to be nothing to impute for this data set.
data <- complete(mice(data, m = 5, maxit = 50, method = "rf", seed = 500))

# 4 Machine Learning Model
# 4.1 Data Splitting
set.seed(100)
# Draw 80% of the rows for training, partitioned on the response PP;
# `list = FALSE` returns the row indices as a matrix instead of a list.
index <- createDataPartition(data$PP, p = 0.8, list = FALSE)
# NOTE(review): `train` has the same name as caret::train(). Calls such as
# train(...) still resolve to the function, because R skips non-function
# objects when looking up a call, but a distinct name would be clearer.
train <- data[index, ]
# Hold-out rows. This object must be called `test`: every later predict()
# and RMSE() call reads `test`.
test <- data[-index, ]

# 4.2 Random forest
# 4.2.1 Tuned random forest
set.seed(100)
# Candidate numbers of predictors sampled at each split.
tuneGrid <- expand.grid(mtry = 1:5)
# Tune mtry with 5-fold cross-validation; each forest grows 50 trees.
rfModel <- train(
  PP ~ .,
  data = train,
  method = "rf",
  ntree = 50,
  trControl = trainControl(method = "cv", number = 5),
  tuneGrid = tuneGrid
)

# 4.2.2 RMSE
# Score the tuned model on the hold-out rows.
predictions <- predict(rfModel, test)
rf_rmse <- RMSE(predictions, test$PP)
rf_rmse
## [1] 0.01744847
# 4.3 GBM
# 4.3.1 Tuned GBM
set.seed(100)
# 3 x 3 grid over tree depth and minimum node size; the number of trees
# and the learning rate are held fixed.
# NOTE(review): 50 trees at shrinkage 0.01 is a very small ensemble for
# such a slow learning rate. The hold-out RMSE recorded below (0.26) is far
# above the other models', so consider widening `n.trees` before treating
# the model comparison as fair.
tuneGrid <- expand.grid(
  n.trees = 50,
  interaction.depth = c(1, 2, 3),
  shrinkage = 0.01,
  n.minobsinnode = c(5, 10, 15)
)
# `verbose = FALSE` is forwarded to gbm and silences its per-iteration
# trace; it does not affect the fitted model.
gbmModel <- train(
  PP ~ .,
  data = train,
  method = "gbm",
  trControl = trainControl(method = "cv", number = 5),
  tuneGrid = tuneGrid,
  verbose = FALSE
)

# 4.3.2 RMSE
# Score the tuned model on the hold-out rows.
predictions <- predict(gbmModel, test)
gbm_rmse <- RMSE(predictions, test$PP)
gbm_rmse
## [1] 0.2618784
# 4.4 Xgboost
# 4.4.1 Tuned Xgboost
set.seed(100)
# 2 x 2 x 5 = 20 candidate settings: boosting rounds, tree depth and
# column subsampling vary; the remaining parameters are held fixed.
tuneGrid <- expand.grid(
  nrounds = c(100, 200),
  max_depth = c(10, 15),
  colsample_bytree = seq(0.5, 0.9, length.out = 5),
  eta = 0.1,
  gamma = 0,
  min_child_weight = 1,
  subsample = 1
)
# Select the best setting with 5-fold cross-validation.
xgboostModel <- train(
  PP ~ .,
  data = train,
  method = "xgbTree",
  trControl = trainControl(method = "cv", number = 5),
  tuneGrid = tuneGrid
)
## [10:34:00] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:01] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:01] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:02] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:02] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:03] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:03] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:04] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:04] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:05] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:06] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:06] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:07] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:07] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:07] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:08] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:09] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:09] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:10] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:10] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:11] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:11] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:12] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:12] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:13] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:13] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:14] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:15] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:16] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:16] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:17] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:18] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:19] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:20] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:21] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:21] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:22] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:23] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:24] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:25] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:26] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:27] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:27] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:28] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:29] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:30] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:30] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:31] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:32] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
## [10:34:33] WARNING: amalgamation/../src/c_api/c_api.cc:718: `ntree_limit` is deprecated, use `iteration_range` instead.
# 4.4.2 RMSE
# Score the tuned model on the hold-out rows.
predictions <- predict(xgboostModel, test)
xgboost_rmse <- RMSE(predictions, test$PP)
xgboost_rmse
## [1] 0.03230221
# 4.5 Tree
# 4.5.1 Tuned Tree
set.seed(100)
# 100 candidate complexity-parameter values from 0.001 to 0.1.
tuneGrid <- expand.grid(cp = seq(from = 0.001, to = 0.1, by = 0.001))
# Select cp for a single regression tree with 5-fold cross-validation.
treeModel <- train(
  PP ~ .,
  data = train,
  method = "rpart",
  trControl = trainControl(method = "cv", number = 5),
  tuneGrid = tuneGrid
)

# 4.5.2 RMSE
# Score the tuned model on the hold-out rows.
predictions <- predict(treeModel, test)
tree_rmse <- RMSE(predictions, test$PP)
tree_rmse
## [1] 0.05365652
# 4.6 Different models comparison
# Hold-out RMSE of each tuned model, printed in turn.
rf_rmse
## [1] 0.01744847
gbm_rmse
## [1] 0.2618784
xgboost_rmse
## [1] 0.03230221
tree_rmse
## [1] 0.05365652
# The random forest model has the lowest RMSE and therefore performs best.
# 5 Back testing
set.seed(100)
tuneGrid <- expand.grid(mtry = 1:5)
# Refit the random forest on the full data set (not only the training
# split) with the same tuning procedure; this overwrites the earlier
# `rfModel`.
rfModel <- train(
  PP ~ .,
  data = data,
  method = "rf",
  ntree = 50,
  trControl = trainControl(method = "cv", number = 5),
  tuneGrid = tuneGrid
)
# Read the rows to be priced. The columns are renamed by position, so the
# sheet must contain exactly the seven predictors, with no PP column.
# NOTE(review): the column order of the workbook cannot be checked from
# this script -- confirm it matches the vector below.
df <- read_xlsx("预测数据1.xlsx")
stopifnot(ncol(df) == 7L)
names(df) <- c("UP", "LNTV", "sigma", "rf", "rp", "K", "T")
# Append the predicted value as column PP and print the result.
pred <- predict(rfModel, df)
df$PP <- pred
df