Last Updated - 2016-06-08
The previous model (May 2016, Round 2) predicted a premium of 47536, a close approximation of the actual May Round 2 premium of 47020, with a difference of $518. The model now adds new data and attempts to forecast the June 2016 COE Premium for Category A, Round 1.
Data source is from here.
library(h2o)
## Loading required package: statmod
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
##
## sd, var
## The following objects are masked from 'package:base':
##
## %*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames, colnames<-, ifelse,
## is.character, is.factor, is.numeric, log, log10, log1p, log2, round, signif, trunc
# Start (or connect to) a local H2O cluster and keep the connection handle.
# nthreads = -1 lets H2O use every available core (8 of 8 in the log below).
localH2O <- h2o.init(nthreads = -1)
##
## H2O is not running yet, starting it now...
##
## Note: In case of errors look at the following log files:
## C:\Users\admin\AppData\Local\Temp\Rtmpwp73tI/h2o_admin_started_from_r.out
## C:\Users\admin\AppData\Local\Temp\Rtmpwp73tI/h2o_admin_started_from_r.err
##
##
## Starting H2O JVM and connecting: Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 1 seconds 205 milliseconds
## H2O cluster version: 3.8.1.3
## H2O cluster name: H2O_started_from_R_admin_gcz764
## H2O cluster total nodes: 1
## H2O cluster total memory: 7.10 GB
## H2O cluster total cores: 8
## H2O cluster allowed cores: 8
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## R Version: R version 3.3.0 (2016-05-03)
# NOTE(review): a cluster was already started by the earlier
# h2o.init(nthreads = -1) call; this second call only reconnects to that same
# cluster (same cluster name in the output) and looks redundant.
h2o.init()
## Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 1 seconds 485 milliseconds
## H2O cluster version: 3.8.1.3
## H2O cluster name: H2O_started_from_R_admin_gcz764
## H2O cluster total nodes: 1
## H2O cluster total memory: 7.10 GB
## H2O cluster total cores: 8
## H2O cluster allowed cores: 8
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## R Version: R version 3.3.0 (2016-05-03)
# Split the data 70/30 into training and testing data frames.
# Seed the RNG first so the random split is reproducible between runs
# (the GBM below is already seeded, so the split was the only random part).
set.seed(1122)
n_rows <- nrow(traindata)
samp <- sample(n_rows, floor(0.7 * n_rows))
training <- traindata[samp, ]
testing <- traindata[-samp, ]
# Convert to H2O frames. Train on the 70% split only -- passing the full
# traindata here would leak the held-out rows of test.h2o into the model.
train.h2o <- as.h2o(training)
test.h2o <- as.h2o(testing)
##
|
| | 0%
|
|==========================================================================================| 100%
##
|
| | 0%
|
|==========================================================================================| 100%
### Column positions used by the model
# Response: column 4 holds PREMIUM, the value being forecast.
y.dep <- 4
# Predictors: columns 5 to 7 (PQP, BIDS, QUOTA).
x.indep <- 5:7
# GBM: fit a gradient boosting model on the training frame and report how
# long the fit takes. The model is stored in gbm.model.
system.time({
  gbm.model <- h2o.gbm(
    x = x.indep,
    y = y.dep,
    training_frame = train.h2o,
    ntrees = 1000,
    max_depth = 4,
    learn_rate = 0.01,
    seed = 1122
  )
})
##
|
| | 0%
|
|========================= | 28%
|
|=================================================== | 57%
|
|====================================================================== | 78%
|
|==========================================================================================| 100%
## user system elapsed
## 0.14 0.00 4.70
# Print the variable-importance table of the fitted GBM
# (one row per predictor: PQP, BIDS, QUOTA).
h2o.varimp(gbm.model)
## Variable Importances:
## variable relative_importance scaled_importance percentage
## 1 PQP 1612447219712.000000 1.000000 0.849386
## 2 BIDS 176865198080.000000 0.109687 0.093167
## 3 QUOTA 109056024576.000000 0.067634 0.057447
#h2o.performance(gbm.model)
# predict against test data
#predict.gbm <- as.data.frame(h2o.predict(gbm.model, test.h2o))
###############################################################
# Predict for a hand-entered scenario: supply the PQP value to score.
# NOTE(review): the model was fitted on columns 5:7 (PQP, BIDS, QUOTA) but
# only PQP is provided here, so BIDS and QUOTA are absent from the scoring
# frame -- confirm this is intended.
##############################################################
mypqpdata <- data.frame(PQP=46454)
# convert to H2O frame
result_premium <- as.h2o(mypqpdata)
##
|
| | 0%
|
|==========================================================================================| 100%
# Score the single-row frame with the fitted GBM and bring the prediction
# back into a regular R data frame.
predict.gbm <- as.data.frame(h2o.predict(gbm.model, result_premium))
##
|
| | 0%
|
|==========================================================================================| 100%