0. Preprocessing Data
str(iris)
'data.frame': 150 obs. of 6 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
$ dv : logi FALSE TRUE FALSE TRUE FALSE TRUE ...
Add a column to divide the data into train and test sets
set.seed(19731027)                 # fix the RNG so the split is reproducible
iris$dv = FALSE
iris[sort(sample(nrow(iris), nrow(iris)*0.8, replace=FALSE)), 'dv'] = TRUE   # flag a random 80% of rows as training
head(iris)
Split Train and Test data
iris.train <- iris[iris$dv==TRUE, -6]    # rows flagged for training, dropping the helper dv column
iris.test <- iris[iris$dv==FALSE, -6]    # remaining rows for testing, dropping dv
nrow(iris.train)
[1] 120
nrow(iris.test)
[1] 30
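As an aside, the same 80/20 split can be drawn with an index vector instead of the helper dv column; this is not the notebook's approach, just a minimal sketch (train.idx, alt.train, and alt.test are illustrative names). With the same seed it should select the same rows.
set.seed(19731027)                                  # fix the RNG so the split is reproducible
train.idx <- sample(nrow(iris), 0.8 * nrow(iris))   # draw 80% of the row indices
alt.train <- iris[train.idx, 1:5]                   # training rows, original five columns
alt.test  <- iris[-train.idx, 1:5]                  # remaining 20% for testing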
1. Multinomial Regression Analysis
Make Multinomial Model
library(nnet)
regression.model <- multinom(Species~.,data=iris.train)
# weights: 18 (10 variable)
initial value 131.833475
iter 10 value 12.062172
iter 20 value 2.662125
iter 30 value 2.140514
iter 40 value 2.117763
iter 50 value 2.114543
iter 60 value 2.080945
iter 70 value 1.993777
iter 80 value 1.981856
iter 90 value 1.973497
iter 100 value 1.958383
final value 1.958383
stopped after 100 iterations
rg.predicted <- predict(regression.model,iris.test)
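Before scoring, the fitted coefficients can be inspected with the standard nnet accessors; a brief optional check:
summary(regression.model)   # coefficients and standard errors per non-reference class
coef(regression.model)      # coefficient matrix only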
Make Confusion Matrix
table(rg.predicted, iris.test$Species)
rg.predicted setosa versicolor virginica
  setosa         15          0         0
  versicolor      0          8         1
  virginica       0          0         6
Calculate Accuracy with Test Data
sum(rg.predicted == iris.test$Species)/nrow(iris.test)
[1] 0.9666667
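Overall accuracy hides class-level behaviour; per-class recall and precision can be read off the same confusion matrix. A small sketch (cm is an illustrative name):
cm <- table(rg.predicted, iris.test$Species)   # rows = predictions, columns = true species
diag(cm) / colSums(cm)                         # recall per true class
diag(cm) / rowSums(cm)                         # precision per predicted class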
2. Decision Tree
Make Decision Tree Model
library(rpart)
dt.model <- rpart(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data=iris.train, method = "class")
dt.predicted <- predict(dt.model,iris.test,type="class")
Make Confusion Matrix
table(dt.predicted, iris.test$Species)
dt.predicted setosa versicolor virginica
  setosa         15          0         0
  versicolor      0          7         2
  virginica       0          1         5
Calculate Accuracy with Test Data
sum(dt.predicted == iris.test$Species)/nrow(iris.test)
[1] 0.9
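Before plotting, the fitted tree can also be examined as text; printcp() shows the complexity-parameter table rpart uses for pruning. An optional check on the dt.model object above:
print(dt.model)     # splits and class assignments at each node
printcp(dt.model)   # complexity-parameter table, useful for pruning decisions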
Plot the Decision Tree
Load the required libraries
library(rpart.plot)
library(RColorBrewer)
library(rattle)
Rattle: A free graphical interface for data mining with R.
Version 4.1.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
Type 'rattle()' to shake, rattle, and roll your data.
fancyRpartPlot(dt.model)
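If rattle is not available, the already-loaded rpart.plot package can draw the same tree in a plainer style; a one-line alternative sketch:
rpart.plot(dt.model)   # plain rendering of the same tree, no rattle dependency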

3. KNN
Make train and test data for kNN and the train labels
library(class)
iris.train.label <- iris.train$Species   # class labels for the training rows
iris.knn.train <- iris.train[,-5]        # predictors only (drop Species)
iris.knn.test <- iris.test[,-5]          # predictors only (drop Species)
Make Model and Predict
knn.predicted <- knn(train = iris.knn.train, test = iris.knn.test, cl = iris.train.label, k = 3)
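knn() from the class package fits and predicts in one call, so there is no separate model object; if the share of neighbour votes is wanted, the prob argument exposes it. A small sketch (knn.prob is an illustrative name):
knn.prob <- knn(train = iris.knn.train, test = iris.knn.test,
                cl = iris.train.label, k = 3, prob = TRUE)
head(attr(knn.prob, "prob"))   # proportion of votes for the winning class, per test row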
Make Confusion Matrix
table(knn.predicted, iris.test$Species)
knn.predicted setosa versicolor virginica
  setosa          15          0         0
  versicolor       0          7         2
  virginica        0          1         5
Calculate Accuracy with Test Data
sum(knn.predicted == iris.test$Species)/nrow(iris.test)
[1] 0.9
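kNN is distance-based, so features on larger scales dominate the distance. The iris measurements are roughly comparable, but with mixed-scale data it is common to standardize using the training statistics only; a minimal sketch under that assumption (the *.scaled names are illustrative):
train.scaled <- scale(iris.knn.train)                               # centre and scale on training data
test.scaled  <- scale(iris.knn.test,
                      center = attr(train.scaled, "scaled:center"),
                      scale  = attr(train.scaled, "scaled:scale"))  # reuse training mean/sd to avoid leakage
knn.scaled <- knn(train = train.scaled, test = test.scaled,
                  cl = iris.train.label, k = 3)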
4. Deep Learning with H2O
Load the H2O library
library(h2o)
Loading required package: statmod
----------------------------------------------------------------------
Your next step is to start H2O:
> h2o.init()
For H2O package documentation, ask for help:
> ??h2o
After starting H2O, you can use the Web UI at http://localhost:54321
For more information visit http://docs.h2o.ai
----------------------------------------------------------------------
Attaching package: ‘h2o’
The following objects are masked from ‘package:stats’:
cor, sd, var
The following objects are masked from ‘package:base’:
%*%, %in%, &&, ||, apply, as.factor, as.numeric, colnames, colnames<-, ifelse, is.character, is.factor, is.numeric, log,
log10, log1p, log2, round, signif, trunc
Initialize the H2O instance
localH2O <- h2o.init(ip = 'localhost', port = 54321, max_mem_size = '4g')
H2O is not running yet, starting it now...
Note: In case of errors look at the following log files:
C:\Users\hadooper\AppData\Local\Temp\RtmpcJBt6e/h2o_hadooper_started_from_r.out
C:\Users\hadooper\AppData\Local\Temp\RtmpcJBt6e/h2o_hadooper_started_from_r.err
java version "1.8.0_101"
Java(TM) SE Runtime Environment (build 1.8.0_101-b13)
Java HotSpot(TM) 64-Bit Server VM (build 25.101-b13, mixed mode)
Starting H2O JVM and connecting: .. Connection successful!
R is connected to the H2O cluster:
H2O cluster uptime: 6 seconds 275 milliseconds
H2O cluster version: 3.10.0.8
H2O cluster version age: 2 months and 10 days
H2O cluster name: H2O_started_from_R_hadooper_lqw684
H2O cluster total nodes: 1
H2O cluster total memory: 3.56 GB
H2O cluster total cores: 4
H2O cluster allowed cores: 2
H2O cluster healthy: TRUE
H2O Connection ip: localhost
H2O Connection port: 54321
H2O Connection proxy: NA
R Version: R version 3.3.2 (2016-10-31)
Note: As started, H2O is limited to the CRAN default of 2 CPUs.
Shut down and restart H2O as shown below to use all your CPUs.
> h2o.shutdown()
> h2o.init(nthreads = -1)
Transfer the iris data frames to the H2O instance
iris.h2o.train <- as.h2o(iris.train, destination_frame = "iris.h2o.train")
iris.h2o.test <- as.h2o(iris.test, destination_frame = "iris.h2o.test")
nrow(iris.h2o.train)
[1] 120
nrow(iris.h2o.test)
[1] 30
Make Deep Learning Model
model.h2o <- h2o.deeplearning(x=1:4, y=5, training_frame = iris.h2o.train)   # columns 1-4 are the predictors, column 5 is Species
h2o.predicted <- h2o.predict(model.h2o, iris.h2o.test)[,"predict"]
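The call above relies on H2O's defaults for the network architecture and training length. If tuning is wanted, hidden and epochs can be set explicitly; a sketch with arbitrary, untuned example values (model.h2o.tuned is an illustrative name):
model.h2o.tuned <- h2o.deeplearning(x = 1:4, y = 5,
                                    training_frame = iris.h2o.train,
                                    hidden = c(10, 10),   # two small hidden layers
                                    epochs = 50,          # more passes over the training data
                                    seed = 19731027)      # seed for the run (full determinism also needs reproducible = TRUE)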
Make Confusion Matrix
table(as.vector(h2o.predicted), as.vector(iris.h2o.test$Species))
             setosa versicolor virginica
  setosa         15          0         0
  versicolor      0          7         1
  virginica       0          1         6
Calculate Accuracy with Test Data
sum(as.vector(h2o.predicted) == as.vector(iris.h2o.test$Species)) / length(as.vector(iris.h2o.test$Species))
[1] 0.9333333
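H2O can also compute the evaluation on its side instead of pulling predictions back into R; h2o.performance() returns the standard metrics for a model on a given frame. A brief sketch (perf is an illustrative name):
perf <- h2o.performance(model.h2o, newdata = iris.h2o.test)
h2o.confusionMatrix(perf)   # confusion matrix computed by H2O on the test frame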