library(dplyr)
# Keep only the iris rows whose sepal length is greater than 5.
data_filtered <- filter(iris, Sepal.Length > 5)
## Train/test split
library(dplyr)
# Hold out 20% of iris for testing (120 training rows out of 150).
# Species is deliberately kept in data_train: the upsampling and model-fitting
# steps further down read the class label from it.
# NOTE(review): the seed used for the recorded outputs is unknown, so the
# printed confusion-matrix counts may differ slightly after a re-run.
set.seed(100)
data_intrain <- sample(nrow(iris), size = round(0.8 * nrow(iris)))
data_train <- iris[data_intrain, ]
data_test <- iris[-data_intrain, ]
# List the distinct species present in iris (original row names are kept).
unique(select(iris, Species))
## Species
## 1 setosa
## 51 versicolor
## 101 virginica
# Per-species mean of each of the four flower measurements.
iris %>%
  group_by(Species) %>%
  summarise(
    avg_sepal_length = mean(Sepal.Length),
    avg_sepal_width = mean(Sepal.Width),
    avg_petal_length = mean(Petal.Length),
    avg_petal_width = mean(Petal.Width)
  )
## # A tibble: 3 × 5
## Species avg_sepal_length avg_sepal_width avg_petal_length avg_petal_width
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 setosa 5.01 3.43 1.46 0.246
## 2 versicolor 5.94 2.77 4.26 1.33
## 3 virginica 6.59 2.97 5.55 2.03
# Distribution and central tendency of sepal length across all species.
# The hist() argument is left exactly as written because the default plot
# title and x-axis label are derived from that expression.
hist(iris$Sepal.Length)
mean(iris[["Sepal.Length"]])
## [1] 5.843333
median(iris[["Sepal.Length"]])
## [1] 5.8
# You can also embed plots, for example:
# Upsampling: resample the training rows so that every species ends up with
# the same number of observations.
library(caret)
library(dplyr)
RNGkind(sample.kind = "Rejection")
set.seed(100) # fixed seed so the resampled rows are reproducible
data_train <- upSample(
  x = select(data_train, -Species),
  y = data_train$Species,
  yname = "Species"
)
# Class shares after upsampling.
prop.table(table(data_train$Species))
##
## setosa versicolor virginica
## 0.3333333 0.3333333 0.3333333
library(partykit)
# Fit a conditional inference tree that predicts Species from all other
# columns of data_train. The response is named in the formula and looked up in
# `data`, so the model does not depend on a global `data_train$Species` vector.
model_dt <- ctree(
  formula = Species ~ .,
  data = data_train,
  # mincriterion = 0.95: a split is made only if 1 - p-value exceeds 0.95
  control = ctree_control(mincriterion = 0.95)
)
plot(model_dt, type = "simple")
# Predict with the tree on the full iris data set (all 150 rows, not only the
# training rows). The variable keeps its name because `result` below attaches
# these predictions to iris row by row.
pred_train_dt <- predict(model_dt, newdata = iris)
# `positive` is not passed: it only applies to two-class problems, and Species
# has three levels, none of which is "Yes".
confusionMatrix(pred_train_dt, reference = iris$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 49 5
## virginica 0 1 45
##
## Overall Statistics
##
## Accuracy : 0.96
## 95% CI : (0.915, 0.9852)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.94
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9800 0.9000
## Specificity 1.0000 0.9500 0.9900
## Pos Pred Value 1.0000 0.9074 0.9783
## Neg Pred Value 1.0000 0.9896 0.9519
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3267 0.3000
## Detection Prevalence 0.3333 0.3600 0.3067
## Balanced Accuracy 1.0000 0.9650 0.9450
# Attach the tree's predictions to iris as column `prediksi` and print it.
result <- mutate(iris, prediksi = pred_train_dt)
result
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species prediksi
## 1 5.1 3.5 1.4 0.2 setosa setosa
## 2 4.9 3.0 1.4 0.2 setosa setosa
## 3 4.7 3.2 1.3 0.2 setosa setosa
## 4 4.6 3.1 1.5 0.2 setosa setosa
## 5 5.0 3.6 1.4 0.2 setosa setosa
## 6 5.4 3.9 1.7 0.4 setosa setosa
## 7 4.6 3.4 1.4 0.3 setosa setosa
## 8 5.0 3.4 1.5 0.2 setosa setosa
## 9 4.4 2.9 1.4 0.2 setosa setosa
## 10 4.9 3.1 1.5 0.1 setosa setosa
## 11 5.4 3.7 1.5 0.2 setosa setosa
## 12 4.8 3.4 1.6 0.2 setosa setosa
## 13 4.8 3.0 1.4 0.1 setosa setosa
## 14 4.3 3.0 1.1 0.1 setosa setosa
## 15 5.8 4.0 1.2 0.2 setosa setosa
## 16 5.7 4.4 1.5 0.4 setosa setosa
## 17 5.4 3.9 1.3 0.4 setosa setosa
## 18 5.1 3.5 1.4 0.3 setosa setosa
## 19 5.7 3.8 1.7 0.3 setosa setosa
## 20 5.1 3.8 1.5 0.3 setosa setosa
## 21 5.4 3.4 1.7 0.2 setosa setosa
## 22 5.1 3.7 1.5 0.4 setosa setosa
## 23 4.6 3.6 1.0 0.2 setosa setosa
## 24 5.1 3.3 1.7 0.5 setosa setosa
## 25 4.8 3.4 1.9 0.2 setosa setosa
## 26 5.0 3.0 1.6 0.2 setosa setosa
## 27 5.0 3.4 1.6 0.4 setosa setosa
## 28 5.2 3.5 1.5 0.2 setosa setosa
## 29 5.2 3.4 1.4 0.2 setosa setosa
## 30 4.7 3.2 1.6 0.2 setosa setosa
## 31 4.8 3.1 1.6 0.2 setosa setosa
## 32 5.4 3.4 1.5 0.4 setosa setosa
## 33 5.2 4.1 1.5 0.1 setosa setosa
## 34 5.5 4.2 1.4 0.2 setosa setosa
## 35 4.9 3.1 1.5 0.2 setosa setosa
## 36 5.0 3.2 1.2 0.2 setosa setosa
## 37 5.5 3.5 1.3 0.2 setosa setosa
## 38 4.9 3.6 1.4 0.1 setosa setosa
## 39 4.4 3.0 1.3 0.2 setosa setosa
## 40 5.1 3.4 1.5 0.2 setosa setosa
## 41 5.0 3.5 1.3 0.3 setosa setosa
## 42 4.5 2.3 1.3 0.3 setosa setosa
## 43 4.4 3.2 1.3 0.2 setosa setosa
## 44 5.0 3.5 1.6 0.6 setosa setosa
## 45 5.1 3.8 1.9 0.4 setosa setosa
## 46 4.8 3.0 1.4 0.3 setosa setosa
## 47 5.1 3.8 1.6 0.2 setosa setosa
## 48 4.6 3.2 1.4 0.2 setosa setosa
## 49 5.3 3.7 1.5 0.2 setosa setosa
## 50 5.0 3.3 1.4 0.2 setosa setosa
## 51 7.0 3.2 4.7 1.4 versicolor versicolor
## 52 6.4 3.2 4.5 1.5 versicolor versicolor
## 53 6.9 3.1 4.9 1.5 versicolor versicolor
## 54 5.5 2.3 4.0 1.3 versicolor versicolor
## 55 6.5 2.8 4.6 1.5 versicolor versicolor
## 56 5.7 2.8 4.5 1.3 versicolor versicolor
## 57 6.3 3.3 4.7 1.6 versicolor versicolor
## 58 4.9 2.4 3.3 1.0 versicolor versicolor
## 59 6.6 2.9 4.6 1.3 versicolor versicolor
## 60 5.2 2.7 3.9 1.4 versicolor versicolor
## 61 5.0 2.0 3.5 1.0 versicolor versicolor
## 62 5.9 3.0 4.2 1.5 versicolor versicolor
## 63 6.0 2.2 4.0 1.0 versicolor versicolor
## 64 6.1 2.9 4.7 1.4 versicolor versicolor
## 65 5.6 2.9 3.6 1.3 versicolor versicolor
## 66 6.7 3.1 4.4 1.4 versicolor versicolor
## 67 5.6 3.0 4.5 1.5 versicolor versicolor
## 68 5.8 2.7 4.1 1.0 versicolor versicolor
## 69 6.2 2.2 4.5 1.5 versicolor versicolor
## 70 5.6 2.5 3.9 1.1 versicolor versicolor
## 71 5.9 3.2 4.8 1.8 versicolor virginica
## 72 6.1 2.8 4.0 1.3 versicolor versicolor
## 73 6.3 2.5 4.9 1.5 versicolor versicolor
## 74 6.1 2.8 4.7 1.2 versicolor versicolor
## 75 6.4 2.9 4.3 1.3 versicolor versicolor
## 76 6.6 3.0 4.4 1.4 versicolor versicolor
## 77 6.8 2.8 4.8 1.4 versicolor versicolor
## 78 6.7 3.0 5.0 1.7 versicolor versicolor
## 79 6.0 2.9 4.5 1.5 versicolor versicolor
## 80 5.7 2.6 3.5 1.0 versicolor versicolor
## 81 5.5 2.4 3.8 1.1 versicolor versicolor
## 82 5.5 2.4 3.7 1.0 versicolor versicolor
## 83 5.8 2.7 3.9 1.2 versicolor versicolor
## 84 6.0 2.7 5.1 1.6 versicolor versicolor
## 85 5.4 3.0 4.5 1.5 versicolor versicolor
## 86 6.0 3.4 4.5 1.6 versicolor versicolor
## 87 6.7 3.1 4.7 1.5 versicolor versicolor
## 88 6.3 2.3 4.4 1.3 versicolor versicolor
## 89 5.6 3.0 4.1 1.3 versicolor versicolor
## 90 5.5 2.5 4.0 1.3 versicolor versicolor
## 91 5.5 2.6 4.4 1.2 versicolor versicolor
## 92 6.1 3.0 4.6 1.4 versicolor versicolor
## 93 5.8 2.6 4.0 1.2 versicolor versicolor
## 94 5.0 2.3 3.3 1.0 versicolor versicolor
## 95 5.6 2.7 4.2 1.3 versicolor versicolor
## 96 5.7 3.0 4.2 1.2 versicolor versicolor
## 97 5.7 2.9 4.2 1.3 versicolor versicolor
## 98 6.2 2.9 4.3 1.3 versicolor versicolor
## 99 5.1 2.5 3.0 1.1 versicolor versicolor
## 100 5.7 2.8 4.1 1.3 versicolor versicolor
## 101 6.3 3.3 6.0 2.5 virginica virginica
## 102 5.8 2.7 5.1 1.9 virginica virginica
## 103 7.1 3.0 5.9 2.1 virginica virginica
## 104 6.3 2.9 5.6 1.8 virginica virginica
## 105 6.5 3.0 5.8 2.2 virginica virginica
## 106 7.6 3.0 6.6 2.1 virginica virginica
## 107 4.9 2.5 4.5 1.7 virginica versicolor
## 108 7.3 2.9 6.3 1.8 virginica virginica
## 109 6.7 2.5 5.8 1.8 virginica virginica
## 110 7.2 3.6 6.1 2.5 virginica virginica
## 111 6.5 3.2 5.1 2.0 virginica virginica
## 112 6.4 2.7 5.3 1.9 virginica virginica
## 113 6.8 3.0 5.5 2.1 virginica virginica
## 114 5.7 2.5 5.0 2.0 virginica virginica
## 115 5.8 2.8 5.1 2.4 virginica virginica
## 116 6.4 3.2 5.3 2.3 virginica virginica
## 117 6.5 3.0 5.5 1.8 virginica virginica
## 118 7.7 3.8 6.7 2.2 virginica virginica
## 119 7.7 2.6 6.9 2.3 virginica virginica
## 120 6.0 2.2 5.0 1.5 virginica versicolor
## 121 6.9 3.2 5.7 2.3 virginica virginica
## 122 5.6 2.8 4.9 2.0 virginica virginica
## 123 7.7 2.8 6.7 2.0 virginica virginica
## 124 6.3 2.7 4.9 1.8 virginica virginica
## 125 6.7 3.3 5.7 2.1 virginica virginica
## 126 7.2 3.2 6.0 1.8 virginica virginica
## 127 6.2 2.8 4.8 1.8 virginica virginica
## 128 6.1 3.0 4.9 1.8 virginica virginica
## 129 6.4 2.8 5.6 2.1 virginica virginica
## 130 7.2 3.0 5.8 1.6 virginica versicolor
## 131 7.4 2.8 6.1 1.9 virginica virginica
## 132 7.9 3.8 6.4 2.0 virginica virginica
## 133 6.4 2.8 5.6 2.2 virginica virginica
## 134 6.3 2.8 5.1 1.5 virginica versicolor
## 135 6.1 2.6 5.6 1.4 virginica versicolor
## 136 7.7 3.0 6.1 2.3 virginica virginica
## 137 6.3 3.4 5.6 2.4 virginica virginica
## 138 6.4 3.1 5.5 1.8 virginica virginica
## 139 6.0 3.0 4.8 1.8 virginica virginica
## 140 6.9 3.1 5.4 2.1 virginica virginica
## 141 6.7 3.1 5.6 2.4 virginica virginica
## 142 6.9 3.1 5.1 2.3 virginica virginica
## 143 5.8 2.7 5.1 1.9 virginica virginica
## 144 6.8 3.2 5.9 2.3 virginica virginica
## 145 6.7 3.3 5.7 2.5 virginica virginica
## 146 6.7 3.0 5.2 2.3 virginica virginica
## 147 6.3 2.5 5.0 1.9 virginica virginica
## 148 6.5 3.0 5.2 2.0 virginica virginica
## 149 6.2 3.4 5.4 2.3 virginica virginica
## 150 5.9 3.0 5.1 1.8 virginica virginica
# Evaluate the conditional inference tree on the held-out test rows.
pred_test_dt <- predict(model_dt, newdata = data_test)
# `positive` is not passed: it only applies to two-class problems, and Species
# has three levels, none of which is "Yes".
confusionMatrix(pred_test_dt, reference = data_test$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 11 0 0
## versicolor 0 10 1
## virginica 0 0 8
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.8278, 0.9992)
## No Information Rate : 0.3667
## P-Value [Acc > NIR] : 4.476e-12
##
## Kappa : 0.9497
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.8889
## Specificity 1.0000 0.9500 1.0000
## Pos Pred Value 1.0000 0.9091 1.0000
## Neg Pred Value 1.0000 1.0000 0.9545
## Prevalence 0.3667 0.3333 0.3000
## Detection Rate 0.3667 0.3333 0.2667
## Detection Prevalence 0.3667 0.3667 0.2667
## Balanced Accuracy 1.0000 0.9750 0.9444
# Note that the echo = FALSE parameter was added to the
# code chunk to prevent printing of the R code that generated the
# plot.
set.seed(417)
# Resampling scheme: 5-fold cross-validation, repeated 3 times.
ctrl <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 3
)
# Random forest fitted on data_train using the resampling scheme above.
model_forest <- train(
  Species ~ .,
  data = data_train,
  method = "rf",
  trControl = ctrl
)
# Confusion matrix on the training data
pred_train_rf <- predict(model_forest, newdata = data_train)
# `positive` is not passed: it only applies to two-class problems, and Species
# has three levels, none of which is "Yes".
confusionMatrix(pred_train_rf, reference = data_train$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 41 0 0
## versicolor 0 41 0
## virginica 0 0 41
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9705, 1)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.3333
## Detection Prevalence 0.3333 0.3333 0.3333
## Balanced Accuracy 1.0000 1.0000 1.0000
# Confusion matrix on the test data
pred_test_rf <- predict(model_forest, newdata = data_test)
# `positive` is not passed: it only applies to two-class problems, and Species
# has three levels, none of which is "Yes".
confusionMatrix(pred_test_rf, reference = data_test$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 11 0 0
## versicolor 0 9 0
## virginica 0 1 9
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.8278, 0.9992)
## No Information Rate : 0.3667
## P-Value [Acc > NIR] : 4.476e-12
##
## Kappa : 0.9499
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9000 1.0000
## Specificity 1.0000 1.0000 0.9524
## Pos Pred Value 1.0000 1.0000 0.9000
## Neg Pred Value 1.0000 0.9524 1.0000
## Prevalence 0.3667 0.3333 0.3000
## Detection Rate 0.3667 0.3000 0.3000
## Detection Prevalence 0.3667 0.3000 0.3333
## Balanced Accuracy 1.0000 0.9500 0.9762
# Spot checks: one cell of iris, the number of training indices, and the
# training share of the 150 iris rows.
iris[3, "Sepal.Width"]
## [1] 3.2
length(data_intrain)
## [1] 120
120 / 150
## [1] 0.8