Contoh penggunaan filter

library(dplyr)

# Keep only the iris rows whose sepal length is greater than 5.
data_filtered <- filter(iris, Sepal.Length > 5)

##Data train

library(dplyr)
# Drop the Species label column, keeping the remaining iris columns.
# NOTE(review): the upsampling step further down reads data_train$Species,
# so data_train is presumably rebuilt (with Species) elsewhere -- confirm.
data_train <- select(iris, -Species)

Data unique

Mencari nilai unik yang ada pada kolom Species

# List each distinct species label. unique() keeps the row name of the first
# occurrence, which is why the printed row names are not consecutive.
unique(select(iris, Species))
##        Species
## 1       setosa
## 51  versicolor
## 101  virginica

Group By

# Average of each of the four measurements, computed separately per species.
# One output row per species; each summary column is named avg_<measurement>.
iris %>%
  group_by(Species) %>%
  summarise(
    avg_sepal_length = mean(Sepal.Length),
    avg_sepal_width = mean(Sepal.Width),
    avg_petal_length = mean(Petal.Length),
    avg_petal_width = mean(Petal.Width)
  )
## # A tibble: 3 × 5
##   Species    avg_sepal_length avg_sepal_width avg_petal_length avg_petal_width
##   <fct>                 <dbl>           <dbl>            <dbl>           <dbl>
## 1 setosa                 5.01            3.43             1.46           0.246
## 2 versicolor             5.94            2.77             4.26           1.33 
## 3 virginica              6.59            2.97             5.55           2.03

Skewness

# Look at the shape of the sepal-length distribution, then compare its mean
# with its median: a mean above the median points to a right-skewed shape.
hist(iris$Sepal.Length)

mean(iris[["Sepal.Length"]])
## [1] 5.843333
median(iris[["Sepal.Length"]])
## [1] 5.8

Including Plots

You can also embed plots, for example:

Upsample

# upsampling

library(caret)
library(dplyr)

# Pin the sampling algorithm and the seed so the resampling is reproducible.
RNGkind(sample.kind = "Rejection")
set.seed(100)

# Upsample data_train: every column except Species is passed as `x`, the
# Species labels as `y`, and the label column of the result is named by
# `yname`. The result overwrites data_train.
data_train <- upSample(
  yname = "Species",
  y = data_train$Species,
  x = select(data_train, -Species)
)

# Share of each species in the upsampled training data.
prop.table(table(data_train[["Species"]]))
## 
##     setosa versicolor  virginica 
##  0.3333333  0.3333333  0.3333333

Plot Model

library(partykit)
# Fit a conditional inference tree that predicts Species from every other
# column of data_train. The response is named directly in the formula and
# resolved inside `data`, so labels and predictors always come from the same
# data frame instead of a separate lookup of the global data_train object.
model_dt <- ctree(
  formula = Species ~ .,
  data = data_train,
  control = ctree_control(mincriterion = 0.95)
)
# Draw the fitted tree using the "simple" plot type.
plot(model_dt, type = "simple")

Confusion Matrix

# Predict the species for every row of the full iris data set with the
# decision tree and compare the predictions with the true labels.
# `positive` is not passed: it only applies to two-class problems, and
# Species has three levels, none of which is "Yes".
pred_train_dt <- predict(model_dt, newdata = iris)
confusionMatrix(pred_train_dt, reference = iris$Species)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         50          0         0
##   versicolor      0         49         5
##   virginica       0          1        45
## 
## Overall Statistics
##                                          
##                Accuracy : 0.96           
##                  95% CI : (0.915, 0.9852)
##     No Information Rate : 0.3333         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.94           
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9800           0.9000
## Specificity                 1.0000            0.9500           0.9900
## Pos Pred Value              1.0000            0.9074           0.9783
## Neg Pred Value              1.0000            0.9896           0.9519
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3267           0.3000
## Detection Prevalence        0.3333            0.3600           0.3067
## Balanced Accuracy           1.0000            0.9650           0.9450
# Attach the decision-tree prediction to every iris row as the column
# `prediksi` ("prediction"), then print the combined table.
result <- mutate(iris, prediksi = pred_train_dt)
result
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species   prediksi
## 1            5.1         3.5          1.4         0.2     setosa     setosa
## 2            4.9         3.0          1.4         0.2     setosa     setosa
## 3            4.7         3.2          1.3         0.2     setosa     setosa
## 4            4.6         3.1          1.5         0.2     setosa     setosa
## 5            5.0         3.6          1.4         0.2     setosa     setosa
## 6            5.4         3.9          1.7         0.4     setosa     setosa
## 7            4.6         3.4          1.4         0.3     setosa     setosa
## 8            5.0         3.4          1.5         0.2     setosa     setosa
## 9            4.4         2.9          1.4         0.2     setosa     setosa
## 10           4.9         3.1          1.5         0.1     setosa     setosa
## 11           5.4         3.7          1.5         0.2     setosa     setosa
## 12           4.8         3.4          1.6         0.2     setosa     setosa
## 13           4.8         3.0          1.4         0.1     setosa     setosa
## 14           4.3         3.0          1.1         0.1     setosa     setosa
## 15           5.8         4.0          1.2         0.2     setosa     setosa
## 16           5.7         4.4          1.5         0.4     setosa     setosa
## 17           5.4         3.9          1.3         0.4     setosa     setosa
## 18           5.1         3.5          1.4         0.3     setosa     setosa
## 19           5.7         3.8          1.7         0.3     setosa     setosa
## 20           5.1         3.8          1.5         0.3     setosa     setosa
## 21           5.4         3.4          1.7         0.2     setosa     setosa
## 22           5.1         3.7          1.5         0.4     setosa     setosa
## 23           4.6         3.6          1.0         0.2     setosa     setosa
## 24           5.1         3.3          1.7         0.5     setosa     setosa
## 25           4.8         3.4          1.9         0.2     setosa     setosa
## 26           5.0         3.0          1.6         0.2     setosa     setosa
## 27           5.0         3.4          1.6         0.4     setosa     setosa
## 28           5.2         3.5          1.5         0.2     setosa     setosa
## 29           5.2         3.4          1.4         0.2     setosa     setosa
## 30           4.7         3.2          1.6         0.2     setosa     setosa
## 31           4.8         3.1          1.6         0.2     setosa     setosa
## 32           5.4         3.4          1.5         0.4     setosa     setosa
## 33           5.2         4.1          1.5         0.1     setosa     setosa
## 34           5.5         4.2          1.4         0.2     setosa     setosa
## 35           4.9         3.1          1.5         0.2     setosa     setosa
## 36           5.0         3.2          1.2         0.2     setosa     setosa
## 37           5.5         3.5          1.3         0.2     setosa     setosa
## 38           4.9         3.6          1.4         0.1     setosa     setosa
## 39           4.4         3.0          1.3         0.2     setosa     setosa
## 40           5.1         3.4          1.5         0.2     setosa     setosa
## 41           5.0         3.5          1.3         0.3     setosa     setosa
## 42           4.5         2.3          1.3         0.3     setosa     setosa
## 43           4.4         3.2          1.3         0.2     setosa     setosa
## 44           5.0         3.5          1.6         0.6     setosa     setosa
## 45           5.1         3.8          1.9         0.4     setosa     setosa
## 46           4.8         3.0          1.4         0.3     setosa     setosa
## 47           5.1         3.8          1.6         0.2     setosa     setosa
## 48           4.6         3.2          1.4         0.2     setosa     setosa
## 49           5.3         3.7          1.5         0.2     setosa     setosa
## 50           5.0         3.3          1.4         0.2     setosa     setosa
## 51           7.0         3.2          4.7         1.4 versicolor versicolor
## 52           6.4         3.2          4.5         1.5 versicolor versicolor
## 53           6.9         3.1          4.9         1.5 versicolor versicolor
## 54           5.5         2.3          4.0         1.3 versicolor versicolor
## 55           6.5         2.8          4.6         1.5 versicolor versicolor
## 56           5.7         2.8          4.5         1.3 versicolor versicolor
## 57           6.3         3.3          4.7         1.6 versicolor versicolor
## 58           4.9         2.4          3.3         1.0 versicolor versicolor
## 59           6.6         2.9          4.6         1.3 versicolor versicolor
## 60           5.2         2.7          3.9         1.4 versicolor versicolor
## 61           5.0         2.0          3.5         1.0 versicolor versicolor
## 62           5.9         3.0          4.2         1.5 versicolor versicolor
## 63           6.0         2.2          4.0         1.0 versicolor versicolor
## 64           6.1         2.9          4.7         1.4 versicolor versicolor
## 65           5.6         2.9          3.6         1.3 versicolor versicolor
## 66           6.7         3.1          4.4         1.4 versicolor versicolor
## 67           5.6         3.0          4.5         1.5 versicolor versicolor
## 68           5.8         2.7          4.1         1.0 versicolor versicolor
## 69           6.2         2.2          4.5         1.5 versicolor versicolor
## 70           5.6         2.5          3.9         1.1 versicolor versicolor
## 71           5.9         3.2          4.8         1.8 versicolor  virginica
## 72           6.1         2.8          4.0         1.3 versicolor versicolor
## 73           6.3         2.5          4.9         1.5 versicolor versicolor
## 74           6.1         2.8          4.7         1.2 versicolor versicolor
## 75           6.4         2.9          4.3         1.3 versicolor versicolor
## 76           6.6         3.0          4.4         1.4 versicolor versicolor
## 77           6.8         2.8          4.8         1.4 versicolor versicolor
## 78           6.7         3.0          5.0         1.7 versicolor versicolor
## 79           6.0         2.9          4.5         1.5 versicolor versicolor
## 80           5.7         2.6          3.5         1.0 versicolor versicolor
## 81           5.5         2.4          3.8         1.1 versicolor versicolor
## 82           5.5         2.4          3.7         1.0 versicolor versicolor
## 83           5.8         2.7          3.9         1.2 versicolor versicolor
## 84           6.0         2.7          5.1         1.6 versicolor versicolor
## 85           5.4         3.0          4.5         1.5 versicolor versicolor
## 86           6.0         3.4          4.5         1.6 versicolor versicolor
## 87           6.7         3.1          4.7         1.5 versicolor versicolor
## 88           6.3         2.3          4.4         1.3 versicolor versicolor
## 89           5.6         3.0          4.1         1.3 versicolor versicolor
## 90           5.5         2.5          4.0         1.3 versicolor versicolor
## 91           5.5         2.6          4.4         1.2 versicolor versicolor
## 92           6.1         3.0          4.6         1.4 versicolor versicolor
## 93           5.8         2.6          4.0         1.2 versicolor versicolor
## 94           5.0         2.3          3.3         1.0 versicolor versicolor
## 95           5.6         2.7          4.2         1.3 versicolor versicolor
## 96           5.7         3.0          4.2         1.2 versicolor versicolor
## 97           5.7         2.9          4.2         1.3 versicolor versicolor
## 98           6.2         2.9          4.3         1.3 versicolor versicolor
## 99           5.1         2.5          3.0         1.1 versicolor versicolor
## 100          5.7         2.8          4.1         1.3 versicolor versicolor
## 101          6.3         3.3          6.0         2.5  virginica  virginica
## 102          5.8         2.7          5.1         1.9  virginica  virginica
## 103          7.1         3.0          5.9         2.1  virginica  virginica
## 104          6.3         2.9          5.6         1.8  virginica  virginica
## 105          6.5         3.0          5.8         2.2  virginica  virginica
## 106          7.6         3.0          6.6         2.1  virginica  virginica
## 107          4.9         2.5          4.5         1.7  virginica versicolor
## 108          7.3         2.9          6.3         1.8  virginica  virginica
## 109          6.7         2.5          5.8         1.8  virginica  virginica
## 110          7.2         3.6          6.1         2.5  virginica  virginica
## 111          6.5         3.2          5.1         2.0  virginica  virginica
## 112          6.4         2.7          5.3         1.9  virginica  virginica
## 113          6.8         3.0          5.5         2.1  virginica  virginica
## 114          5.7         2.5          5.0         2.0  virginica  virginica
## 115          5.8         2.8          5.1         2.4  virginica  virginica
## 116          6.4         3.2          5.3         2.3  virginica  virginica
## 117          6.5         3.0          5.5         1.8  virginica  virginica
## 118          7.7         3.8          6.7         2.2  virginica  virginica
## 119          7.7         2.6          6.9         2.3  virginica  virginica
## 120          6.0         2.2          5.0         1.5  virginica versicolor
## 121          6.9         3.2          5.7         2.3  virginica  virginica
## 122          5.6         2.8          4.9         2.0  virginica  virginica
## 123          7.7         2.8          6.7         2.0  virginica  virginica
## 124          6.3         2.7          4.9         1.8  virginica  virginica
## 125          6.7         3.3          5.7         2.1  virginica  virginica
## 126          7.2         3.2          6.0         1.8  virginica  virginica
## 127          6.2         2.8          4.8         1.8  virginica  virginica
## 128          6.1         3.0          4.9         1.8  virginica  virginica
## 129          6.4         2.8          5.6         2.1  virginica  virginica
## 130          7.2         3.0          5.8         1.6  virginica versicolor
## 131          7.4         2.8          6.1         1.9  virginica  virginica
## 132          7.9         3.8          6.4         2.0  virginica  virginica
## 133          6.4         2.8          5.6         2.2  virginica  virginica
## 134          6.3         2.8          5.1         1.5  virginica versicolor
## 135          6.1         2.6          5.6         1.4  virginica versicolor
## 136          7.7         3.0          6.1         2.3  virginica  virginica
## 137          6.3         3.4          5.6         2.4  virginica  virginica
## 138          6.4         3.1          5.5         1.8  virginica  virginica
## 139          6.0         3.0          4.8         1.8  virginica  virginica
## 140          6.9         3.1          5.4         2.1  virginica  virginica
## 141          6.7         3.1          5.6         2.4  virginica  virginica
## 142          6.9         3.1          5.1         2.3  virginica  virginica
## 143          5.8         2.7          5.1         1.9  virginica  virginica
## 144          6.8         3.2          5.9         2.3  virginica  virginica
## 145          6.7         3.3          5.7         2.5  virginica  virginica
## 146          6.7         3.0          5.2         2.3  virginica  virginica
## 147          6.3         2.5          5.0         1.9  virginica  virginica
## 148          6.5         3.0          5.2         2.0  virginica  virginica
## 149          6.2         3.4          5.4         2.3  virginica  virginica
## 150          5.9         3.0          5.1         1.8  virginica  virginica
# Predict the species for data_test (defined outside this section) with the
# decision tree and compare the predictions with the true labels.
# `positive` is not passed: it only applies to two-class problems, and
# Species has three levels, none of which is "Yes".
pred_test_dt <- predict(model_dt, newdata = data_test)
confusionMatrix(pred_test_dt, reference = data_test$Species)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         11          0         0
##   versicolor      0         10         1
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3667          
##     P-Value [Acc > NIR] : 4.476e-12       
##                                           
##                   Kappa : 0.9497          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8889
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9545
## Prevalence                  0.3667            0.3333           0.3000
## Detection Rate              0.3667            0.3333           0.2667
## Detection Prevalence        0.3667            0.3667           0.2667
## Balanced Accuracy           1.0000            0.9750           0.9444

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Random Forest

# Seed the RNG so the cross-validation folds and the forest are reproducible.
set.seed(417)

# Resampling scheme: 5-fold cross-validation, repeated 3 times.
ctrl <- trainControl(
  repeats = 3,
  number = 5,
  method = "repeatedcv"
)

# Fit a random forest (caret method "rf") that predicts Species from all
# other columns of data_train, tuned with the resampling scheme above.
model_forest <- train(
  Species ~ .,
  trControl = ctrl,
  method = "rf",
  data = data_train
)

Confusion Matrix

# Confusion matrix on the training data: predict data_train with the random
# forest and compare the predictions with the true labels.
# `positive` is not passed: it only applies to two-class problems, and
# Species has three levels, none of which is "Yes".
pred_train_rf <- predict(model_forest, newdata = data_train)
confusionMatrix(pred_train_rf, reference = data_train$Species)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         41          0         0
##   versicolor      0         41         0
##   virginica       0          0        41
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9705, 1)
##     No Information Rate : 0.3333     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           1.0000
## Specificity                 1.0000            1.0000           1.0000
## Pos Pred Value              1.0000            1.0000           1.0000
## Neg Pred Value              1.0000            1.0000           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3333
## Detection Prevalence        0.3333            0.3333           0.3333
## Balanced Accuracy           1.0000            1.0000           1.0000
# Confusion matrix on the test data: predict data_test (defined outside this
# section) with the random forest and compare with the true labels.
# `positive` is not passed: it only applies to two-class problems, and
# Species has three levels, none of which is "Yes".
pred_test_rf <- predict(model_forest, newdata = data_test)
confusionMatrix(pred_test_rf, reference = data_test$Species)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         11          0         0
##   versicolor      0          9         0
##   virginica       0          1         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3667          
##     P-Value [Acc > NIR] : 4.476e-12       
##                                           
##                   Kappa : 0.9499          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9000           1.0000
## Specificity                 1.0000            1.0000           0.9524
## Pos Pred Value              1.0000            1.0000           0.9000
## Neg Pred Value              1.0000            0.9524           1.0000
## Prevalence                  0.3667            0.3333           0.3000
## Detection Rate              0.3667            0.3000           0.3000
## Detection Prevalence        0.3667            0.3000           0.3333
## Balanced Accuracy           1.0000            0.9500           0.9762
# Sepal width recorded in the third iris row.
iris[["Sepal.Width"]][3]
## [1] 3.2
# Number of elements in data_intrain (defined outside this section;
# presumably the training-row indices -- TODO confirm).
length(data_intrain)
## [1] 120
# 120 out of 150 -- presumably the share of iris rows used for training.
120 / 150
## [1] 0.8