Visualisasi Data

Memuat Package

library(tidyverse)
library(ggplot2)

Dataset

# Read the two semicolon-delimited CSV files: the iris measurements used for
# modelling and the rows to be classified at the end of the analysis.
datairis <- read.csv(file = "Dataset Iris.csv", sep = ";")
submission <- read.csv(file = "Data iris submission.csv", sep = ";")

Missing Data

# Count how many rows share each combination of per-column missing flags.
# is.na(.) is evaluated on the whole data frame, so the result has one
# TRUE/FALSE column per variable plus the row count `n`.
datairis %>%
  count(is.na(.))
##   is.na(.).Id is.na(.).SepalLengthCm is.na(.).SepalWidthCm
## 1       FALSE                  FALSE                 FALSE
## 2       FALSE                  FALSE                 FALSE
## 3       FALSE                  FALSE                 FALSE
## 4       FALSE                  FALSE                  TRUE
## 5       FALSE                   TRUE                 FALSE
##   is.na(.).PetalLengthCm is.na(.).PetalWidthCm is.na(.).Species   n
## 1                  FALSE                 FALSE            FALSE 122
## 2                  FALSE                  TRUE            FALSE   3
## 3                   TRUE                 FALSE            FALSE   2
## 4                  FALSE                 FALSE            FALSE   3
## 5                  FALSE                 FALSE            FALSE   5
# Keep only the rows that have no missing value in any column.
datairis <- drop_na(datairis)

Distribusi Data Berdasarkan Jenis Bunga

# Box plot of sepal width for each species.
ggplot(data = datairis) +
  geom_boxplot(mapping = aes(x = Species, y = SepalWidthCm)) +
  labs(
    title = "Distribusi Data berdasarkan Jenis Bunga",
    subtitle = "Sepal Width Cm",
    caption = "by : Arwan Zhagi",
    x = "Jenis Bunga",
    y = "Sepal  Width Cm"
  ) +
  theme(
    plot.title = element_text(
      color = "blue", size = 17, face = "bold", hjust = 0.5
    ),
    plot.subtitle = element_text(size = 10, face = "bold")
  )

# Box plot of sepal length for each species.
ggplot(data = datairis) +
  geom_boxplot(mapping = aes(x = Species, y = SepalLengthCm)) +
  labs(
    title = "Distribusi Data berdasarkan Jenis Bunga",
    subtitle = "Sepal Length Cm",
    caption = "by : Arwan Zhagi",
    x = "Jenis Bunga",
    y = "Sepal  Length Cm"
  ) +
  theme(
    plot.title = element_text(
      color = "blue", size = 17, face = "bold", hjust = 0.5
    ),
    plot.subtitle = element_text(size = 10, face = "bold")
  )

# Box plot of petal width for each species.
ggplot(data = datairis) +
  geom_boxplot(mapping = aes(x = Species, y = PetalWidthCm)) +
  labs(
    title = "Distribusi Data berdasarkan Jenis Bunga",
    subtitle = "Petal Width Cm",
    caption = "by : Arwan Zhagi",
    x = "Jenis Bunga",
    y = "Petal Width Cm"
  ) +
  theme(
    plot.title = element_text(
      color = "blue", size = 17, face = "bold", hjust = 0.5
    ),
    plot.subtitle = element_text(size = 10, face = "bold")
  )

# Box plot of petal length for each species.
# `show.legend` is spelled out as TRUE: the shorthand `T` is an ordinary
# variable that can be reassigned.
datairis %>%
  ggplot() +
  geom_boxplot(aes(Species, PetalLengthCm), show.legend = TRUE) +
  labs(
    title = "Distribusi Data berdasarkan Jenis Bunga",
    subtitle = "Petal Length Cm",
    caption = "by : Arwan Zhagi"
  ) +
  xlab("Jenis Bunga") +
  ylab("Petal Length Cm") +
  theme(
    plot.title = element_text(
      color = "blue", size = 17, face = "bold", hjust = 0.5
    ),
    plot.subtitle = element_text(size = 10, face = "bold")
  )

Hubungan Antara Dua Variabel Berdasarkan Jenis Bunga

# Scatter plot of sepal length (y) against sepal width (x), one facet per
# species. The axis labels follow the aesthetic mapping: x is SepalWidthCm and
# y is SepalLengthCm (the labels were previously swapped).
datairis %>%
  ggplot(aes(SepalWidthCm, SepalLengthCm)) +
  geom_point(aes(color = Species)) +
  facet_grid(. ~ Species) +
  labs(
    title = "Hubungan Sepal Length dengan\nSepal Width Berdasarkan\nJenis Bunga ",
    caption = "by : Arwan Zhagi"
  ) +
  xlab("Sepal Width") +
  ylab("Sepal Length") +
  # No trailing comma: theme() should not receive an empty argument.
  theme(
    plot.title = element_text(
      color = "blue", size = 17, face = "bold", hjust = 0.5
    )
  )

# Scatter plot of petal length (y) against sepal width (x), one facet per
# species.
datairis %>%
  ggplot(aes(SepalWidthCm, PetalLengthCm)) +
  geom_point(aes(color = Species)) +
  facet_grid(. ~ Species) +
  labs(
    title = "Hubungan Sepal Width dengan\npetal Length Berdasarkan\nJenis Bunga ",
    caption = "by : Arwan Zhagi"
  ) +
  xlab("Sepal Width") +
  ylab("petal Length") +
  # No trailing comma: theme() should not receive an empty argument.
  theme(
    plot.title = element_text(
      color = "blue", size = 17, face = "bold", hjust = 0.5
    )
  )

# Scatter plot of sepal length (y) against petal width (x), one facet per
# species.
datairis %>%
  ggplot(aes(PetalWidthCm, SepalLengthCm)) +
  geom_point(aes(color = Species)) +
  facet_grid(. ~ Species) +
  labs(
    title = "Hubungan Petal Width dengan\nSepal Length Berdasarkan\nJenis Bunga ",
    caption = "by : Arwan Zhagi"
  ) +
  xlab("Petal Width") +
  ylab("Sepal Length") +
  # No trailing comma: theme() should not receive an empty argument.
  theme(
    plot.title = element_text(
      color = "blue", size = 17, face = "bold", hjust = 0.5
    )
  )

# Scatter plot of petal length (y) against petal width (x), one facet per
# species.
datairis %>%
  ggplot(aes(PetalWidthCm, PetalLengthCm)) +
  geom_point(aes(color = Species)) +
  facet_grid(. ~ Species) +
  labs(
    title = "Hubungan Petal Width dengan\nPetal Length Berdasarkan\nJenis Bunga ",
    caption = "by : Arwan Zhagi"
  ) +
  xlab("Petal Width") +
  ylab("Petal Length") +
  # No trailing comma: theme() should not receive an empty argument.
  theme(
    plot.title = element_text(
      color = "blue", size = 17, face = "bold", hjust = 0.5
    )
  )



CLASSIFICATIONS

Dataset Species Diubah Menjadi Numerik

# Encode the species label as a number: 1 for "Iris-setosa", 2 for
# "Iris-versicolor" and 3 for every other value.
datairis <- mutate(
  datairis,
  SpeciesNum = ifelse(
    Species == "Iris-setosa",
    1,
    ifelse(Species == "Iris-versicolor", 2, 3)
  )
)


Regresi Logistik

Memuat Package

library(tidyverse)
library(caret)

Model Dataset Full

set.seed(1)

# Split the data into training and test sets ----
# Each row independently draws a 0/1 flag; rows flagged 1 form the training
# set and rows flagged 0 form the test set.
sampled_iris <- datairis
sampled_iris$training <- sample(0:1, size = nrow(datairis), replace = TRUE)

training_data <- filter(sampled_iris, training == 1)
test_data <- filter(sampled_iris, training == 0)

# Regression model ----
# Fitted on the full (NA-free) data set. No `family` is supplied, so glm()
# uses its gaussian default (see the summary output): this is a linear
# regression on the numeric species code, not a logistic model. Its
# predictions are turned into labels by classify().
model1 <-
  datairis%>%
  glm(SpeciesNum ~ SepalLengthCm + SepalWidthCm +
        PetalLengthCm + PetalWidthCm ,data = .)

summary(model1)
## 
## Call:
## glm(formula = SpeciesNum ~ SepalLengthCm + SepalWidthCm + PetalLengthCm + 
##     PetalWidthCm, data = .)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.58569  -0.15004   0.02204   0.10421   0.50995  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.29668    0.22890   5.665 1.07e-07 ***
## SepalLengthCm -0.12292    0.06213  -1.979  0.05022 .  
## SepalWidthCm  -0.05584    0.06694  -0.834  0.40591    
## PetalLengthCm  0.20505    0.06134   3.343  0.00112 ** 
## PetalWidthCm   0.67150    0.10125   6.632 1.08e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.04732437)
## 
##     Null deviance: 80.795  on 121  degrees of freedom
## Residual deviance:  5.537  on 117  degrees of freedom
## AIC: -19.073
## 
## Number of Fisher Scoring iterations: 2

Confusion Matrix

# Map numeric predictions to species labels by thresholding:
# below 1.5 -> "setosa", from 1.5 up to (but excluding) 2.5 -> "versicolor",
# 2.5 and above -> "virginica". Vectorised over `probability`.
classify <- function(probability) {
  below_first_cut <- probability < 1.5
  below_second_cut <- probability < 2.5
  upper_label <- ifelse(below_second_cut, "versicolor", "virginica")
  ifelse(below_first_cut, "setosa", upper_label)
}



# Predict on the same rows model1 was fitted on and convert the numeric
# predictions to species labels.
classified <- classify(predict(model1, datairis))

# Cross-tabulate the recorded species (rows) against the predicted labels
# (columns).
table(datairis$Species, classified, dnn= c("Data", "predictions"))
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         38          0         0
##   Iris-versicolor      0         39         2
##   Iris-virginica       0          1        42
# Keep the confusion table (rows: recorded species, columns: predictions).
confusion1 <- table(Data = datairis$Species, predictions = classified)

Akurasi, Spesifisitas, dan Sensitivitas

# Proportion of all counts that lie on the main diagonal of the confusion
# matrix.
accuracy <- function(confusion_matrix) {
  on_diagonal <- sum(diag(confusion_matrix))
  all_counts <- sum(confusion_matrix)
  on_diagonal / all_counts
}

# Ratio of cell [1, 1] to the sum of cells [1, 1] and [1, 2].
# NOTE(review): only the first two columns of row 1 are used, so for the
# 3-class tables built in this file this is not the usual per-class
# specificity -- confirm that this is intended.
specificity <- function(confusion_matrix) {
  first_cell <- confusion_matrix[1, 1]
  second_cell <- confusion_matrix[1, 2]
  first_cell / (first_cell + second_cell)
}

# Ratio of cell [2, 2] to the sum of cells [2, 1] and [2, 2].
# NOTE(review): column 3 is ignored, so row-2 cases counted in column 3 do
# not lower this value on a 3-class table -- confirm that this is intended.
sensitivity <- function(confusion_matrix) {
  off_cell <- confusion_matrix[2, 1]
  hit_cell <- confusion_matrix[2, 2]
  hit_cell / (off_cell + hit_cell)
}

# Bundle the three metrics of a confusion matrix into one named numeric
# vector with elements "accuracy", "specificity" and "sensitivity".
prediction_summary <- function(confusion_matrix) {
  acc <- accuracy(confusion_matrix)
  spec <- specificity(confusion_matrix)
  sens <- sensitivity(confusion_matrix)
  c("accuracy" = acc, "specificity" = spec, "sensitivity" = sens)
}

prediction_summary(confusion1)
##    accuracy specificity sensitivity 
##   0.9754098   1.0000000   1.0000000

Model Dengan Sampling

set.seed(11)
# Regression model ----
# Same formula as the full-data model, but fitted only on training_data so
# that test_data can be used for evaluation. No `family` is supplied, so the
# gaussian default applies (see the summary output).
model11 <-
  training_data%>%
  glm(SpeciesNum ~ SepalLengthCm + SepalWidthCm +
        PetalLengthCm + PetalWidthCm ,data = .)

summary(model11)
## 
## Call:
## glm(formula = SpeciesNum ~ SepalLengthCm + SepalWidthCm + PetalLengthCm + 
##     PetalWidthCm, data = .)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5726  -0.1626   0.0264   0.1129   0.4398  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.24361    0.33042   3.764 0.000393 ***
## SepalLengthCm -0.15243    0.09412  -1.620 0.110752    
## SepalWidthCm  -0.02441    0.10878  -0.224 0.823271    
## PetalLengthCm  0.28509    0.09786   2.913 0.005073 ** 
## PetalWidthCm   0.52110    0.16591   3.141 0.002651 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.06094456)
## 
##     Null deviance: 38.9841  on 62  degrees of freedom
## Residual deviance:  3.5348  on 58  degrees of freedom
## AIC: 9.3159
## 
## Number of Fisher Scoring iterations: 2
# Predict the held-out test rows with the model fitted on the training rows
# and convert the numeric predictions to species labels.
classified11 <- classify(predict(model11, test_data))

# Cross-tabulate the recorded species (rows) against the predicted labels
# (columns).
table(test_data$Species, classified11, dnn= c("Data", "predictions"))
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         22          0         0
##   Iris-versicolor      0         18         0
##   Iris-virginica       0          0        19
# Keep the test-set confusion table and report its summary metrics.
confusion11 <- table(Data = test_data$Species, predictions = classified11)

prediction_summary(confusion11)
##    accuracy specificity sensitivity 
##           1           1           1

Cross Validations

set.seed(111)

# Randomly assign `n` items to named groups in (approximately) the given
# proportions.
#
# n:     number of items to assign.
# probs: named numeric vector of group weights; it is normalised to sum to 1.
#
# Returns a character vector of length `n` holding group names. The group
# sizes are fixed by cutting an evenly spaced grid on [0, 1] at the cumulative
# proportions; only the order of the labels is random (via sample()).
random_group <- function(n, probs) {
  probs <- probs / sum(probs)
  # `length.out` is spelled out in full instead of relying on partial
  # argument matching of `length`.
  g <- findInterval(
    seq(0, 1, length.out = n),
    c(0, cumsum(probs)),
    rightmost.closed = TRUE
  )
  names(probs)[sample(g)]
}


# Build `n` independent random splits of `df`.
#
# Each split is the result of split() on group labels drawn by
# random_group(), i.e. a list of data frames named after `probs`.
# Returns an unnamed list of length `n`.
partition <- function(df, n, probs) {
  lapply(seq_len(n), function(i) {
    split(df, random_group(nrow(df), probs))
  })
}


# Fit and evaluate the gaussian glm on every training/test split.
#
# test_and_training: list of splits, each with elements `training` and `test`
#   (as produced by partition()).
#
# Returns an unnamed two-element list: [[1]] the confusion table of each
# split, [[2]] the prediction_summary() vector of each split.
prediction_accuracy_iris <- function(test_and_training) {
  # Fix the row/column levels so every table is 3 x 3 even when a class is
  # absent from a split; otherwise diag() and the fixed [i, j] indexing in the
  # metric helpers would read the wrong cells or fail.
  species_levels <- c("Iris-setosa", "Iris-versicolor", "Iris-virginica")
  predicted_levels <- c("setosa", "versicolor", "virginica")

  confusion <- lapply(test_and_training, function(fold) {
    model <- glm(
      SpeciesNum ~ SepalLengthCm + SepalWidthCm +
        PetalLengthCm + PetalWidthCm,
      data = fold$training
    )
    classifications <- classify(predict(model, fold$test))
    table(
      factor(fold$test$Species, levels = species_levels),
      factor(classifications, levels = predicted_levels),
      dnn = c("Data", "predictions")
    )
  })
  akurasi_iris <- lapply(confusion, prediction_summary)

  list(confusion, akurasi_iris)
}


# Evaluate the model on 5 random 60/40 training/test splits and print the
# confusion tables and metrics.
prediction_accuracy_iris(
  partition(datairis, 5, c(training = 0.6, test = 0.4))
)
## [[1]]
## [[1]][[1]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         11          0         0
##   Iris-versicolor      0         15         1
##   Iris-virginica       0          1        21
## 
## [[1]][[2]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         17          0         0
##   Iris-versicolor      0         16         0
##   Iris-virginica       0          2        14
## 
## [[1]][[3]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         16          0         0
##   Iris-versicolor      0         16         0
##   Iris-virginica       0          1        16
## 
## [[1]][[4]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         15          0         0
##   Iris-versicolor      0         17         2
##   Iris-virginica       0          0        15
## 
## [[1]][[5]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         12          0         0
##   Iris-versicolor      0         20         1
##   Iris-virginica       0          0        16
## 
## 
## [[2]]
## [[2]][[1]]
##    accuracy specificity sensitivity 
##   0.9591837   1.0000000   1.0000000 
## 
## [[2]][[2]]
##    accuracy specificity sensitivity 
##   0.9591837   1.0000000   1.0000000 
## 
## [[2]][[3]]
##    accuracy specificity sensitivity 
##   0.9795918   1.0000000   1.0000000 
## 
## [[2]][[4]]
##    accuracy specificity sensitivity 
##   0.9591837   1.0000000   1.0000000 
## 
## [[2]][[5]]
##    accuracy specificity sensitivity 
##   0.9795918   1.0000000   1.0000000
# Store the cross-validation result for the regression model.
# NOTE: partition() draws new random splits on every call and the seed is not
# reset in between, so the values stored here come from different splits than
# the ones printed by the preceding call.
regresi <- 
  datairis %>%
  partition(5, c(training = 0.6, test = 0.4)) %>%
  prediction_accuracy_iris


# Average accuracy over all cross-validation splits.
#
# listdata: result of prediction_accuracy_iris(), i.e. a list whose second
#   element is a list of per-split metric vectors with accuracy first.
#
# Returns the mean of the first metric across every split. The splits are
# taken from listdata[[2]]; iterating over `listdata` itself (length 2) would
# average only the first two splits.
mean_accuracy <- function(listdata) {
  fold_summaries <- listdata[[2]]
  akurasi <- vapply(
    fold_summaries,
    function(fold_summary) fold_summary[[1]],
    numeric(1)
  )
  mean(akurasi)
}


mean_accuracy(regresi)
## [1] 0.9693878
# Mean cross-validated accuracy of the regression model.
regresi_accuracy <- mean_accuracy(regresi)


Decision Tree

Memuat Package

library(party)

Model Dataset Full

set.seed(2)


# Conditional inference tree (party::ctree) on the full data set ----
# The response is the numeric species code; predictions are turned into
# labels by classify().
# The formula uses all four measurements: SepalLengthCm was previously listed
# twice, which left SepalWidthCm out of the model.
# NOTE: the recorded output below was produced with the earlier formula;
# re-knit the document to refresh it.
model2 <-
  datairis %>%
  ctree(SpeciesNum ~ SepalLengthCm + SepalWidthCm +
          PetalLengthCm + PetalWidthCm, data = .)

summary(model2)
##     Length      Class       Mode 
##          1 BinaryTree         S4

Confusion Matrix

# Predict on the same rows model2 was fitted on and convert the numeric
# predictions to species labels.
classified2 <- classify(predict(model2, datairis))

# Cross-tabulate the recorded species (rows) against the predicted labels
# (columns).
table(datairis$Species, classified2, dnn= c("Data", "predictions"))
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         38          0         0
##   Iris-versicolor      0         40         1
##   Iris-virginica       0          3        40
# Keep the confusion table (rows: recorded species, columns: predictions).
confusion2 <- table(Data = datairis$Species, predictions = classified2)

Akurasi, Spesifisitas, dan Sensitivitas

prediction_summary(confusion2)
##    accuracy specificity sensitivity 
##   0.9672131   1.0000000   1.0000000

Model Dengan Sampling

set.seed(22)
# Conditional inference tree fitted on the training rows only ----
# The formula uses all four measurements: SepalLengthCm was previously listed
# twice, which left SepalWidthCm out of the model.
# NOTE: the recorded output below was produced with the earlier formula;
# re-knit the document to refresh it.
model22 <-
  training_data %>%
  ctree(SpeciesNum ~ SepalLengthCm + SepalWidthCm +
          PetalLengthCm + PetalWidthCm, data = .)

summary(model22)
##     Length      Class       Mode 
##          1 BinaryTree         S4
# Predict the held-out test rows with the tree fitted on the training rows
# and convert the numeric predictions to species labels.
classified22 <- classify(predict(model22, test_data))

# Cross-tabulate the recorded species (rows) against the predicted labels
# (columns).
table(test_data$Species, classified22, dnn= c("Data", "predictions"))
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         21          1         0
##   Iris-versicolor      0         17         1
##   Iris-virginica       0          0        19
# Keep the test-set confusion table and report its summary metrics.
confusion22 <- table(Data = test_data$Species, predictions = classified22)

prediction_summary(confusion22)
##    accuracy specificity sensitivity 
##   0.9661017   0.9545455   1.0000000

Cross Validations

set.seed(222)


# Fit and evaluate a conditional inference tree on every training/test split.
# This redefines prediction_accuracy_iris() for the tree model.
#
# test_and_training: list of splits, each with elements `training` and `test`
#   (as produced by partition()).
#
# Returns an unnamed two-element list: [[1]] the confusion table of each
# split, [[2]] the prediction_summary() vector of each split.
prediction_accuracy_iris <- function(test_and_training) {
  # Fix the row/column levels so every table is 3 x 3 even when a class is
  # absent from a split; otherwise diag() and the fixed [i, j] indexing in the
  # metric helpers would read the wrong cells or fail.
  species_levels <- c("Iris-setosa", "Iris-versicolor", "Iris-virginica")
  predicted_levels <- c("setosa", "versicolor", "virginica")

  confusion <- lapply(test_and_training, function(fold) {
    # All four measurements are used; SepalLengthCm was previously listed
    # twice, which left SepalWidthCm out of the model.
    model <- ctree(
      SpeciesNum ~ SepalLengthCm + SepalWidthCm +
        PetalLengthCm + PetalWidthCm,
      data = fold$training
    )
    classifications <- classify(predict(model, fold$test))
    table(
      factor(fold$test$Species, levels = species_levels),
      factor(classifications, levels = predicted_levels),
      dnn = c("Data", "predictions")
    )
  })
  akurasi_iris <- lapply(confusion, prediction_summary)

  list(confusion, akurasi_iris)
}


# Evaluate the model on 5 random 60/40 training/test splits and print the
# confusion tables and metrics.
prediction_accuracy_iris(
  partition(datairis, 5, c(training = 0.6, test = 0.4))
)
## [[1]]
## [[1]][[1]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         12          0         0
##   Iris-versicolor      0         13         1
##   Iris-virginica       0          1        22
## 
## [[1]][[2]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         15          1         0
##   Iris-versicolor      0         15         1
##   Iris-virginica       0          0        17
## 
## [[1]][[3]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         14          0         0
##   Iris-versicolor      0         16         2
##   Iris-virginica       0          0        17
## 
## [[1]][[4]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         11          2         0
##   Iris-versicolor      0         20         0
##   Iris-virginica       0          0        16
## 
## [[1]][[5]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         15          0         0
##   Iris-versicolor      0         18         1
##   Iris-virginica       0          0        15
## 
## 
## [[2]]
## [[2]][[1]]
##    accuracy specificity sensitivity 
##   0.9591837   1.0000000   1.0000000 
## 
## [[2]][[2]]
##    accuracy specificity sensitivity 
##   0.9591837   0.9375000   1.0000000 
## 
## [[2]][[3]]
##    accuracy specificity sensitivity 
##   0.9591837   1.0000000   1.0000000 
## 
## [[2]][[4]]
##    accuracy specificity sensitivity 
##   0.9591837   0.8461538   1.0000000 
## 
## [[2]][[5]]
##    accuracy specificity sensitivity 
##   0.9795918   1.0000000   1.0000000
# Store the cross-validation result for the tree model.
# NOTE: partition() draws new random splits on every call and the seed is not
# reset in between, so the values stored here come from different splits than
# the ones printed by the preceding call.
Decision_tree <- 
  datairis %>%
  partition(5, c(training = 0.6, test = 0.4)) %>%
  prediction_accuracy_iris


# Average accuracy over all cross-validation splits.
#
# listdata: result of prediction_accuracy_iris(), i.e. a list whose second
#   element is a list of per-split metric vectors with accuracy first.
#
# Returns the mean of the first metric across every split. The splits are
# taken from listdata[[2]]; iterating over `listdata` itself (length 2) would
# average only the first two splits.
mean_accuracy <- function(listdata) {
  fold_summaries <- listdata[[2]]
  akurasi <- vapply(
    fold_summaries,
    function(fold_summary) fold_summary[[1]],
    numeric(1)
  )
  mean(akurasi)
}


mean_accuracy(Decision_tree)
## [1] 0.9387755
# Mean cross-validated accuracy of the tree model, then a plot of the tree
# fitted on the full data set.
tree_accuracy <- mean_accuracy(Decision_tree)
plot(model2)



Random Forest

Memuat Package

library(randomForest)

Model Dataset Full

set.seed(3)


# Random forest on the full data set ----
# The response is the numeric species code, so randomForest() runs in
# regression mode; predictions are turned into labels by classify().
# The formula uses all four measurements: SepalLengthCm was previously listed
# twice, which left SepalWidthCm out of the model.
# NOTE: the recorded output below was produced with the earlier formula;
# re-knit the document to refresh it.
model3 <-
  datairis %>%
  randomForest(SpeciesNum ~ SepalLengthCm + SepalWidthCm +
                 PetalLengthCm + PetalWidthCm, data = .)

summary(model3)
##                 Length Class  Mode     
## call              3    -none- call     
## type              1    -none- character
## predicted       122    -none- numeric  
## mse             500    -none- numeric  
## rsq             500    -none- numeric  
## oob.times       122    -none- numeric  
## importance        3    -none- numeric  
## importanceSD      0    -none- NULL     
## localImportance   0    -none- NULL     
## proximity         0    -none- NULL     
## ntree             1    -none- numeric  
## mtry              1    -none- numeric  
## forest           11    -none- list     
## coefs             0    -none- NULL     
## y               122    -none- numeric  
## test              0    -none- NULL     
## inbag             0    -none- NULL     
## terms             3    terms  call

Confusion Matrix

# Predict on the same rows model3 was fitted on and convert the numeric
# predictions to species labels.
classified3 <- classify(predict(model3, datairis))

# Cross-tabulate the recorded species (rows) against the predicted labels
# (columns).
table(datairis$Species, classified3, dnn= c("Data", "predictions"))
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         38          0         0
##   Iris-versicolor      0         40         1
##   Iris-virginica       0          1        42
# Keep the confusion table (rows: recorded species, columns: predictions).
confusion3 <- table(Data = datairis$Species, predictions = classified3)

Akurasi, Spesifisitas, dan Sensitivitas

prediction_summary(confusion3)
##    accuracy specificity sensitivity 
##   0.9836066   1.0000000   1.0000000

Model Dengan Sampling

set.seed(33)
# Random forest fitted on the training rows only ----
# The response is the numeric species code, so randomForest() runs in
# regression mode (hence the warning recorded below).
# The formula uses all four measurements: SepalLengthCm was previously listed
# twice, which left SepalWidthCm out of the model.
# NOTE: the recorded output below was produced with the earlier formula;
# re-knit the document to refresh it.
model33 <-
  training_data %>%
  randomForest(SpeciesNum ~ SepalLengthCm + SepalWidthCm +
                 PetalLengthCm + PetalWidthCm, data = .)
## Warning in randomForest.default(m, y, ...): The response has five or fewer
## unique values. Are you sure you want to do regression?
summary(model33)
##                 Length Class  Mode     
## call              3    -none- call     
## type              1    -none- character
## predicted        63    -none- numeric  
## mse             500    -none- numeric  
## rsq             500    -none- numeric  
## oob.times        63    -none- numeric  
## importance        3    -none- numeric  
## importanceSD      0    -none- NULL     
## localImportance   0    -none- NULL     
## proximity         0    -none- NULL     
## ntree             1    -none- numeric  
## mtry              1    -none- numeric  
## forest           11    -none- list     
## coefs             0    -none- NULL     
## y                63    -none- numeric  
## test              0    -none- NULL     
## inbag             0    -none- NULL     
## terms             3    terms  call
# Predict the held-out test rows with the forest fitted on the training rows
# and convert the numeric predictions to species labels.
classified33 <- classify(predict(model33, test_data))

# Cross-tabulate the recorded species (rows) against the predicted labels
# (columns).
table(test_data$Species, classified33, dnn= c("Data", "predictions"))
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         22          0         0
##   Iris-versicolor      0         17         1
##   Iris-virginica       0          0        19
# Keep the test-set confusion table and report its summary metrics.
confusion33 <- table(Data = test_data$Species, predictions = classified33)

prediction_summary(confusion33)
##    accuracy specificity sensitivity 
##   0.9830508   1.0000000   1.0000000

Cross Validations

set.seed(333)


# Fit and evaluate a random forest on every training/test split.
# This redefines prediction_accuracy_iris() for the random forest model.
#
# test_and_training: list of splits, each with elements `training` and `test`
#   (as produced by partition()).
#
# Returns an unnamed two-element list: [[1]] the confusion table of each
# split, [[2]] the prediction_summary() vector of each split.
prediction_accuracy_iris <- function(test_and_training) {
  # Fix the row/column levels so every table is 3 x 3 even when a class is
  # absent from a split; otherwise diag() and the fixed [i, j] indexing in the
  # metric helpers would read the wrong cells or fail.
  species_levels <- c("Iris-setosa", "Iris-versicolor", "Iris-virginica")
  predicted_levels <- c("setosa", "versicolor", "virginica")

  confusion <- lapply(test_and_training, function(fold) {
    # All four measurements are used; SepalLengthCm was previously listed
    # twice, which left SepalWidthCm out of the model.
    model <- randomForest(
      SpeciesNum ~ SepalLengthCm + SepalWidthCm +
        PetalLengthCm + PetalWidthCm,
      data = fold$training
    )
    classifications <- classify(predict(model, fold$test))
    table(
      factor(fold$test$Species, levels = species_levels),
      factor(classifications, levels = predicted_levels),
      dnn = c("Data", "predictions")
    )
  })
  akurasi_iris <- lapply(confusion, prediction_summary)

  list(confusion, akurasi_iris)
}


# Evaluate the model on 5 random 60/40 training/test splits and print the
# confusion tables and metrics.
prediction_accuracy_iris(
  partition(datairis, 5, c(training = 0.6, test = 0.4))
)
## [[1]]
## [[1]][[1]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         14          0         0
##   Iris-versicolor      0         14         1
##   Iris-virginica       0          3        17
## 
## [[1]][[2]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         13          0         0
##   Iris-versicolor      0         15         1
##   Iris-virginica       0          1        19
## 
## [[1]][[3]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         15          0         0
##   Iris-versicolor      0         17         3
##   Iris-virginica       0          0        14
## 
## [[1]][[4]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         12          0         0
##   Iris-versicolor      0         17         1
##   Iris-virginica       0          1        18
## 
## [[1]][[5]]
##                  predictions
## Data              setosa versicolor virginica
##   Iris-setosa         20          0         0
##   Iris-versicolor      0         14         2
##   Iris-virginica       0          1        12
## 
## 
## [[2]]
## [[2]][[1]]
##    accuracy specificity sensitivity 
##   0.9183673   1.0000000   1.0000000 
## 
## [[2]][[2]]
##    accuracy specificity sensitivity 
##   0.9591837   1.0000000   1.0000000 
## 
## [[2]][[3]]
##    accuracy specificity sensitivity 
##   0.9387755   1.0000000   1.0000000 
## 
## [[2]][[4]]
##    accuracy specificity sensitivity 
##   0.9591837   1.0000000   1.0000000 
## 
## [[2]][[5]]
##    accuracy specificity sensitivity 
##   0.9387755   1.0000000   1.0000000
# Store the cross-validation result for the random forest model.
# NOTE: partition() draws new random splits on every call and the seed is not
# reset in between, so the values stored here come from different splits than
# the ones printed by the preceding call.
Random_Forest <- 
  datairis %>%
  partition(5, c(training = 0.6, test = 0.4)) %>%
  prediction_accuracy_iris


# Average accuracy over all cross-validation splits.
#
# listdata: result of prediction_accuracy_iris(), i.e. a list whose second
#   element is a list of per-split metric vectors with accuracy first.
#
# Returns the mean of the first metric across every split. The splits are
# taken from listdata[[2]]; iterating over `listdata` itself (length 2) would
# average only the first two splits.
mean_accuracy <- function(listdata) {
  fold_summaries <- listdata[[2]]
  akurasi <- vapply(
    fold_summaries,
    function(fold_summary) fold_summary[[1]],
    numeric(1)
  )
  mean(akurasi)
}


mean_accuracy(Random_Forest)
## [1] 0.9591837
# Mean cross-validated accuracy of the random forest model.
Random_Forest_accuracy <- mean_accuracy(Random_Forest)

Tabel Akurasi dari Ketiga Metode

# One-row comparison table of the mean cross-validated accuracy per method.
tibble(
  Regresi = regresi_accuracy,
  Tree = tree_accuracy,
  Random_Forest = Random_Forest_accuracy
)
## # A tibble: 1 x 3
##   Regresi  Tree Random_Forest
##     <dbl> <dbl>         <dbl>
## 1   0.969 0.939         0.959

Akurasi Tertinggi adalah Regresi.


Mencoba Menebak Jenis Bunga Berdasarkan Data Submission

Dataset

submission
##     Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
## 1  136           4.6          3.6           1.0          0.2
## 2  137           5.0          3.2           1.2          0.2
## 3  138           5.8          4.0           1.2          0.2
## 4  139           5.3          3.7           1.5          0.2
## 5  140           5.4          3.7           1.5          0.2
## 6  141           5.6          2.9           3.6          1.3
## 7  142           5.5          2.3           4.0          1.3
## 8  143           5.2          2.7           3.9          1.4
## 9  144           6.6          3.0           4.4          1.4
## 10 145           6.7          3.1           4.4          1.4
## 11 146           7.2          3.2           6.0          1.8
## 12 147           7.3          2.9           6.3          1.8
## 13 148           6.3          2.5           5.0          1.9
## 14 149           6.3          2.8           5.1          1.5
## 15 150           6.2          2.8           4.8          1.8

Hasil Classifications

# Predict the submission rows with the full-data regression model and convert
# the numeric predictions to species labels.
classify(predict(model1, submission))
##            1            2            3            4            5            6 
##     "setosa"     "setosa"     "setosa"     "setosa"     "setosa" "versicolor" 
##            7            8            9           10           11           12 
## "versicolor" "versicolor" "versicolor" "versicolor"  "virginica"  "virginica" 
##           13           14           15 
##  "virginica" "versicolor"  "virginica"
# Append the predicted label to the submission rows. The new column is named
# explicitly; an unnamed mutate() expression would otherwise be labelled with
# the full call text.
# NOTE: the recorded output below still shows the old auto-generated column
# name; re-knit the document to refresh it.
submission %>%
  mutate(Species = classify(predict(model1, submission)))
##     Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
## 1  136           4.6          3.6           1.0          0.2
## 2  137           5.0          3.2           1.2          0.2
## 3  138           5.8          4.0           1.2          0.2
## 4  139           5.3          3.7           1.5          0.2
## 5  140           5.4          3.7           1.5          0.2
## 6  141           5.6          2.9           3.6          1.3
## 7  142           5.5          2.3           4.0          1.3
## 8  143           5.2          2.7           3.9          1.4
## 9  144           6.6          3.0           4.4          1.4
## 10 145           6.7          3.1           4.4          1.4
## 11 146           7.2          3.2           6.0          1.8
## 12 147           7.3          2.9           6.3          1.8
## 13 148           6.3          2.5           5.0          1.9
## 14 149           6.3          2.8           5.1          1.5
## 15 150           6.2          2.8           4.8          1.8
##    classify(predict(model1, submission))
## 1                                 setosa
## 2                                 setosa
## 3                                 setosa
## 4                                 setosa
## 5                                 setosa
## 6                             versicolor
## 7                             versicolor
## 8                             versicolor
## 9                             versicolor
## 10                            versicolor
## 11                             virginica
## 12                             virginica
## 13                             virginica
## 14                            versicolor
## 15                             virginica