Kasus Prediktor Interval

Kode 1

Data Iris

# Per-column summary of the built-in iris data set (printed below).
summary(iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Summary restricted to the two width measurements.
summary(iris[, c("Petal.Width", "Sepal.Width")])
##   Petal.Width     Sepal.Width   
##  Min.   :0.100   Min.   :2.000  
##  1st Qu.:0.300   1st Qu.:2.800  
##  Median :1.300   Median :3.000  
##  Mean   :1.199   Mean   :3.057  
##  3rd Qu.:1.800   3rd Qu.:3.300  
##  Max.   :2.500   Max.   :4.400
library(class)

# Min-max rescale a numeric vector so its smallest value maps to 0 and its
# largest to 1.
#
# Arguments:
#   x      Numeric vector to rescale.
#   na.rm  If TRUE, NAs are ignored when finding the minimum and maximum and
#          stay NA in the result. With the default FALSE any NA in `x` makes
#          the whole result NA.
#
# Returns a double vector of the same length as `x`. A zero-length input
# yields numeric(0); an input whose values are all equal yields zeros rather
# than NaN from a 0/0 division.
normalize <- function(x, na.rm = FALSE) {
  if (length(x) == 0L) {
    return(numeric(0))
  }
  lo <- min(x, na.rm = na.rm)
  span <- max(x, na.rm = na.rm) - lo
  # A zero range means every value equals the minimum; dividing by 1 keeps
  # the result at 0 instead of producing NaN.
  if (!is.na(span) && span == 0) {
    span <- 1
  }
  (x - lo) / span
}

# Rescale each of the four measurement columns (columns 1-4 of iris) with
# normalize() and print the summary of the rescaled data frame.
# NOTE(review): iris_norm is not referenced again by the code visible below.
iris_norm <- iris[1:4]
iris_norm[] <- lapply(iris_norm, normalize)
summary(iris_norm)
##   Sepal.Length     Sepal.Width      Petal.Length     Petal.Width     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.2222   1st Qu.:0.3333   1st Qu.:0.1017   1st Qu.:0.08333  
##  Median :0.4167   Median :0.4167   Median :0.5678   Median :0.50000  
##  Mean   :0.4287   Mean   :0.4406   Mean   :0.4675   Mean   :0.45806  
##  3rd Qu.:0.5833   3rd Qu.:0.5417   3rd Qu.:0.6949   3rd Qu.:0.70833  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000
# Reproducible random partition: every row of iris is independently assigned
# to group 1 (training, probability 0.67) or group 2 (test, probability 0.33).
set.seed(1234)
ind <- sample(2, nrow(iris), replace = TRUE, prob = c(0.67, 0.33))

# Measurement columns for each partition (column 5, Species, is dropped).
# These are taken from the raw iris data, not from iris_norm.
iris.training <- iris[ind == 1, -5]
iris.test <- iris[ind == 2, -5]

# Species labels matching each partition.
iris.trainLabels <- iris$Species[ind == 1]
iris.testLabels <- iris$Species[ind == 2]

# k-nearest-neighbour classification of the test rows with k = 3.
iris_pred <- knn(iris.training, iris.test, iris.trainLabels, k = 3)

iris_pred
##  [1] setosa     setosa     setosa     setosa     setosa     setosa    
##  [7] setosa     setosa     setosa     setosa     setosa     setosa    
## [13] versicolor versicolor versicolor versicolor versicolor versicolor
## [19] versicolor versicolor versicolor versicolor versicolor versicolor
## [25] virginica  virginica  virginica  virginica  versicolor virginica 
## [31] virginica  virginica  virginica  virginica  virginica  virginica 
## [37] virginica  virginica  virginica  virginica 
## Levels: setosa versicolor virginica
library(gmodels)
## Warning: package 'gmodels' was built under R version 3.1.3
# Cross-tabulate the true test labels (rows) against the kNN predictions
# (columns). With prop.chisq=FALSE the printed cells show only counts and
# row/column/table proportions, as listed under "Cell Contents" below.
CrossTable(x = iris.testLabels, y = iris_pred, prop.chisq=FALSE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  40 
## 
##  
##                 | iris_pred 
## iris.testLabels |     setosa | versicolor |  virginica |  Row Total | 
## ----------------|------------|------------|------------|------------|
##          setosa |         12 |          0 |          0 |         12 | 
##                 |      1.000 |      0.000 |      0.000 |      0.300 | 
##                 |      1.000 |      0.000 |      0.000 |            | 
##                 |      0.300 |      0.000 |      0.000 |            | 
## ----------------|------------|------------|------------|------------|
##      versicolor |          0 |         12 |          0 |         12 | 
##                 |      0.000 |      1.000 |      0.000 |      0.300 | 
##                 |      0.000 |      0.923 |      0.000 |            | 
##                 |      0.000 |      0.300 |      0.000 |            | 
## ----------------|------------|------------|------------|------------|
##       virginica |          0 |          1 |         15 |         16 | 
##                 |      0.000 |      0.062 |      0.938 |      0.400 | 
##                 |      0.000 |      0.077 |      1.000 |            | 
##                 |      0.000 |      0.025 |      0.375 |            | 
## ----------------|------------|------------|------------|------------|
##    Column Total |         12 |         13 |         15 |         40 | 
##                 |      0.300 |      0.325 |      0.375 |            | 
## ----------------|------------|------------|------------|------------|
## 
## 

Kode 2

#kNN Tutorial on Iris Data Set####
library(class) #Has the knn function
# Fix the RNG state so the split below is reproducible.
set.seed(4948493)
# Draw 70% of the row indices of iris (without replacement) for training;
# the remaining 30% of rows form the test set.
ir_sample <- sample.int(nrow(iris), size = nrow(iris) * .7)
ir_train <- iris[ir_sample, ]   # rows drawn for training
ir_test <- iris[-ir_sample, ]   # every row not drawn
 
#First Attempt to Determine Right K####
# Test-set accuracy for k = 1..50 on the single train/test split held in
# ir_train / ir_test. The result is built in one pass with vapply() instead
# of growing a vector inside a loop, and the per-k predictions stay local so
# no global named `predict` is created.
iris_acc <- vapply(
  seq_len(50),
  function(k) {
    # Column 5 (Species) is the class label, so it is excluded from the
    # feature matrices passed to knn().
    k_pred <- knn(ir_train[, -5], ir_test[, -5], ir_train$Species, k = k)
    mean(k_pred == ir_test$Species)
  },
  numeric(1)
)
#Plot error rate (1 - accuracy) for k = 1 through 50
plot(1 - iris_acc, type = "l", ylab = "Error Rate",
     xlab = "K", main = "Error Rate for Iris With Varying K")

#Try many Samples of Iris Data Set to Validate K####
# Running totals per k (element j belongs to k = j), accumulated over all
# resamples: trial_sum counts correct predictions, trial_n counts test rows.
trial_sum <- numeric(20)
trial_n <- numeric(20)
set.seed(6033850)
for (i in seq_len(100)) {
  # Fresh random 70/30 split of iris for this trial.
  ir_sample <- sample(seq_len(nrow(iris)), size = nrow(iris) * .7)
  ir_train <- iris[ir_sample, ]
  ir_test <- iris[-ir_sample, ]
  test_size <- nrow(ir_test)
  # Number of correctly classified test rows for each k on this split.
  # Predictions are kept local to the helper so that no global named
  # `predict` is created.
  trial_sum <- trial_sum + vapply(
    seq_along(trial_sum),
    function(k) {
      k_pred <- knn(ir_train[, -5], ir_test[, -5], ir_train$Species, k = k)
      sum(k_pred == ir_test$Species)
    },
    integer(1)
  )
  # Every k was scored on the same test rows, so all counters advance
  # together.
  trial_n <- trial_n + test_size
}

# Pooled error rate per k across all trials.
plot(1 - trial_sum / trial_n, type = "l", ylab = "Error Rate",
     xlab = "K", main = "Error Rate for Iris With Varying K (100 Samples)")