# Read the iris measurements from the UCI repository. The raw file has no
# header row, so the column names are supplied here. stringsAsFactors = TRUE
# keeps Species a factor on R >= 4.0 (where the default became FALSE), so
# summary() reports per-species counts.
ir_url <- "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
iris <- read.csv(
  ir_url,
  header = FALSE,
  col.names = c(
    "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species"
  ),
  stringsAsFactors = TRUE
)
summary(iris)
  Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
 Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
 1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
 Median :5.800   Median :3.000   Median :4.350   Median :1.300  
 Mean   :5.843   Mean   :3.054   Mean   :3.759   Mean   :1.199  
 3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
 Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
            Species  
 Iris-setosa    :50  
 Iris-versicolor:50  
 Iris-virginica :50  
                     
                     
                     
# Summary statistics for the two width measurements only.
summary(iris[, c("Petal.Width", "Sepal.Width")])
  Petal.Width     Sepal.Width   
 Min.   :0.100   Min.   :2.000  
 1st Qu.:0.300   1st Qu.:2.800  
 Median :1.300   Median :3.000  
 Mean   :1.199   Mean   :3.054  
 3rd Qu.:1.800   3rd Qu.:3.300  
 Max.   :2.500   Max.   :4.400  
# Summary statistics for the two length measurements only.
summary(iris[, c("Petal.Length", "Sepal.Length")])
  Petal.Length    Sepal.Length  
 Min.   :1.000   Min.   :4.300  
 1st Qu.:1.600   1st Qu.:5.100  
 Median :4.350   Median :5.800  
 Mean   :3.759   Mean   :5.843  
 3rd Qu.:5.100   3rd Qu.:6.400  
 Max.   :6.900   Max.   :7.900  
# Install ggvis only when it is missing, so re-running the script does not
# download the package again.
if (!requireNamespace("ggvis", quietly = TRUE)) {
  install.packages("ggvis")
}
The downloaded binary packages are in
    /var/folders/mr/m5qxlyt52516tbgsf2w8nmf80000gn/T//Rtmpn5GSGs/downloaded_packages
library(ggvis)

# Scatter plot of sepal length against sepal width, filled by species.
iris %>%
  ggvis(~Sepal.Length, ~Sepal.Width, fill = ~Species) %>%
  layer_points()

# Scatter plot of petal length against petal width, filled by species.
iris %>%
  ggvis(~Petal.Length, ~Petal.Width, fill = ~Species) %>%
  layer_points()
# Install class only when it is missing, so re-running the script does not
# download the package again.
if (!requireNamespace("class", quietly = TRUE)) {
  install.packages("class")
}
The downloaded binary packages are in
    /var/folders/mr/m5qxlyt52516tbgsf2w8nmf80000gn/T//Rtmpn5GSGs/downloaded_packages
library(class)

# Draw a partition id for every row: 1 with probability 0.8, 2 with
# probability 0.2. The seed makes the draw reproducible.
set.seed(3465)
ind <- sample(2, nrow(iris), replace = TRUE, prob = c(0.8, 0.2))

# Partition 1: the four measurement columns (everything except column 5)
# and the matching species labels.
irisTrain <- iris[ind == 1, -5]
irisTrainLabels <- iris$Species[ind == 1]

# Partition 2: the same pieces for the held-out rows.
irisTest <- iris[ind == 2, -5]
irisTestLabels <- iris$Species[ind == 2]

# Classify each held-out row from its 3 nearest training rows.
iris_pred <- knn(irisTrain, irisTest, cl = irisTrainLabels, k = 3)
iris_pred
    <li>Iris-setosa</li>
    <li>Iris-setosa</li>
    <li>Iris-setosa</li>
    <li>Iris-setosa</li>
    <li>Iris-setosa</li>
    <li>Iris-setosa</li>
    <li>Iris-setosa</li>
    <li>Iris-setosa</li>
    <li>Iris-versicolor</li>
    <li>Iris-versicolor</li>
    <li>Iris-versicolor</li>
    <li>Iris-versicolor</li>
    <li>Iris-versicolor</li>
    <li>Iris-versicolor</li>
    <li>Iris-versicolor</li>
    <li>Iris-versicolor</li>
    <li>Iris-virginica</li>
    <li>Iris-virginica</li>
    <li>Iris-virginica</li>
    <li>Iris-virginica</li>
# Install gmodels only when it is missing, so re-running the script does not
# download the package again.
if (!requireNamespace("gmodels", quietly = TRUE)) {
  install.packages("gmodels")
}
The downloaded binary packages are in
    /var/folders/mr/m5qxlyt52516tbgsf2w8nmf80000gn/T//Rtmpn5GSGs/downloaded_packages
library(gmodels)

# Cross-tabulate the true test labels against the KNN predictions. All
# proportion and chi-square cell entries are switched off so only raw counts
# are printed. FALSE is spelled out because F is an ordinary variable that
# can be reassigned.
CrossTable(
  x = irisTestLabels,
  y = iris_pred,
  prop.chisq = FALSE,
  prop.r = FALSE,
  prop.c = FALSE,
  prop.t = FALSE
)
   Cell Contents
|-------------------------|
|                       N |
|-------------------------|

 
Total Observations in Table:  20 

 
                | iris_pred 
 irisTestLabels |     Iris-setosa | Iris-versicolor |  Iris-virginica |       Row Total | 
----------------|-----------------|-----------------|-----------------|-----------------|
    Iris-setosa |               8 |               0 |               0 |               8 | 
----------------|-----------------|-----------------|-----------------|-----------------|
Iris-versicolor |               0 |               8 |               0 |               8 | 
----------------|-----------------|-----------------|-----------------|-----------------|
 Iris-virginica |               0 |               0 |               4 |               4 | 
----------------|-----------------|-----------------|-----------------|-----------------|
   Column Total |               8 |               8 |               4 |              20 | 
----------------|-----------------|-----------------|-----------------|-----------------|

 
# Install caret only when it is missing, so re-running the script does not
# download the package again.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
The downloaded binary packages are in
    /var/folders/mr/m5qxlyt52516tbgsf2w8nmf80000gn/T//Rtmpn5GSGs/downloaded_packages
library(caret)
Loading required package: lattice
Loading required package: ggplot2

Attaching package: ‘ggplot2’

The following object is masked from ‘package:ggvis’:

    resolution
# Draw a single 80% partition of row indices based on Species. list = FALSE
# asks for a matrix of row numbers instead of a list. FALSE is spelled out
# because F is an ordinary variable that can be reassigned.
set.seed(3456)
trainIndex <- createDataPartition(
  iris$Species,
  p = 0.8,
  list = FALSE,
  times = 1
)
head(trainIndex)
<tr><td>1</td></tr>
<tr><td>2</td></tr>
<tr><td>4</td></tr>
<tr><td>5</td></tr>
<tr><td>6</td></tr>
<tr><td>8</td></tr>
Resample1
# trainIndex holds row numbers, so it belongs in the row position of `[`.
# Without the trailing comma a matrix index on a data frame picks individual
# cells instead of whole rows.
irisTrain <- iris[trainIndex, ]
irisTest <- iris[-trainIndex, ]
#' Min-max rescale a vector to the interval [0, 1].
#'
#' @param x Numeric vector to rescale.
#' @param na.rm If `TRUE`, ignore `NA`s when finding the minimum and maximum
#'   (the `NA` entries themselves stay `NA`). Defaults to `FALSE`, in which
#'   case any `NA` in `x` makes every element of the result `NA`.
#' @return A vector the same length as `x` in which the smallest value maps
#'   to 0 and the largest to 1. A constant `x` yields `NaN` (0 / 0).
normalize <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)
  (x - rng[1]) / (rng[2] - rng[1])
}
# Run normalize() over each of the first four columns and collect the results
# in a new data frame, then print the summary of the original data.
iris_x <- as.data.frame(lapply(iris[, 1:4], normalize))
summary(iris)
  Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
 Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
 1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
 Median :5.800   Median :3.000   Median :4.350   Median :1.300  
 Mean   :5.843   Mean   :3.054   Mean   :3.759   Mean   :1.199  
 3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
 Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
            Species  
 Iris-setosa    :50  
 Iris-versicolor:50  
 Iris-virginica :50  
                     
                     
                     
# Summarize the rescaled columns produced by normalize().
summary(iris_x)
  Sepal.Length     Sepal.Width      Petal.Length     Petal.Width     
 Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
 1st Qu.:0.2222   1st Qu.:0.3333   1st Qu.:0.1017   1st Qu.:0.08333  
 Median :0.4167   Median :0.4167   Median :0.5678   Median :0.50000  
 Mean   :0.4287   Mean   :0.4392   Mean   :0.4676   Mean   :0.45778  
 3rd Qu.:0.5833   3rd Qu.:0.5417   3rd Qu.:0.6949   3rd Qu.:0.70833  
 Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
# Next step: re-run the KNN algorithm on the normalized data (iris_x), in the hope that it yields more accurate predictions.