# Load the UCI iris measurements. The raw file has no header row, so the
# column names are assigned explicitly after reading.
# NOTE(review): this object is named `iris`, which masks the built-in
# `datasets::iris` for the rest of the session.
ir_url <- 'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
# stringsAsFactors = TRUE keeps Species a factor on R >= 4.0 (where the default
# became FALSE), so summary() reports per-species counts.
iris <- read.csv(url(ir_url), header = FALSE, stringsAsFactors = TRUE)
names(iris) <- c(
  "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species"
)
summary(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width
Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
Median :5.800 Median :3.000 Median :4.350 Median :1.300
Mean :5.843 Mean :3.054 Mean :3.759 Mean :1.199
3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
Species
Iris-setosa :50
Iris-versicolor:50
Iris-virginica :50
# Summarise only the two width measurements.
summary(iris[, c("Petal.Width", "Sepal.Width")])
Petal.Width Sepal.Width
Min. :0.100 Min. :2.000
1st Qu.:0.300 1st Qu.:2.800
Median :1.300 Median :3.000
Mean :1.199 Mean :3.054
3rd Qu.:1.800 3rd Qu.:3.300
Max. :2.500 Max. :4.400
# Summarise only the two length measurements.
summary(iris[, c("Petal.Length", "Sepal.Length")])
Petal.Length Sepal.Length
Min. :1.000 Min. :4.300
1st Qu.:1.600 1st Qu.:5.100
Median :4.350 Median :5.800
Mean :3.759 Mean :5.843
3rd Qu.:5.100 3rd Qu.:6.400
Max. :6.900 Max. :7.900
# Install ggvis only when it is not already available, so re-running the
# script does not download the package again every time.
if (!requireNamespace("ggvis", quietly = TRUE)) {
  install.packages("ggvis")
}
The downloaded binary packages are in
/var/folders/mr/m5qxlyt52516tbgsf2w8nmf80000gn/T//Rtmpn5GSGs/downloaded_packages
library(ggvis)

# Scatter plot of sepal length vs. sepal width, points filled by species.
iris %>%
  ggvis(~Sepal.Length, ~Sepal.Width, fill = ~Species) %>%
  layer_points()

# Scatter plot of petal length vs. petal width, points filled by species.
iris %>%
  ggvis(~Petal.Length, ~Petal.Width, fill = ~Species) %>%
  layer_points()

# Install class only when it is missing, instead of on every run.
if (!requireNamespace("class", quietly = TRUE)) {
  install.packages("class")
}
The downloaded binary packages are in
/var/folders/mr/m5qxlyt52516tbgsf2w8nmf80000gn/T//Rtmpn5GSGs/downloaded_packages
library(class)

# Fix the RNG seed so the random train/test assignment is reproducible.
set.seed(3465)

# Draw a 1 (train, probability 0.8) or a 2 (test, probability 0.2) per row.
ind <- sample(2, nrow(iris), replace = TRUE, prob = c(0.8, 0.2))

# Columns 1:4 hold the measurements used as predictors; column 5 is the label.
irisTrain <- iris[ind == 1, 1:4]
irisTrainLabels <- iris[[5]][ind == 1]
irisTest <- iris[ind == 2, 1:4]
irisTestLabels <- iris[[5]][ind == 2]

# 3-nearest-neighbour classification of the held-out rows.
iris_pred <- knn(
  train = irisTrain,
  test = irisTest,
  cl = irisTrainLabels,
  k = 3
)
iris_pred
<li>Iris-setosa</li>
<li>Iris-setosa</li>
<li>Iris-setosa</li>
<li>Iris-setosa</li>
<li>Iris-setosa</li>
<li>Iris-setosa</li>
<li>Iris-setosa</li>
<li>Iris-setosa</li>
<li>Iris-versicolor</li>
<li>Iris-versicolor</li>
<li>Iris-versicolor</li>
<li>Iris-versicolor</li>
<li>Iris-versicolor</li>
<li>Iris-versicolor</li>
<li>Iris-versicolor</li>
<li>Iris-versicolor</li>
<li>Iris-virginica</li>
<li>Iris-virginica</li>
<li>Iris-virginica</li>
<li>Iris-virginica</li>
# Install gmodels only when it is not already available, so re-running the
# script does not download the package again every time.
if (!requireNamespace("gmodels", quietly = TRUE)) {
  install.packages("gmodels")
}
The downloaded binary packages are in
/var/folders/mr/m5qxlyt52516tbgsf2w8nmf80000gn/T//Rtmpn5GSGs/downloaded_packages
library(gmodels)

# Cross-tabulate true labels (rows) against predicted labels (columns).
# Every proportion and chi-square annotation is switched off so each cell
# shows only the count N. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
CrossTable(
  x = irisTestLabels, y = iris_pred,
  prop.chisq = FALSE, prop.r = FALSE, prop.c = FALSE, prop.t = FALSE
)
Cell Contents
|-------------------------|
| N |
|-------------------------|
Total Observations in Table: 20
| iris_pred
irisTestLabels | Iris-setosa | Iris-versicolor | Iris-virginica | Row Total |
----------------|-----------------|-----------------|-----------------|-----------------|
Iris-setosa | 8 | 0 | 0 | 8 |
----------------|-----------------|-----------------|-----------------|-----------------|
Iris-versicolor | 0 | 8 | 0 | 8 |
----------------|-----------------|-----------------|-----------------|-----------------|
Iris-virginica | 0 | 0 | 4 | 4 |
----------------|-----------------|-----------------|-----------------|-----------------|
Column Total | 8 | 8 | 4 | 20 |
----------------|-----------------|-----------------|-----------------|-----------------|
# Install caret only when it is not already available, so re-running the
# script does not download the package again every time.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
The downloaded binary packages are in
/var/folders/mr/m5qxlyt52516tbgsf2w8nmf80000gn/T//Rtmpn5GSGs/downloaded_packages
library(caret)
Loading required package: lattice
Loading required package: ggplot2
Attaching package: ‘ggplot2’
The following object is masked from ‘package:ggvis’:
resolution
# Fix the RNG seed so the partition is reproducible.
set.seed(3456)

# Partition on Species with p = 0.8 going to the training side. With
# list = FALSE the result is a one-column matrix of row numbers (column
# "Resample1") rather than a list. FALSE is spelled out because `F` can be
# reassigned.
trainIndex <- createDataPartition(
  iris$Species,
  p = 0.8, list = FALSE, times = 1
)
head(trainIndex)
|
Resample1
|
<tr><td>1</td></tr>
<tr><td>2</td></tr>
<tr><td>4</td></tr>
<tr><td>5</td></tr>
<tr><td>6</td></tr>
<tr><td>8</td></tr>
# trainIndex is a one-column matrix of row numbers, so the trailing comma is
# required: without it, `[` on a data frame treats the matrix as element
# positions and returns a plain vector of cell values instead of rows.
irisTrain <- iris[trainIndex, ]
irisTest <- iris[-trainIndex, ]
# Min-max rescaling: subtracts the minimum of `x` and divides by its range,
# so the smallest value maps to 0 and the largest to 1.
normalize <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  (x - lo) / span
}
# Rescale each of the four measurement columns independently, then print the
# unscaled summary again for side-by-side comparison.
iris_x <- as.data.frame(lapply(iris[, 1:4], normalize))
summary(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width
Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
Median :5.800 Median :3.000 Median :4.350 Median :1.300
Mean :5.843 Mean :3.054 Mean :3.759 Mean :1.199
3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
Species
Iris-setosa :50
Iris-versicolor:50
Iris-virginica :50
# Summarise the rescaled columns; each should now have Min. 0 and Max. 1.
summary(iris_x)
Sepal.Length Sepal.Width Petal.Length Petal.Width
Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
1st Qu.:0.2222 1st Qu.:0.3333 1st Qu.:0.1017 1st Qu.:0.08333
Median :0.4167 Median :0.4167 Median :0.5678 Median :0.50000
Mean :0.4287 Mean :0.4392 Mean :0.4676 Mean :0.45778
3rd Qu.:0.5833 3rd Qu.:0.5417 3rd Qu.:0.6949 3rd Qu.:0.70833
Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
# Next, re-run the KNN algorithm on the normalized data; the hope is that it
# yields more accurate predictions.