This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Fix the random seed so the random train/test partition drawn later in
# this document is reproducible.
set.seed(seed = 150)

# Work on a local data-frame copy of the built-in iris data set.
iris <- data.frame(iris, stringsAsFactors = TRUE)

# Per-column summary statistics (and class counts for the factor column).
print(summary(iris))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Compact overview of the data frame: each column's type and first values.
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Number of rows (observations) and columns (variables).
dim(iris)
## [1] 150 5
# Count the observations in each species class.
table(iris$Species)
##
## setosa versicolor virginica
## 50 50 50
# Re-declare the class column as a factor with an explicit level order
# and matching labels.
iris[["Species"]] <- factor(
  x = iris[["Species"]],
  levels = c("setosa", "versicolor", "virginica"),
  labels = c("setosa", "versicolor", "virginica")
)

# Class distribution as percentages, rounded to one decimal place.
round(100 * prop.table(table(iris[["Species"]])), digits = 1)
##
## setosa versicolor virginica
## 33.3 33.3 33.3
# Assign every row at random to partition 1 (probability 0.7) or
# partition 2 (probability 0.3); sampling is with replacement, so the
# realised split sizes vary around 70/30.
ind <- sample(
  x = 2,
  size = nrow(iris),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
print(ind)
## [1] 1 1 1 2 1 1 1 1 1 2 1 2 2 1 1 2 1 1 1 1 1 1 1 1 1 2 1 2 1 1 2 2 1 1 2 1 1
## [38] 1 1 1 1 1 2 1 2 1 1 1 2 1 2 2 2 2 1 1 2 1 1 2 2 2 1 1 1 1 2 1 1 2 1 2 2 1
## [75] 1 2 2 1 1 2 1 1 1 1 1 1 2 1 1 1 1 1 2 1 2 1 1 2 1 1 2 2 1 1 1 1 1 1 1 2 1
## [112] 1 1 2 1 2 1 2 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 2
## [149] 1 1
# Partition the rows using the random assignment stored in `ind`:
# 1 -> training set, 2 -> test set.
trainData <- iris[ind == 1, ]
testData <- iris[ind == 2, ]

# Predictor-only copies for knn(). The label column is dropped by name
# rather than by position (-5), so the selection stays correct if columns
# are ever reordered or added; drop = FALSE guarantees a data frame result.
trainData1 <- trainData[, names(trainData) != "Species", drop = FALSE]
testData1 <- testData[, names(testData) != "Species", drop = FALSE]

# Inspect the training predictors.
str(trainData1)
## 'data.frame': 107 obs. of 4 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 5 5.4 4.6 5 4.4 5.4 4.3 ...
## $ Sepal.Width : num 3.5 3 3.2 3.6 3.9 3.4 3.4 2.9 3.7 3 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.4 1.7 1.4 1.5 1.4 1.5 1.1 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.2 0.1 ...
# Inspect the test predictors (label column already removed).
str(testData1)
## 'data.frame': 43 obs. of 4 variables:
## $ Sepal.Length: num 4.6 4.9 4.8 4.8 5.7 5 5.2 4.8 5.4 4.9 ...
## $ Sepal.Width : num 3.1 3.1 3.4 3 4.4 3 3.5 3.1 3.4 3.1 ...
## $ Petal.Length: num 1.5 1.5 1.6 1.4 1.5 1.6 1.5 1.6 1.5 1.5 ...
## $ Petal.Width : num 0.2 0.1 0.2 0.1 0.4 0.2 0.2 0.2 0.4 0.2 ...
# Class labels for the training and test rows, held separately from the
# predictor-only data frames. Uses `<-` for assignment, consistent with
# the rest of the document.
trainLabels <- trainData$Species
testLabels <- testData$Species
library(class)
library(gmodels)
# Classify each test row by majority vote among its 10 nearest training
# rows; the result is a factor of predicted species.
iris_test_pred <- knn(
  train = trainData1,
  test = testData1,
  cl = trainLabels,
  k = 10
)
print(iris_test_pred)
## [1] setosa setosa setosa setosa setosa setosa
## [7] setosa setosa setosa setosa setosa setosa
## [13] setosa versicolor versicolor virginica versicolor versicolor
## [19] versicolor versicolor versicolor versicolor versicolor versicolor
## [25] versicolor versicolor versicolor versicolor versicolor versicolor
## [31] versicolor versicolor virginica virginica virginica virginica
## [37] virginica virginica virginica virginica virginica virginica
## [43] virginica
## Levels: setosa versicolor virginica
# Cross-tabulate the actual test labels (rows) against the kNN predictions
# (columns). prop.chisq=FALSE leaves the chi-square contribution out of
# each cell, so cells show counts and row/column/table proportions only.
CrossTable(x = testLabels, y = iris_test_pred, prop.chisq=FALSE)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 43
##
##
## | iris_test_pred
## testLabels | setosa | versicolor | virginica | Row Total |
## -------------|------------|------------|------------|------------|
## setosa | 13 | 0 | 0 | 13 |
## | 1.000 | 0.000 | 0.000 | 0.302 |
## | 1.000 | 0.000 | 0.000 | |
## | 0.302 | 0.000 | 0.000 | |
## -------------|------------|------------|------------|------------|
## versicolor | 0 | 18 | 1 | 19 |
## | 0.000 | 0.947 | 0.053 | 0.442 |
## | 0.000 | 1.000 | 0.083 | |
## | 0.000 | 0.419 | 0.023 | |
## -------------|------------|------------|------------|------------|
## virginica | 0 | 0 | 11 | 11 |
## | 0.000 | 0.000 | 1.000 | 0.256 |
## | 0.000 | 0.000 | 0.917 | |
## | 0.000 | 0.000 | 0.256 | |
## -------------|------------|------------|------------|------------|
## Column Total | 13 | 18 | 12 | 43 |
## | 0.302 | 0.419 | 0.279 | |
## -------------|------------|------------|------------|------------|
##
##
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.