R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button, a document is generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Seed the random number generator so that later random draws (such as the
# sample() call that assigns rows to the train/test groups) are repeatable.
set.seed(150)

# Work on a data-frame copy named iris; stringsAsFactors = TRUE asks
# data.frame() to turn any character columns into factors.
iris <- data.frame(iris, stringsAsFactors = TRUE)
# Per-column summary statistics (quartiles for numeric columns, counts for
# factor levels).
summary(iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Show the structure of the data frame: column names, types, and first values.
str(iris)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Number of rows and columns in the data frame.
dim(iris)
## [1] 150   5
# Count the observations that belong to each species.
table(iris$Species)
## 
##     setosa versicolor  virginica 
##         50         50         50
# Rebuild Species as a factor with an explicit level order; the labels are
# spelled out and match the level names.
iris$Species <- factor(
  iris$Species,
  levels = c("setosa", "versicolor", "virginica"),
  labels = c("setosa", "versicolor", "virginica")
)

# Percentage of observations in each species, rounded to one decimal place.
round(100 * prop.table(table(iris$Species)), digits = 1)
## 
##     setosa versicolor  virginica 
##       33.3       33.3       33.3
# Randomly tag every row with group 1 (probability 0.7) or group 2
# (probability 0.3). Each tag is drawn independently, so the group sizes only
# approximate a 70/30 split.
ind <- sample(x = 2, size = nrow(iris), replace = TRUE, prob = c(0.7, 0.3))
ind
##   [1] 1 1 1 2 1 1 1 1 1 2 1 2 2 1 1 2 1 1 1 1 1 1 1 1 1 2 1 2 1 1 2 2 1 1 2 1 1
##  [38] 1 1 1 1 1 2 1 2 1 1 1 2 1 2 2 2 2 1 1 2 1 1 2 2 2 1 1 1 1 2 1 1 2 1 2 2 1
##  [75] 1 2 2 1 1 2 1 1 1 1 1 1 2 1 1 1 1 1 2 1 2 1 1 2 1 1 2 2 1 1 1 1 1 1 1 2 1
## [112] 1 1 2 1 2 1 2 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 2
## [149] 1 1
# Rows tagged 1 in ind form the training set; rows tagged 2 form the test set.
trainData <- iris[ind == 1, ]
testData <- iris[ind == 2, ]


# Drop column 5 (Species, the class label) so that only the four numeric
# measurements remain as predictors.
trainData1 <- trainData[-5]
testData1 <- testData[-5]

# Confirm the training predictors contain only the measurement columns.
str(trainData1)
## 'data.frame':    107 obs. of  4 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 5 5.4 4.6 5 4.4 5.4 4.3 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.6 3.9 3.4 3.4 2.9 3.7 3 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.4 1.7 1.4 1.5 1.4 1.5 1.1 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.2 0.1 ...
# Same structure check for the test predictors.
str(testData1)
## 'data.frame':    43 obs. of  4 variables:
##  $ Sepal.Length: num  4.6 4.9 4.8 4.8 5.7 5 5.2 4.8 5.4 4.9 ...
##  $ Sepal.Width : num  3.1 3.1 3.4 3 4.4 3 3.5 3.1 3.4 3.1 ...
##  $ Petal.Length: num  1.5 1.5 1.6 1.4 1.5 1.6 1.5 1.6 1.5 1.5 ...
##  $ Petal.Width : num  0.2 0.1 0.2 0.1 0.4 0.2 0.2 0.2 0.4 0.2 ...
# Keep the species of each row as class labels: the training labels feed the
# classifier, the test labels are held back for evaluating its predictions.
trainLabels <- trainData[["Species"]]
testLabels <- testData[["Species"]]


# class supplies knn(); gmodels supplies CrossTable().
library(class)
library(gmodels)

# Classify every test row from its 10 nearest training rows.
iris_test_pred <- knn(
  train = trainData1,
  test = testData1,
  cl = trainLabels,
  k = 10
)

iris_test_pred
##  [1] setosa     setosa     setosa     setosa     setosa     setosa    
##  [7] setosa     setosa     setosa     setosa     setosa     setosa    
## [13] setosa     versicolor versicolor virginica  versicolor versicolor
## [19] versicolor versicolor versicolor versicolor versicolor versicolor
## [25] versicolor versicolor versicolor versicolor versicolor versicolor
## [31] versicolor versicolor virginica  virginica  virginica  virginica 
## [37] virginica  virginica  virginica  virginica  virginica  virginica 
## [43] virginica 
## Levels: setosa versicolor virginica
# Cross-tabulate the actual species (rows) against the predicted species
# (columns); the per-cell chi-square contribution is left out of the printout.
CrossTable(x = testLabels, y = iris_test_pred, prop.chisq = FALSE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  43 
## 
##  
##              | iris_test_pred 
##   testLabels |     setosa | versicolor |  virginica |  Row Total | 
## -------------|------------|------------|------------|------------|
##       setosa |         13 |          0 |          0 |         13 | 
##              |      1.000 |      0.000 |      0.000 |      0.302 | 
##              |      1.000 |      0.000 |      0.000 |            | 
##              |      0.302 |      0.000 |      0.000 |            | 
## -------------|------------|------------|------------|------------|
##   versicolor |          0 |         18 |          1 |         19 | 
##              |      0.000 |      0.947 |      0.053 |      0.442 | 
##              |      0.000 |      1.000 |      0.083 |            | 
##              |      0.000 |      0.419 |      0.023 |            | 
## -------------|------------|------------|------------|------------|
##    virginica |          0 |          0 |         11 |         11 | 
##              |      0.000 |      0.000 |      1.000 |      0.256 | 
##              |      0.000 |      0.000 |      0.917 |            | 
##              |      0.000 |      0.000 |      0.256 |            | 
## -------------|------------|------------|------------|------------|
## Column Total |         13 |         18 |         12 |         43 | 
##              |      0.302 |      0.419 |      0.279 |            | 
## -------------|------------|------------|------------|------------|
## 
## 

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.