CW4325

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(class)

## Warning: package 'class' was built under R version 4.4.3

library(gmodels)

## Warning: package 'gmodels' was built under R version 4.4.3

# Load the built-in iris measurements into the workspace
data("iris", package = "datasets")

# Step 1: Data Exploration
# Compact overview: dimensions, column types, and the first few values
str(iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

# Per-column summary: quartiles/mean for the numeric columns, counts for Species
summary(iris)

##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
##

# Class balance check: number of rows per species
table(iris$Species)

## 
##     setosa versicolor  virginica 
##         50         50         50

# Step 2: Normal Train-Test Split (70%-30%)
# Each row is independently drawn into group 1 (train, p = 0.7) or group 2
# (test, p = 0.3), so the realised split is only approximately 70/30.
set.seed(123)  # Ensure reproducibility
ind <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))

# Partition the rows according to their assigned group
in_train <- ind == 1
trainData <- iris[in_train, ]
testData <- iris[!in_train, ]

# Species is the class label; every other column is a predictor
is_feature <- names(iris) != "Species"
trainData1 <- trainData[, is_feature]
testData1 <- testData[, is_feature]

trainLabels <- trainData[["Species"]]
testLabels <- testData[["Species"]]

# Train KNN Model (Normal Case)
# Classify each held-out row by a vote among its 3 nearest training rows,
# then cross-tabulate the true species (rows) against the predictions (columns).
test_pred <- class::knn(trainData1, testData1, cl = trainLabels, k = 3)
gmodels::CrossTable(testLabels, test_pred, prop.chisq = FALSE)

## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  44 
## 
##  
##              | test_pred 
##   testLabels |     setosa | versicolor |  virginica |  Row Total | 
## -------------|------------|------------|------------|------------|
##       setosa |         15 |          0 |          0 |         15 | 
##              |      1.000 |      0.000 |      0.000 |      0.341 | 
##              |      1.000 |      0.000 |      0.000 |            | 
##              |      0.341 |      0.000 |      0.000 |            | 
## -------------|------------|------------|------------|------------|
##   versicolor |          0 |         12 |          2 |         14 | 
##              |      0.000 |      0.857 |      0.143 |      0.318 | 
##              |      0.000 |      0.923 |      0.125 |            | 
##              |      0.000 |      0.273 |      0.045 |            | 
## -------------|------------|------------|------------|------------|
##    virginica |          0 |          1 |         14 |         15 | 
##              |      0.000 |      0.067 |      0.933 |      0.341 | 
##              |      0.000 |      0.077 |      0.875 |            | 
##              |      0.000 |      0.023 |      0.318 |            | 
## -------------|------------|------------|------------|------------|
## Column Total |         15 |         13 |         16 |         44 | 
##              |      0.341 |      0.295 |      0.364 |            | 
## -------------|------------|------------|------------|------------|
## 
##

# Calculate Accuracy
# Fraction of held-out rows whose predicted species equals the true species
n_correct <- sum(testLabels == test_pred)
accuracy <- n_correct / length(testLabels)
accuracy_msg <- paste(
  "Normal Train-Test Split Accuracy:",
  round(accuracy * 100, 2),
  "%"
)
print(accuracy_msg)

## [1] "Normal Train-Test Split Accuracy: 93.18 %"

# ---- 100% Accuracy Test ----

# The whole dataset is used for both fitting and scoring. Every scored row is
# therefore also a training row, so this run shows memorisation of the data
# rather than performance on unseen observations.
trainLabels_full <- iris[["Species"]]
trainData_full <- iris[, names(iris) != "Species"]  # Features only
testData_full <- trainData_full  # Test set is the same as training set
testLabels_full <- trainLabels_full

# Train KNN Model with k=1 (each row is matched to its single nearest row)
test_pred_full <- class::knn(
  trainData_full,
  testData_full,
  cl = trainLabels_full,
  k = 1
)

# CrossTable for 100% accuracy test: true species (rows) vs. predictions (columns)
gmodels::CrossTable(testLabels_full, test_pred_full, prop.chisq = FALSE)

## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  150 
## 
##  
##                 | test_pred_full 
## testLabels_full |     setosa | versicolor |  virginica |  Row Total | 
## ----------------|------------|------------|------------|------------|
##          setosa |         50 |          0 |          0 |         50 | 
##                 |      1.000 |      0.000 |      0.000 |      0.333 | 
##                 |      1.000 |      0.000 |      0.000 |            | 
##                 |      0.333 |      0.000 |      0.000 |            | 
## ----------------|------------|------------|------------|------------|
##      versicolor |          0 |         50 |          0 |         50 | 
##                 |      0.000 |      1.000 |      0.000 |      0.333 | 
##                 |      0.000 |      1.000 |      0.000 |            | 
##                 |      0.000 |      0.333 |      0.000 |            | 
## ----------------|------------|------------|------------|------------|
##       virginica |          0 |          0 |         50 |         50 | 
##                 |      0.000 |      0.000 |      1.000 |      0.333 | 
##                 |      0.000 |      0.000 |      1.000 |            | 
##                 |      0.000 |      0.000 |      0.333 |            | 
## ----------------|------------|------------|------------|------------|
##    Column Total |         50 |         50 |         50 |        150 | 
##                 |      0.333 |      0.333 |      0.333 |            | 
## ----------------|------------|------------|------------|------------|
## 
##

# Share of rows whose predicted species matches the true species; the
# recorded run of this document reports 100%
n_correct_full <- sum(testLabels_full == test_pred_full)
accuracy_full <- n_correct_full / length(testLabels_full)
accuracy_full_msg <- paste(
  "100% Accuracy Test:",
  round(accuracy_full * 100, 2),
  "%"
)
print(accuracy_full_msg)

## [1] "100% Accuracy Test: 100 %"

CW4325

KamRon Jackson

2025-04-03

R Markdown

Including Plots