# Install the packages this script needs, but only those not already present.
# Package names are case-sensitive: the companion package to Rcmdr is
# "RcmdrMisc" (install.packages("Rcmdrmisc") fails with "not available").
required_packages <- c("Rcmdr", "RcmdrMisc", "rlang", "ggplot2", "car")
is_installed <- vapply(
  required_packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
if (any(!is_installed)) {
  install.packages(required_packages[!is_installed])
}
library(Rcmdr)
## Warning: package 'Rcmdr' was built under R version 3.6.1
## Loading required package: splines
## Loading required package: RcmdrMisc
## Loading required package: car
## Warning: package 'car' was built under R version 3.6.1
## Loading required package: carData
## Loading required package: sandwich
## Loading required package: effects
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## The Commander GUI is launched only in interactive sessions
## 
## Attaching package: 'Rcmdr'
## The following object is masked from 'package:car':
## 
##     Confint
## The following object is masked from 'package:base':
## 
##     errorCondition
library(RcmdrMisc)
library(class)
## Warning: package 'class' was built under R version 3.6.1
library(rlang)
## Warning: package 'rlang' was built under R version 3.6.1
library(car)
# Load binary and trinary data
# Both CSV files live in the same folder; edit data_dir to point elsewhere.
data_dir <- "C:/Users/Owner/Desktop/Lenin Files/Data Sciences/Assignments"
binary_data <- read.csv(file.path(data_dir, "binary-classifier-data.csv"))
trinary_data <- read.csv(file.path(data_dir, "trinary-classifier-data.csv"))
# Each file has a class label column followed by two numeric features, x and y.
head(binary_data)
##   label        x        y
## 1     0 70.88469 83.17702
## 2     0 74.97176 87.92922
## 3     0 73.78333 92.20325
## 4     0 66.40747 81.10617
## 5     0 69.07399 84.53739
## 6     0 72.23616 86.38403
head(trinary_data)
##   label        x        y
## 1     0 30.08387 39.63094
## 2     0 31.27613 51.77511
## 3     0 34.12138 49.27575
## 4     0 32.58222 41.23300
## 5     0 34.65069 45.47956
## 6     0 33.80513 44.24656
# Scatter plot of the binary dataset, coloured by class label.
# ggplot2 is already installed at the top of the script, so it is only
# loaded here rather than downloaded a second time.
library(ggplot2)
scatterbin <- ggplot(binary_data, aes(x, y))
# label is numeric in the CSV; factor() makes it a discrete grouping so the
# points and the per-class linear fits each get their own colour, and the
# "label" legend title refers to aesthetics that are actually mapped.
scatterbinary <- scatterbin +
  geom_point(aes(colour = factor(label))) +
  geom_smooth(
    method = "lm",
    aes(colour = factor(label), fill = factor(label)),
    alpha = 0.1
  ) +
  labs(x = "Value of X", y = "Value of y", colour = "label", fill = "label")
scatterbinary

# Scatter plot of the trinary dataset, coloured by class label.
scattertrin <- ggplot(trinary_data, aes(x, y))
# label is numeric in the CSV; factor() makes it a discrete grouping so the
# points and the per-class linear fits each get their own colour, and the
# "label" legend title refers to aesthetics that are actually mapped.
scattertrinary <- scattertrin +
  geom_point(aes(colour = factor(label))) +
  geom_smooth(
    method = "lm",
    aes(colour = factor(label), fill = factor(label)),
    alpha = 0.1
  ) +
  labs(x = "Value of X", y = "Value of y", colour = "label", fill = "label")
scattertrinary

# Feature-only copies of each dataset: keep x and y, drop the label column.
# (No rescaling is applied here; the values are left on their original scale.)
binary_data1 <- binary_data[c("x", "y")]
trinary_data1 <- trinary_data[c("x", "y")]
# Split the binary data into training (70%) and test (30%) rows.
# The seed fixes which rows are drawn so the split is repeatable.
set.seed(111)
dat.binary <- sample(
  seq_len(nrow(binary_data1)),
  size = floor(0.7 * nrow(binary_data1)),
  replace = FALSE
)

# Rows drawn above form the training set; all remaining rows form the test set.
train.binary <- binary_data[dat.binary, ]
test.binary <- binary_data[-dat.binary, ]
# Split the trinary data into training (70%) and test (30%) rows.
# The seed fixes which rows are drawn so the split is repeatable.
set.seed(123)
dat.d <- sample(
  seq_len(nrow(trinary_data1)),
  size = floor(0.7 * nrow(trinary_data1)),
  replace = FALSE
)

# Rows drawn above form the training set; all remaining rows form the test set.
train.trinary <- trinary_data[dat.d, ]
test.trinary <- trinary_data[-dat.d, ]
# Class labels for the binary training and test rows, as plain vectors.
# Column 1 of binary_data is `label`.
train.binarylabels <- binary_data[[1]][dat.binary]
test.binarylabels <- binary_data[[1]][-dat.binary]
# Number of rows in the binary training data
nrow(train.binary)
## [1] 1048
# Number of rows in the binary test data
nrow(test.binary)
## [1] 450
# Number of binary training labels (should match the training row count)
length(train.binarylabels)
## [1] 1048
# Number of binary test labels (should match the test row count)
length(test.binarylabels)
## [1] 450
# Fit k-NN classifiers on the binary data for several values of k and record
# the test-set accuracy (in percent) of each one.
#
# k.binaryaccur is indexed by k itself (k.binaryaccur[10] holds the accuracy
# for k = 10). It starts as all NA so that positions for k values that were
# never tried stay NA instead of holding a placeholder number.
k.values <- c(3, 5, 10, 15, 20, 25)
feature.cols <- c("x", "y")
k.binaryaccur <- rep(NA_real_, max(k.values))
for (k in k.values) {
  # train.binary / test.binary still contain the label column; pass only the
  # x and y features so the target is not used as a predictor.
  knn.mod <- class::knn(
    train = train.binary[, feature.cols],
    test = test.binary[, feature.cols],
    cl = train.binarylabels,
    k = k
  )
  # Share of test rows whose predicted class equals the true label.
  k.binaryaccur[k] <- 100 * mean(test.binarylabels == knn.mod)
  cat(k, "=", k.binaryaccur[k], "")
}
# Class labels for the trinary training and test rows, as plain vectors.
# Column 1 of trinary_data is `label`.
train.trinarylabels <- trinary_data[[1]][dat.d]
test.trinarylabels <- trinary_data[[1]][-dat.d]
# Number of rows in the trinary training data
nrow(train.trinary)
## [1] 1097
# Number of rows in the trinary test data
nrow(test.trinary)
## [1] 471
# Number of trinary training labels (should match the training row count)
length(train.trinarylabels)
## [1] 1097
# Number of trinary test labels (should match the test row count)
length(test.trinarylabels)
## [1] 471
# Fit k-NN classifiers on the trinary data for several values of k and record
# the test-set accuracy (in percent) of each one.
#
# k.trinaryaccur is indexed by k itself (k.trinaryaccur[10] holds the accuracy
# for k = 10). It starts as all NA so that positions for k values that were
# never tried stay NA instead of holding a placeholder number.
k.values <- c(3, 5, 10, 15, 20, 25)
feature.cols <- c("x", "y")
k.trinaryaccur <- rep(NA_real_, max(k.values))
for (k in k.values) {
  # train.trinary / test.trinary still contain the label column; pass only the
  # x and y features so the target is not used as a predictor.
  knn.modtri <- class::knn(
    train = train.trinary[, feature.cols],
    test = test.trinary[, feature.cols],
    cl = train.trinarylabels,
    k = k
  )
  # Share of test rows whose predicted class equals the true label.
  k.trinaryaccur[k] <- 100 * mean(test.trinarylabels == knn.modtri)
  cat(k, "=", k.trinaryaccur[k], "")
}
# Plot test accuracy against k for each dataset.
# The accuracy vectors are indexed by k, so only the positions of the k values
# that were evaluated hold results. Plotting the raw vectors would put every
# index on the x-axis, leaving NA gaps that break the connecting line.
# These k values must match the ones used in the k-NN loops.
k.values <- c(3, 5, 10, 15, 20, 25)
plot(k.values, k.trinaryaccur[k.values], type = "b",
     xlab = "K- Value", ylab = "Accuracy level", main = "Trinary data")

plot(k.values, k.binaryaccur[k.values], type = "b",
     xlab = "K- Value", ylab = "Accuracy level", main = "Binary data")