# Setup ----
# Install whichever required packages are missing, then attach them.
# The package name is "RcmdrMisc" (capital M): the earlier call
# install.packages("Rcmdrmisc") failed with
#   "package 'Rcmdrmisc' is not available ... Perhaps you meant 'RcmdrMisc' ?"
# Installing only what is missing avoids re-downloading every package each
# time the script runs.
required_packages <- c("Rcmdr", "RcmdrMisc", "rlang", "ggplot2", "car")
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Notes from a previous run of this script (R 3.6.x):
# - attaching Rcmdr also loads splines, RcmdrMisc, car, carData, sandwich
#   and effects;
# - the Commander GUI is launched only in interactive sessions;
# - Rcmdr masks `Confint` from package:car and `errorCondition` from
#   package:base.
library(Rcmdr)
library(RcmdrMisc)
library(class)
library(rlang)
library(car)
# Read the binary- and trinary-label classifier datasets from CSV and
# preview the first six rows of each
binary_data <- utils::read.csv(
  file = "C:/Users/Owner/Desktop/Lenin Files/Data Sciences/Assignments/binary-classifier-data.csv"
)
trinary_data <- utils::read.csv(
  file = "C:/Users/Owner/Desktop/Lenin Files/Data Sciences/Assignments/trinary-classifier-data.csv"
)
head(x = binary_data, n = 6L)
## label x y
## 1 0 70.88469 83.17702
## 2 0 74.97176 87.92922
## 3 0 73.78333 92.20325
## 4 0 66.40747 81.10617
## 5 0 69.07399 84.53739
## 6 0 72.23616 86.38403
head(x = trinary_data, n = 6L)
## label x y
## 1 0 30.08387 39.63094
## 2 0 31.27613 51.77511
## 3 0 34.12138 49.27575
## 4 0 32.58222 41.23300
## 5 0 34.65069 45.47956
## 6 0 33.80513 44.24656
# Scatter plot of binary dataset
# `label` is read as a number, so it is wrapped in factor() to give one
# colour (and one fitted line with its confidence band) per class. Mapping
# the raw numeric column to `fill` alone did not split the data by class,
# and the `colour = "label"` legend title had no colour aesthetic to title.
library(ggplot2)
scatterbin <- ggplot(binary_data, aes(x, y, colour = factor(label)))
scatterbinary <- scatterbin +
  geom_point() +
  geom_smooth(method = "lm", aes(fill = factor(label)), alpha = 0.1) +
  labs(x = "Value of X", y = "Value of y", colour = "label", fill = "label")
scatterbinary

# Scatter plot of trinary dataset (same layout as the binary plot)
scattertrin <- ggplot(trinary_data, aes(x, y, colour = factor(label)))
scattertrinary <- scattertrin +
  geom_point() +
  geom_smooth(method = "lm", aes(fill = factor(label)), alpha = 0.1) +
  labs(x = "Value of X", y = "Value of y", colour = "label", fill = "label")
scattertrinary

# Feature-only copies (columns x and y) of each dataset.
# Note: the values are not rescaled here; the copies are only used below
# to obtain the row counts for the train/test split.
binary_data1 <- binary_data[c("x", "y")]
trinary_data1 <- trinary_data[c("x", "y")]

# Binary data: draw 70% of the row indices at random (without replacement)
# for training; the remaining 30% of the rows form the test set.
# train.binary / test.binary keep every column of binary_data.
set.seed(111)
dat.binary <- sample.int(
  nrow(binary_data1),
  size = nrow(binary_data1) * 0.7,
  replace = FALSE
)
train.binary <- binary_data[dat.binary, ]
test.binary <- binary_data[-dat.binary, ]

# Trinary data: same 70/30 split, with its own seed.
set.seed(123)
dat.d <- sample.int(
  nrow(trinary_data1),
  size = nrow(trinary_data1) * 0.7,
  replace = FALSE
)
train.trinary <- trinary_data[dat.d, ]
test.trinary <- trinary_data[-dat.d, ]
# Class labels (first column of binary_data) for the training rows and
# the test rows, extracted as plain vectors
train.binarylabels <- binary_data[[1]][dat.binary]
test.binarylabels <- binary_data[[1]][-dat.binary]
# Number of rows in the binary training set
nrow(train.binary)
## [1] 1048
# Number of rows in the binary test set
nrow(test.binary)
## [1] 450
# Number of labels for the binary training set
length(train.binarylabels)
## [1] 1048
# Number of labels for the binary test set
length(test.binarylabels)
## [1] 450
# Build k-NN models for the binary data with several values of k.
# k.binaryaccur is a numeric vector holding the test-set accuracy (percent)
# at index k; positions for k values that are not evaluated stay NA, so no
# placeholder value can be mistaken for a result.
library(class)
list2 <- list(3, 5, 10, 15, 20, 25)
k.binaryaccur <- rep(NA_real_, max(unlist(list2)))
for (i in list2) {
  # train.binary / test.binary also contain the `label` column. Only x and y
  # are passed as features: giving the classifier the label itself would
  # leak the target into the distance computation.
  knn.mod <- knn(
    train = train.binary[, c("x", "y")],
    test = test.binary[, c("x", "y")],
    cl = train.binarylabels,
    k = i
  )
  k.binaryaccur[i] <-
    100 * sum(test.binarylabels == knn.mod) / NROW(test.binarylabels)
  cat(i, "=", k.binaryaccur[i], "")
}
## Output recorded from an earlier run, in which the label column was still
## included among the features (re-run to refresh these figures):
## 3 = 97.55556 5 = 97.33333 10 = 97.55556 15 = 97.55556 20 = 96.66667 25 = 96.44444
# Class labels (first column of trinary_data) for the training rows and
# the test rows, extracted as plain vectors
train.trinarylabels <- trinary_data[[1]][dat.d]
test.trinarylabels <- trinary_data[[1]][-dat.d]
# Number of rows in the trinary training set
nrow(train.trinary)
## [1] 1097
# Number of rows in the trinary test set
nrow(test.trinary)
## [1] 471
# Number of labels for the trinary training set
length(train.trinarylabels)
## [1] 1097
# Number of labels for the trinary test set
length(test.trinarylabels)
## [1] 471
# Build k-NN models for the trinary data with several values of k.
# k.trinaryaccur is a numeric vector holding the test-set accuracy (percent)
# at index k; positions for k values that are not evaluated stay NA, so no
# placeholder value can be mistaken for a result.
library(class)
list3 <- list(3, 5, 10, 15, 20, 25)
k.trinaryaccur <- rep(NA_real_, max(unlist(list3)))
for (j in list3) {
  # train.trinary / test.trinary also contain the `label` column. Only x and
  # y are passed as features: giving the classifier the label itself would
  # leak the target into the distance computation.
  knn.modtri <- knn(
    train = train.trinary[, c("x", "y")],
    test = test.trinary[, c("x", "y")],
    cl = train.trinarylabels,
    k = j
  )
  k.trinaryaccur[j] <-
    100 * sum(test.trinarylabels == knn.modtri) / NROW(test.trinarylabels)
  cat(j, "=", k.trinaryaccur[j], "")
}
## Output recorded from an earlier run, in which the label column was still
## included among the features (re-run to refresh these figures):
## 3 = 93.20594 5 = 92.14437 10 = 90.23355 15 = 89.17197 20 = 87.68577 25 = 86.83652
# Plot accuracy against k for the trinary data.
# The accuracy vector is indexed by k and is sparse, so only the k values
# listed in list3 are plotted: this keeps unevaluated positions out of the
# picture and lets type = "b" join the points with lines.
plot(
  unlist(list3), k.trinaryaccur[unlist(list3)],
  type = "b", xlab = "K- Value", ylab = "Accuracy level",
  main = "Trinary data"
)

# Plot accuracy against k for the binary data (k values from list2).
plot(
  unlist(list2), k.binaryaccur[unlist(list2)],
  type = "b", xlab = "K- Value", ylab = "Accuracy level",
  main = "Binary data"
)
