IMPORT DATA
# Report the working directory the session is currently using.
getwd()
## [1] "/Users/anand/RProjects/KNN"
# NOTE(review): setwd() with a hard-coded absolute path only works on the
# author's machine; consider an RStudio project or a relative path instead.
setwd("/Users/anand/RProjects/KNN")
# Load the bank loan data from the CSV file in the working directory.
bankloan <- read.csv("BANK LOAN.csv")
# Preview the first rows to check that the columns were read as expected.
head(bankloan)
## SN AGE EMPLOY ADDRESS DEBTINC CREDDEBT OTHDEBT DEFAULTER
## 1 1 3 17 12 9.3 11.36 5.01 1
## 2 2 1 10 6 17.3 1.36 4.00 0
## 3 3 2 15 14 5.5 0.86 2.17 0
## 4 4 3 15 14 2.9 2.66 0.82 0
## 5 5 1 2 0 17.3 1.79 3.06 1
## 6 6 3 5 5 10.2 0.39 2.16 0
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
CREATE INDEX TO SPLIT DATA INTO TRAIN AND TEST
# Fix the RNG seed so the train/test split, and every result derived from it,
# can be reproduced from run to run.
set.seed(123)
# Stratify the 70/30 split on the outcome rather than on the serial number SN:
# given a factor, createDataPartition() samples within each class, so the
# share of defaulters is preserved in both partitions.
# NOTE(review): printed results further down that came from an earlier,
# unseeded run will differ once this split is re-executed.
index <- createDataPartition(
  factor(bankloan$DEFAULTER),
  p = 0.7,
  list = FALSE
)
SUBSET THE DATA TO REMOVE NON-ESSENTIAL COLUMNS
# Drop AGE, SN and DEFAULTER, keeping every other column as a predictor.
bankloan2 <- bankloan[
  ,
  setdiff(names(bankloan), c("AGE", "SN", "DEFAULTER")),
  drop = FALSE
]
CREATE SCALED DATA TO FORMULATE TRAIN AND TEST DATA
# Standardise each predictor column (centre on its mean, divide by its
# standard deviation) so that no single variable dominates the distances.
# NOTE(review): the scaling statistics are computed on all rows, before the
# train/test split is applied; consider deriving them from training rows only.
bankloan3 <- scale(bankloan2, center = TRUE, scale = TRUE)
# Preview the first six scaled rows.
head(bankloan3, n = 6L)
## EMPLOY ADDRESS DEBTINC CREDDEBT OTHDEBT
## 1 1.2933881 0.5452741 -0.140697021 4.63182528 0.5936903584
## 2 0.2420275 -0.3338627 1.031080669 -0.09137366 0.2864682866
## 3 0.9929994 0.8383197 -0.697291424 -0.32753360 -0.2701816059
## 4 0.9929994 0.8383197 -1.078119173 0.52264220 -0.6808249692
## 5 -0.9595275 -1.2129994 1.031080669 0.11172390 0.0005388336
## 6 -0.5089444 -0.4803855 -0.008872031 -0.54952395 -0.2732234086
# Split the scaled predictor matrix: rows listed in `index` form the training
# set, all remaining rows form the test set.
traindata <- bankloan3[index,]
testdata <- bankloan3[-index,]
CREATE CLASS VECTORS
# Outcome labels for the same rows, taken from the original data frame so they
# line up row-for-row with traindata and testdata.
Ytrain <- bankloan$DEFAULTER[index]
Ytest <- bankloan$DEFAULTER[-index]
FORMULATE MODEL
library(class)
# Predict a class for every test row with k-nearest neighbours. The number of
# neighbours is the rounded square root of the training-set size, minus one.
knnmodel <- knn(
  train = traindata,
  test = testdata,
  cl = Ytrain,
  k = round(sqrt(nrow(traindata)), digits = 0) - 1
)
# Cross-tabulate predicted classes (rows) against actual classes (columns).
cMatrix <- table(knnmodel, Ytest)
cMatrix
## Ytest
## knnmodel 0 1
## 0 138 49
## 1 3 18
LET’S CALCULATE CONFUSION MATRIX, SENSITIVITY, SPECIFICITY
library(caret)
# caret treats the first class ("0", non-defaulter) as the positive class by
# default. The event of interest here is default (DEFAULTER = 1), so name the
# positive and negative classes explicitly.
# Sensitivity: share of actual defaulters predicted as defaulters (18 / 67).
sensitivity(cMatrix, positive = "1")
## [1] 0.2686567
# Specificity: share of actual non-defaulters predicted as such (138 / 141).
specificity(cMatrix, negative = "0")
## [1] 0.9787234