# KNN Model

# Point R at the project folder so the relative file names below resolve.
# NOTE(review): this absolute path is machine-specific; anyone else running
# the script has to edit it.
setwd("C:/Users/Manjari/Desktop/Machine learning/Final Report Manjari")
# Read the training data (the first line of the file holds the column names).
# read.csv() already returns a data frame, so no extra wrapping is needed.
train <- read.csv("train.csv", header = TRUE)
# Load the test data the same way
test <- read.csv("test.csv", header = TRUE)
# Its dimensions?
dim(train)
## [1] 42000   785
# Take just the second row, pixel columns only (column 1 is the label)
pixels <- train[2, 2:785]
dim(pixels)
## [1]   1 784
# What is the label?
train[2, 1]
## [1] 0
# Save to a file, scaling each pixel from [0, 255] down to [0, 1] and rounding
# to the nearest integer (0 or 1).
# `pixels` is a one-row data frame, so flatten it to a plain numeric vector
# first; matrix() on the data frame itself would build a list-mode matrix.
write.table(
  matrix(round(unlist(pixels, use.names = FALSE) / 255), 28, 28),
  file = "second.txt", sep = " ", row.names = FALSE, col.names = FALSE
)
# Append the label value to the same file
write.table(
  paste("label", train[2, 1]),
  file = "second.txt", sep = " ", row.names = FALSE, col.names = FALSE,
  append = TRUE
)
# Read the 2nd row again, skipping the label column
data <- train[2, 2:785]
# as.matrix() has no nrow/ncol arguments, so the result is still one long row
data <- as.matrix(data)
dim(data)
## [1]   1 784
# Reshape the 784 values into a 28 x 28 grid (filled column by column)
data <- matrix(data, nrow = 28, ncol = 28)
dim(data)
## [1] 28 28
## Colour ramp definition
colors <- c("white", "black")
# Build a function that returns any requested number of colours
# interpolated from white to black
ramp_pal <- colorRampPalette(colors = colors)
# Draw an image of data over a grid of x(1:28), y(1:28). The title takes the
# label from the data instead of hard-coding it.
# NOTE(review): image() plots y increasing upwards, so the digit may appear
# flipped vertically -- confirm against the pixel layout of train.csv.
image(1:28, 1:28, data,
      main = paste0("IInd row. Label=", train[2, 1]),
      col = ramp_pal(256))

library(caret)
## Loading required package: lattice
## Loading required package: ggplot2

library(kknn)
## 
## Attaching package: 'kknn'
## 
## The following object is masked from 'package:caret':
## 
##     contr.dummy
# Find the near-zero-variance predictors. The label (column 1) is left out of
# the search, so the returned indices are relative to train[, -1].
badCols <- nearZeroVar(train[, -1])

# Drop those predictors from train, shifting each index by one to account for
# the label column. Guard the empty case: with no indices, the negative
# subscript collapses to integer(0) and would select zero columns, wiping the
# whole data frame.
if (length(badCols) > 0) {
  train <- train[, -(badCols + 1)]
}
 
# Tune kknn over every k from 1 to 15, trying each of the three
# candidate kernels: triangular, rectangular and gaussian
model <- train.kknn(
  formula = as.factor(label) ~ .,
  data = train,
  kmax = 15,
  kernel = c("triangular", "rectangular", "gaussian")
)

# Report the winning kernel / k combination ...
print(with(
  model[["best.parameters"]],
  paste("Best parameters:", "kernel =", kernel, ", k =", k)
))
## [1] "Best parameters: kernel = triangular , k = 6"
# ... followed by the misclassification rate for every k and kernel tried
print(model[["MISCLASS"]])
##    triangular rectangular   gaussian
## 1  0.03216667  0.03216667 0.03216667
## 2  0.03216667  0.03885714 0.03216667
## 3  0.03026190  0.03197619 0.03042857
## 4  0.02923810  0.03238095 0.02909524
## 5  0.02842857  0.03247619 0.03088095
## 6  0.02764286  0.03297619 0.03045238
## 7  0.02769048  0.03369048 0.03200000
## 8  0.02769048  0.03445238 0.03192857
## 9  0.02773810  0.03485714 0.03342857
## 10 0.02795238  0.03535714 0.03359524
## 11 0.02807143  0.03571429 0.03409524
## 12 0.02828571  0.03600000 0.03428571
## 13 0.02861905  0.03711905 0.03471429
## 14 0.02897619  0.03711905 0.03519048
## 15 0.02930952  0.03785714 0.03557143
# Fit the final kknn model on the training data and predict the test set,
# using the k and kernel selected by the train.kknn() search held in `model`.
# A hard-coded k = 9 disagreed with the tuning result recorded for this data
# (best k = 6), so the values are now read from the tuning object.
best_params <- model$best.parameters
# Fails fast if `model` is no longer the train.kknn() result (e.g. on re-run)
stopifnot(!is.null(best_params))
model <- kknn(as.factor(label) ~ ., train, test,
              k = best_params$k, kernel = best_params$kernel)
results <- model$fitted.values

# Save the class predictions as a single column, one value per line.
# levels(results)[results] maps each factor value back to its original label
# text before the numeric conversion.
# NOTE(review): no header or id column is written -- confirm that whatever
# consumes kknn_submission.csv expects a bare column.
write(as.numeric(levels(results))[results], file = "kknn_submission.csv",
      ncolumns = 1)