KNN Model
setwd("C:/Users/Manjari/Desktop/Machine learning/Final Report Manjari")
# Load the training dataset
train<- data.frame(read.csv("train.csv",header=T))
# Load the test dataset
test <- data.frame(read.csv("test.csv", header=T))
# What are its dimensions?
dim(train)
## [1] 42000 785
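# A quick sanity check (sketch), assuming the Kaggle digit-recognizer layout:
# a 'label' column followed by 784 pixel-intensity columns in train.csv,
# and pixel columns only in test.csv.
dim(test)            # the test set should have only the 784 pixel columns
table(train$label)   # counts of each digit 0-9 in the training set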
# Take just the second row, keeping only the pixel columns (not the label)
pixels<-train[2,2:785]
dim(pixels)
## [1] 1 784
# What is its label?
train[2,1]
## [1] 0
# Save to a file, rounding the [0,255] pixel intensities to binary {0,1} values
write.table(matrix(round(pixels/255),28,28), file = "second.txt", sep = " ", row.names = FALSE, col.names = FALSE)
# Also append the label value to this file
write.table(paste("label",train[2,1]), file = "second.txt", sep = " ", row.names = FALSE, col.names = FALSE,append=T)
# Take the 2nd row again, ignoring the label column
data<-train[2,2:785]
# as.matrix() ignores the nrow/ncol arguments, so the shape stays 1 x 784
data<-as.matrix(data,nrow=28,ncol=28)
dim(data)
## [1] 1 784
# matrix() reshapes the 784 pixel values into a 28 x 28 grid
data<-matrix(data,nrow=28,ncol=28)
dim(data)
## [1] 28 28
# Color ramp definition
colors<-c('white','black')
# Create a palette function that generates a continuum of
# any desired number of colors from white to black
ramp_pal<-colorRampPalette(colors=colors)
# Draw an image of data over a grid of x(1:28), y(1:28)
image(1:28,1:28,data,main="2nd row. Label=0",col=ramp_pal(256))
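# Depending on the pixel ordering, image() can show the digit flipped,
# since it draws the first matrix column at the bottom of the plot.
# A minimal sketch, assuming the usual row-major MNIST pixel order,
# to display the digit upright: reverse the matrix columns (the image rows).
upright <- data[, 28:1]
image(1:28, 1:28, upright, main = "2nd row, upright. Label=0", col = ramp_pal(256))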
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(kknn)
##
## Attaching package: 'kknn'
##
## The following object is masked from 'package:caret':
##
## contr.dummy
# remove near-zero variance features
badCols <- nearZeroVar(train[, -1])
train <- train[, -(badCols+1)]
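# A quick check (sketch): how aggressive was the near-zero variance filter,
# and what remains to feed into kknn?
length(badCols)   # number of near-constant pixel columns removed
dim(train)        # label column plus the remaining informative pixels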
# optimize kknn over k = 1:15 and kernel = triangular, rectangular, or gaussian
# (train.kknn selects the best combination by leave-one-out cross-validation)
model <- train.kknn(as.factor(label) ~ ., train, kmax=15, kernel= c("triangular","rectangular","gaussian"))
# print out best parameters and prediction error
print(paste("Best parameters:", "kernel =", model$best.parameters$kernel, ", k =", model$best.parameters$k))
## [1] "Best parameters: kernel = triangular , k = 6"
print(model$MISCLASS)
## triangular rectangular gaussian
## 1 0.03216667 0.03216667 0.03216667
## 2 0.03216667 0.03885714 0.03216667
## 3 0.03026190 0.03197619 0.03042857
## 4 0.02923810 0.03238095 0.02909524
## 5 0.02842857 0.03247619 0.03088095
## 6 0.02764286 0.03297619 0.03045238
## 7 0.02769048 0.03369048 0.03200000
## 8 0.02769048 0.03445238 0.03192857
## 9 0.02773810 0.03485714 0.03342857
## 10 0.02795238 0.03535714 0.03359524
## 11 0.02807143 0.03571429 0.03409524
## 12 0.02828571 0.03600000 0.03428571
## 13 0.02861905 0.03711905 0.03471429
## 14 0.02897619 0.03711905 0.03519048
## 15 0.02930952 0.03785714 0.03557143
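# kknn also provides a plot method for train.kknn objects; a small sketch to
# visualise the leave-one-out misclassification rate against k for each kernel
# (the same numbers as the MISCLASS table above):
plot(model)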
# fit the final kknn model with the best parameters found above and classify the test set
model <- kknn(as.factor(label) ~ ., train, test, k=6, kernel="triangular")
results <- model$fitted.values
# save the class predictions in a column vector
write(as.numeric(levels(results))[results], file="kknn_submission.csv", ncolumns=1)
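# Kaggle's digit-recognizer competition expects an ImageId,Label header; a sketch
# of that submission format (the output file name here is illustrative):
preds <- as.numeric(levels(results))[results]
submission <- data.frame(ImageId = seq_along(preds), Label = preds)
write.csv(submission, file = "kknn_kaggle_submission.csv", row.names = FALSE)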