# KNN with LOOCV ----
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: lattice
# Wine-quality data sets. `train` is the data the models are fitted on;
# `test` is the validation set that predictions are generated for.
# NOTE: the data frame `train` shares its name with caret's train() function;
# calls such as train(...) still resolve to the function.
train <- read.csv("wineq_train.csv", stringsAsFactors = FALSE)
test <- read.csv("wineq_validation.csv", stringsAsFactors = FALSE)
# Fit a kNN model of `quality` on `alcohol` and `density`, evaluated with
# leave-one-out cross-validation.
#
# Arguments:
#   k      Number of neighbours; passed to caret as a one-row tuning grid.
#   center If TRUE, the predictors are centred and scaled.
#   data   Training data frame; defaults to the global `train` data set.
#
# Returns the fitted caret `train` object.
train_2vars <- function(k, center = FALSE, data = train) {
  # Standardisation is delegated to caret instead of transforming the data
  # frame up front: that way only the predictors are scaled (the response
  # `quality` stays on its original scale, so RMSE and predictions remain
  # interpretable) and predict() re-applies the same transform to new data.
  pre_proc <- if (center) c("center", "scale") else NULL
  ctrl <- caret::trainControl(method = "LOOCV")
  caret::train(
    quality ~ alcohol + density,
    data = data,
    method = "knn",
    preProcess = pre_proc,
    tuneGrid = data.frame(k = k),
    trControl = ctrl
  )
}
# Fit a kNN model of `quality` on all other columns, evaluated with
# leave-one-out cross-validation.
#
# Arguments:
#   k      Number of neighbours; passed to caret as a one-row tuning grid.
#   center If TRUE, the predictors are centred and scaled.
#   data   Training data frame; defaults to the global `train` data set.
#
# Returns the fitted caret `train` object.
train_allvars <- function(k, center = FALSE, data = train) {
  # Standardisation is delegated to caret instead of transforming the data
  # frame up front: that way only the predictors are scaled (the response
  # `quality` stays on its original scale, so RMSE and predictions remain
  # interpretable) and predict() re-applies the same transform to new data.
  pre_proc <- if (center) c("center", "scale") else NULL
  # Specify the cross-validation method.
  ctrl <- caret::trainControl(method = "LOOCV")
  caret::train(
    quality ~ .,
    data = data,
    method = "knn",
    preProcess = pre_proc,
    tuneGrid = data.frame(k = k),
    trControl = ctrl
  )
}
# Share of rows whose predicted quality equals the observed quality.
#
# Arguments:
#   model   A fitted model with a predict() method.
#   newdata Data frame holding the predictors and a `quality` column;
#           defaults to the global `test` data set.
#
# Returns a single number: the mean of the element-wise matches (NA if any
# comparison is NA).
compute_model_accuracy <- function(model, newdata = test) {
  preds <- predict(model, newdata = newdata)
  actual <- newdata$quality
  # Numeric (regression) predictions are continuous, so they almost never
  # equal an integer label exactly; round them to the nearest score first.
  if (is.numeric(preds)) {
    preds <- round(preds)
  }
  mean(preds == actual)
}
# Unscaled variants (disabled):
# models_list <- lapply(1:21, train_2vars)
# allvar_models_list <- lapply(1:21, train_allvars)
# save(models_list, file = "knn_models.RData")
# save(allvar_models_list, file = "all_knn_models.RData")

# Fit one LOOCV model per k = 1..21 with scaling enabled, for both predictor
# sets, then cache the fitted lists on disk.
scaled_models_list <- lapply(
  seq_len(21),
  function(k) train_2vars(k, center = TRUE)
)
scaled_allvar_models_list <- lapply(
  seq_len(21),
  function(k) train_allvars(k, center = TRUE)
)
save(scaled_models_list, file = "scaled_knn_models.RData")
save(scaled_allvar_models_list, file = "scaled_all_knn_models.RData")

# To reuse the cached fits instead of refitting (load() restores the objects
# under the names they were saved with and returns only those names):
# load("scaled_knn_models.RData")
# load("scaled_all_knn_models.RData")
# LOOCV RMSE of each two-predictor model, plotted against its position in the
# list (the list is built with k = 1, 2, ... in order).
# vapply() guarantees one numeric value per model; seq_along() keeps the k
# column in step with the list length.
rmse_values <- vapply(
  scaled_models_list,
  function(model) min(model$results$RMSE),
  numeric(1)
)
rmse_df <- data.frame(k = seq_along(rmse_values), RMSE = rmse_values)
plot(rmse_df, main = "KNN with LOOCV. density and alcohol as predictors")

# LOOCV RMSE of each all-predictor model, plotted against its position in the
# list (the list is built with k = 1, 2, ... in order).
# vapply() guarantees one numeric value per model; seq_along() keeps the k
# column in step with the list length.
all_rmse_values <- vapply(
  scaled_allvar_models_list,
  function(model) min(model$results$RMSE),
  numeric(1)
)
all_rmse_df <- data.frame(k = seq_along(all_rmse_values), RMSE = all_rmse_values)
plot(all_rmse_df, main = "KNN with LOOCV. All variables as predictors")

# Refit the all-predictor model at k = 21 with scaling enabled, predict the
# validation set and write one prediction per line, without headers.
# NOTE: the variable `all` shares its name with base::all(); calls to all()
# still resolve to the function.
# NOTE(review): the predictions are only meaningful if train_allvars() leaves
# `quality` on its original scale and the same scaling reaches `test` -
# confirm against train_allvars().
all <- train_allvars(k = 21, center = TRUE)
yhat <- predict(all, newdata = test)
write.table(
  yhat,
  file = "mySubmission.txt",
  row.names = FALSE,
  col.names = FALSE
)
# KNN with 10-fold cross-validation ----
# Fit a kNN model of `quality` on `alcohol` and `density`, evaluated with
# 10-fold cross-validation.
#
# Arguments:
#   k      Number of neighbours; passed to caret as a one-row tuning grid.
#   center If TRUE, the predictors are centred and scaled.
#   data   Training data frame; defaults to the global `train` data set.
#
# Returns the fitted caret `train` object.
train_2vars_kfold <- function(k, center = FALSE, data = train) {
  # Standardisation is delegated to caret instead of transforming the data
  # frame up front: that way only the predictors are scaled (the response
  # `quality` stays on its original scale, so RMSE and predictions remain
  # interpretable) and predict() re-applies the same transform to new data.
  pre_proc <- if (center) c("center", "scale") else NULL
  ctrl <- caret::trainControl(method = "cv", number = 10)
  caret::train(
    quality ~ alcohol + density,
    data = data,
    method = "knn",
    preProcess = pre_proc,
    tuneGrid = data.frame(k = k),
    trControl = ctrl
  )
}
# Fit a kNN model of `quality` on all other columns, evaluated with 10-fold
# cross-validation.
#
# Arguments:
#   k      Number of neighbours; passed to caret as a one-row tuning grid.
#   center If TRUE, the predictors are centred and scaled.
#   data   Training data frame; defaults to the global `train` data set.
#
# Returns the fitted caret `train` object.
train_allvars_kfold <- function(k, center = FALSE, data = train) {
  # Standardisation is delegated to caret instead of transforming the data
  # frame up front: that way only the predictors are scaled (the response
  # `quality` stays on its original scale, so RMSE and predictions remain
  # interpretable) and predict() re-applies the same transform to new data.
  pre_proc <- if (center) c("center", "scale") else NULL
  ctrl <- caret::trainControl(method = "cv", number = 10)
  caret::train(
    quality ~ .,
    data = data,
    method = "knn",
    preProcess = pre_proc,
    tuneGrid = data.frame(k = k),
    trControl = ctrl
  )
}
# Fit one 10-fold CV model per k = 1..21 with scaling enabled, for both
# predictor sets. The two-predictor list must come from train_2vars_kfold();
# it was previously built with the all-predictor trainer by mistake.
models_2var_kfold_list <- lapply(1:21, train_2vars_kfold, TRUE)
models_allvar_kfold_list <- lapply(1:21, train_allvars_kfold, TRUE)
# save(models_2var_kfold_list, file = "models_2var_kfold_list.RData")
# save(models_allvar_kfold_list, file = "models_allvar_kfold_list.RData")
# 10-fold CV RMSE of each model in models_2var_kfold_list, plotted against
# its position in the list (the list is built with k = 1, 2, ... in order).
# vapply() guarantees one numeric value per model; seq_along() keeps the k
# column in step with the list length.
kfold_2var_rmse_values <- vapply(
  models_2var_kfold_list,
  function(model) min(model$results$RMSE),
  numeric(1)
)
kfold_2var_rmse_df <- data.frame(
  k = seq_along(kfold_2var_rmse_values),
  RMSE = kfold_2var_rmse_values
)
plot(kfold_2var_rmse_df, main = "KNN with 10-FOLD. density and alcohol as predictors")

# 10-fold CV RMSE of each model in models_allvar_kfold_list, plotted against
# its position in the list (the list is built with k = 1, 2, ... in order).
# vapply() guarantees one numeric value per model; seq_along() keeps the k
# column in step with the list length.
kfold_all_rmse_values <- vapply(
  models_allvar_kfold_list,
  function(model) min(model$results$RMSE),
  numeric(1)
)
kfold_all_rmse_df <- data.frame(
  k = seq_along(kfold_all_rmse_values),
  RMSE = kfold_all_rmse_values
)
plot(kfold_all_rmse_df, main = "KNN with 10-FOLD. All variables as predictors")
