문제 1

# caret supplies createDataPartition(), used below for the split.
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice

# Folder holding the input file. The full path is handed to read.csv()
# instead of calling setwd(), so the session's working directory is not
# changed as a side effect of loading the data.
data_dir <- "C:/Users/kyeeu/OneDrive/Desktop"
raw_data <- read.csv(file.path(data_dir, "day.csv"))

# Only the first 80 rows are analysed. Fail early when the file is shorter:
# indexing past the last row would otherwise produce all-NA rows silently.
stopifnot(nrow(raw_data) >= 80)
df <- raw_data[1:80, c("temp", "atemp", "hum", "windspeed", "cnt")]

# Seeded split on cnt: rows selected by idx (p = 0.7) form the training set,
# the remaining rows form the test set.
set.seed(1)
idx <- createDataPartition(df$cnt, p = 0.7, list = FALSE)
train_set <- df[idx, ]
test_set  <- df[-idx, ]

히스토그램 생성

# Draw the train and test cnt histograms side by side (1 row, 2 columns).
# par() returns the previous settings; they are restored afterwards so the
# two-panel layout does not leak into later base-graphics plots.
old_par <- par(mfrow = c(1, 2))
hist(train_set$cnt, main = "Train Set CNT", col = "gray")
hist(test_set$cnt, main = "Test Set CNT", col = "white")
par(old_par)

문제 2

# Numbers of CV repetitions to try with 5-fold repeated cross-validation.
repeat_list <- c(1, 5, 10, 20, 30)

# One slot per repetition count, named "rep1", "rep5", ... up front so the
# loop fills existing entries instead of growing the list.
knn_results <- vector("list", length(repeat_list))
names(knn_results) <- paste0("rep", repeat_list)

for (i in repeat_list) {
  my_ctrl <- trainControl(method = "repeatedcv", number = 5, repeats = i)

  # The seed is reset before every fit, as in each iteration of this loop.
  set.seed(1)
  knn_fit <- train(
    cnt ~ .,
    data = train_set,
    method = "knn",
    trControl = my_ctrl,
    preProcess = c("center", "scale")
  )

  knn_results[[paste0("rep", i)]] <- knn_fit
}

결과 확인

# Inspect the kNN fit obtained with 30 CV repetitions: plot the fitted
# object, then print its selected tuning parameter(s).
rep30_knn_fit <- knn_results[["rep30"]]
plot(rep30_knn_fit)

print(rep30_knn_fit[["bestTune"]])
##   k
## 3 9

문제 3

# Linear-regression fit evaluated with 5-fold CV repeated 30 times; the seed
# is set to 1 right before training.
final_ctrl <- trainControl(method = "repeatedcv", number = 5, repeats = 30)
set.seed(1)
lm_fit <- train(
  cnt ~ .,
  data = train_set,
  method = "lm",
  trControl = final_ctrl
)

# Gather the resampling results of the 30-repeat kNN fit and the linear fit
# under the labels "kNN" and "Linear", then summarise them.
comparison <- resamples(
  list(
    kNN = knn_results[["rep30"]],
    Linear = lm_fit
  )
)
summary(comparison)
## 
## Call:
## summary.resamples(object = comparison)
## 
## Models: kNN, Linear 
## Number of resamples: 150 
## 
## MAE 
##            Min.  1st Qu.   Median     Mean  3rd Qu.     Max. NA's
## kNN    201.4747 297.8030 344.9278 345.6582 389.7652 552.4630    0
## Linear 141.5368 231.1167 284.5558 283.9545 334.3250 415.8336    0
## 
## RMSE 
##            Min.  1st Qu.   Median     Mean  3rd Qu.     Max. NA's
## kNN    223.9069 365.3751 439.4457 436.3938 502.9441 688.2007    0
## Linear 174.3903 288.6731 348.8385 358.8649 422.9685 547.0752    0
## 
## Rsquared 
##               Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## kNN    0.012900932 0.2482154 0.4029453 0.3870312 0.5375121 0.9239330    0
## Linear 0.002146765 0.4823014 0.6034514 0.5860778 0.7265960 0.9035185    0

RMSE 차이

# Box-and-whisker plot of the resampling results stored in `comparison`
# (the kNN and Linear models). No metric is selected explicitly here.
bwplot(comparison)

문제 4