Change the author of this RMD file to yourself, and modify the code below so that you can successfully load the ‘wine.rds’ data file from your own computer.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(tidyverse)
library(caret)
library(fastDummies)
# Load the wine data set from a local RDS file.
# NOTE: this absolute path is machine-specific; point it at your own copy.
wine <- read_rds("/Users/Rose/Downloads/wine.rds")
library(caret)
# Build the modelling table: the response is the natural log of price, and
# the predictors are points and variety. fct_lump(variety, 10) collapses the
# less frequent varieties into a single catch-all level (printed as "Other").
# Rows with a missing value in any of the kept columns are removed.
wino <- wine %>%
  mutate(
    lprice = log(price),
    # engineer features here
    variety = fct_lump(variety, 10)
  ) %>%
  select(lprice, points, variety) %>%
  drop_na()

# Preview the first 10 rows of the prepared data.
head(wino, 10)
## # A tibble: 10 x 3
## lprice points variety
## <dbl> <dbl> <fct>
## 1 2.71 87 Other
## 2 2.64 87 Other
## 3 2.56 87 Riesling
## 4 4.17 87 Pinot Noir
## 5 2.71 87 Other
## 6 2.77 87 Other
## 7 3.18 87 Other
## 8 2.48 87 Other
## 9 3.30 87 Other
## 10 2.94 87 Cabernet Sauvignon
# Fix the RNG seed so the train/test split and the bootstrap resamples
# are reproducible across runs.
set.seed(504)

# Split on the outcome with p = 0.8 (80% of rows for training).
# list = FALSE returns the row indices as a matrix rather than a list, so
# they can be used directly for positive/negative row subsetting below.
wine_index <- createDataPartition(wino$lprice, p = 0.8, list = FALSE)
wino_tr <- wino[wine_index, ]
wino_te <- wino[-wine_index, ]

# Resampling scheme used by train(): bootstrap with 5 resamples.
control <- trainControl(method = "boot", number = 5)

# Fit a linear model of log price on every other column of the training set.
model <- train(
  lprice ~ .,
  data = wino_tr,
  method = "lm",
  trControl = control
)
# Score the held-out test rows with the fitted model.
wine_pred <- predict(model, newdata = wino_te)

# Compare predictions against the observed log prices; the printed summary
# reports RMSE, R-squared and MAE.
postResample(pred = wine_pred, obs = wino_te$lprice)
## RMSE Rsquared MAE
## 0.5012143 0.4259170 0.3901187
## RMSE = 0.5012 (on the log-price scale), which is not very good; we want the error to be smaller.
Graph the importance of your 10 features.
# Plot the variable importance of the fitted model's predictors.
# TRUE is spelled out because the shorthand `T` is an ordinary variable
# that can be reassigned.
plot(varImp(model, scale = TRUE))