# A tibble: 600 × 1
.pred
<dbl>
1 6.40
2 6.36
3 5.83
4 6.22
5 5.65
6 5.48
7 6.02
8 6.03
9 5.92
10 6.33
# ℹ 590 more rows
# A tibble: 3 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rmse standard 0.611
2 rsq standard 0.483
3 mae standard 0.465
---
title: "Assignment_3"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    source_code: embed
---
```{r setup, include=FALSE}
library(readxl)
library(dplyr)
library(tidymodels)
library(tidyverse)
library(packcircles)
library(plotly)
library(viridisLite)
library(fontawesome)
library(shiny)
library(shinyjs)
library(DT)
library(bslib)
library(shinyWidgets)
# Read the raw board-game workbook.
games <- read_excel("Data-Games/board_games_clean.xlsx")

### DATA CLEANING -----
skimr::skim(games)

# Work on a copy and turn cells holding the literal string "NA" into real
# missing values.
games_new <- games
games_new[games_new == "NA"] <- NA
str(games_new)

# Derived columns: upper-cased factor versions of Theme and Category, and a
# factor version of Publisher.
games_new <- games_new %>%
  mutate(
    Themes = as.factor(toupper(Theme)),
    Mechanic = as.factor(toupper(Category)),
    Publishers = as.factor(Publisher)
  )

# Keep the analysis columns (selected by position), move the derived factors
# next to their related columns, then apply the final column names.
games_analysis <- games_new %>%
  dplyr::select(1:5, 8, 9, 11:15) %>%
  relocate(Publishers, .after = Years) %>%
  relocate(Themes, .after = Playing_time) %>%
  relocate(Mechanic, .after = Themes) %>%
  rename(
    Play_time = Playing_time,
    Year_published = Years,
    Total_ratings = Number_of_ratings
  )

skimr::skim(games_analysis)
str(games_analysis)
```
```{r}
# Seed the RNG so the split is reproducible.
set.seed(123)

# Take the first 1000 rows, drop the NAME column, and make a 60/40
# train/test split stratified on the outcome.
games_splits <- initial_split(
  select(slice_head(games_analysis, n = 1000), -NAME),
  prop = 0.6,
  strata = Average_rating
)
```
```{r}
# Preprocessing recipe: model Average_rating from every other column of the
# training set.
#
# step_naomit() needs column selectors; called with none it selects nothing
# and therefore removes no rows. Selecting predictors and the outcome makes
# the NA removal actually happen. The step keeps its default skip = TRUE, so
# rows are only dropped when the recipe is applied to the training data.
#
# The former selector-less step_shuffle() was removed: without selectors it
# did nothing, and with selectors it would permute column values and break
# the pairing between predictors and the outcome.
games_recipe <-
  recipe(Average_rating ~ ., data = training(games_splits)) %>%
  step_naomit(all_predictors(), all_outcomes())

# Bootstrap resamples of the training set, used for hyperparameter tuning.
games_boot <- bootstraps(training(games_splits))
```
```{r}
# Random-forest regression spec with mtry, trees and min_n left as tuning
# placeholders. The ranger engine is asked for permutation importance.
games_ranger <-
  rand_forest(
    mode = "regression",
    mtry = tune(),
    trees = tune(),
    min_n = tune()
  ) %>%
  set_engine("ranger", importance = "permutation")
```
```{r}
# Bundle the preprocessing recipe and the model spec into one workflow.
ranger_wflow <- workflow() %>%
  add_recipe(games_recipe) %>%
  add_model(games_ranger)
```
```{r}
# Seed the RNG before tuning so the grid search is reproducible.
set.seed(123)

# Evaluate 20 candidate hyperparameter combinations on the bootstrap resamples.
ranger_grid <- ranger_wflow %>%
  tune_grid(resamples = games_boot, grid = 20)

# Pick the candidate with the best RMSE.
lowest_rmse_ranger <- select_best(ranger_grid, metric = "rmse")

# Plug the chosen hyperparameters into the workflow and fit it on the full
# training set.
final_ranger_fit <- fit(
  finalize_workflow(ranger_wflow, lowest_rmse_ranger),
  data = training(games_splits)
)
```
```{r}
# Predictions of the fitted workflow on the training rows (printed).
predict(final_ranger_fit, new_data = training(games_splits))
```
```{r}
# Predict on the held-out test rows, attach the predictions to the test data,
# and compute the default regression metrics against the observed ratings.
metrics(
  bind_cols(
    predict(final_ranger_fit, new_data = testing(games_splits)),
    testing(games_splits)
  ),
  truth = Average_rating,
  estimate = .pred
)
```