# A tibble: 600 × 1
   .pred
   <dbl>
 1  6.40
 2  6.36
 3  5.83
 4  6.22
 5  5.65
 6  5.48
 7  6.02
 8  6.03
 9  5.92
10  6.33
# ℹ 590 more rows
# A tibble: 3 × 3
  .metric .estimator .estimate
  <chr>   <chr>          <dbl>
1 rmse    standard       0.611
2 rsq     standard       0.483
3 mae     standard       0.465
---
title: "Assignment_3"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: scroll
    source_code: embed
---

```{r setup, include=FALSE}
library(readxl)
library(dplyr)
library(tidymodels)
library(tidyverse)
library(packcircles)
library(plotly)
library(viridisLite)
library(fontawesome)
library(shiny)
library(shinyjs)
library(DT)
library(bslib)
library(shinyWidgets)

# Load the raw board-game data from the Excel workbook.
games <- read_excel("Data-Games/board_games_clean.xlsx")

### DATA CLEANING -----
# Diagnostic summary of the raw import.
skimr::skim(games)

# Work on a copy so `games` keeps the data exactly as imported.
games_new <- games

# Cells holding the literal text "NA" become real missing values.
games_new[games_new == "NA"] <- NA

str(games_new)

# Add factor versions of the categorical columns. New columns are appended
# after the existing ones in the order listed here, which the positional
# column selection further down relies on. Theme and Category are
# upper-cased first so values differing only in case share one level.
games_new <- games_new %>%
  mutate(
    Themes = as.factor(toupper(Theme)),
    Mechanic = as.factor(toupper(Category)),
    Publishers = as.factor(Publisher)
  )

# Build the modelling table: keep a subset of columns (picked by position
# in `games_new`), reorder the derived factor columns, and give three
# columns more descriptive names.
games_analysis <- games_new %>%
  dplyr::select(1:5, 8, 9, 11:15) %>%
  relocate(Publishers, .after = Years) %>%
  relocate(Themes, Mechanic, .after = Playing_time) %>%
  rename(
    Play_time = Playing_time,
    Year_published = Years,
    Total_ratings = Number_of_ratings
  )

# Diagnostic summaries of the final table.
skimr::skim(games_analysis)
str(games_analysis)

```


```{r}
# Fix the RNG state so the train/test partition is reproducible.
set.seed(123)

# Split the first 1000 rows (without the NAME column) 60/40 into training
# and testing sets, stratified on the outcome Average_rating.
games_splits <- initial_split(
  games_analysis %>%
    slice(1:1000) %>%
    select(-NAME),
  prop = 0.6,
  strata = Average_rating
)

```


```{r}
# Preprocessing recipe: model Average_rating from every other column of the
# training set, dropping rows that have a missing value in any predictor or
# in the outcome.
#
# step_naomit() needs explicit selectors: with none it selects no columns
# and removes nothing.
# NOTE(review): step_naomit() defaults to skip = TRUE, so rows with missing
# values are only dropped while training, not when predicting on new data.
# Confirm that is the intended behaviour for the test set.
#
# No step_shuffle() here: without selectors it did nothing, and shuffling
# predictor columns would break their link to the outcome.
games_recipe <-
  recipe(Average_rating ~ ., data = training(games_splits)) %>%
  step_naomit(all_predictors(), all_outcomes())

# Bootstrap resamples of the training set, used for hyperparameter tuning.
games_boot <- bootstraps(training(games_splits))
```


```{r}
# Random-forest regression specification. mtry, trees and min_n are left as
# tune() placeholders to be filled in by the tuning step; the ranger engine
# is asked to compute permutation variable importance.
games_ranger <-
  rand_forest(
    mode = "regression",
    mtry = tune(),
    trees = tune(),
    min_n = tune()
  ) %>%
  set_engine("ranger", importance = "permutation")

```


```{r}
# Bundle the preprocessing recipe and the model specification into a single
# workflow so they are tuned and fitted together.
ranger_wflow <- workflow() %>%
  add_recipe(games_recipe) %>%
  add_model(games_ranger)

```


```{r}
# Fix the RNG state so the tuning results are reproducible.
set.seed(123)

# Evaluate 20 automatically generated hyperparameter candidates on the
# bootstrap resamples.
ranger_grid <- ranger_wflow %>%
  tune_grid(resamples = games_boot, grid = 20)

# Candidate with the lowest RMSE across the resamples.
lowest_rmse_ranger <- ranger_grid %>%
  select_best(metric = "rmse")

# Plug the winning hyperparameters into the workflow and refit on the full
# training set.
final_ranger_fit <- ranger_wflow %>%
  finalize_workflow(lowest_rmse_ranger) %>%
  fit(data = training(games_splits))

```


```{r}
# Predictions of the fitted workflow on the training set itself (printed).
predict(final_ranger_fit, new_data = training(games_splits))

```


```{r}
# Held-out performance: predict on the testing set, attach the predictions
# to the observed data, and compare .pred against Average_rating.
metrics(
  bind_cols(
    predict(final_ranger_fit, new_data = testing(games_splits)),
    testing(games_splits)
  ),
  truth = Average_rating,
  estimate = .pred
)

```