nyt_titles <- readr::read_tsv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2022/2022-05-10/nyt_titles.tsv')
## Rows: 7431 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): title, author
## dbl (5): id, year, total_weeks, debut_rank, best_rank
## date (1): first_week
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
skimr::skim(nyt_titles)
| Name | nyt_titles |
| Number of rows | 7431 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| Date | 1 |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| title | 0 | 1 | 1 | 74 | 0 | 7172 | 0 |
| author | 4 | 1 | 4 | 73 | 0 | 2205 | 0 |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| first_week | 0 | 1 | 1931-10-12 | 2020-12-06 | 2000-06-25 | 3348 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| id | 0 | 1 | 3715.00 | 2145.29 | 0 | 1857.5 | 3715 | 5572.5 | 7430 | ▇▇▇▇▇ |
| year | 0 | 1 | 1989.61 | 26.23 | 1931 | 1968.0 | 2000 | 2011.0 | 2020 | ▂▂▂▃▇ |
| total_weeks | 0 | 1 | 8.13 | 11.21 | 1 | 2.0 | 4 | 10.0 | 178 | ▇▁▁▁▁ |
| debut_rank | 0 | 1 | 7.90 | 4.57 | 1 | 4.0 | 8 | 12.0 | 17 | ▇▆▅▅▅ |
| best_rank | 0 | 1 | 6.91 | 4.57 | 1 | 3.0 | 6 | 10.0 | 17 | ▇▅▃▃▂ |
data1 <- nyt_titles %>%
#treat missing values
select(-id, -year, -first_week) %>%
na.omit() %>%
#log transform variables with pos-skewed distribution
mutate(total_weeks = log(total_weeks))
data_binarized_tbl1 <- data1 %>%
select(-author) %>%
binarize()
#step 2 correlate
data_corr_tbl1 <- data_binarized_tbl1 %>%
correlate(total_weeks__0.693147180559945_1.38629436111989)
data_corr_tbl1 %>%
plot_correlation_funnel()
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the correlationfunnel package.
## Please report the issue at
## <https://github.com/business-science/correlationfunnel/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the correlationfunnel package.
## Please report the issue at
## <https://github.com/business-science/correlationfunnel/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Split data
#data <- sample_n(data, 100)
#split into train and test dataset
set.seed(1234)
data_split <- rsample::initial_split(data1)
data_train <- training(data_split)
data_test <- testing(data_split)
#further split training dataset for cross-validation
set.seed(2345)
data_cv <- rsample::vfold_cv(data_train)
data_cv
## # 10-fold cross-validation
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [5013/557]> Fold01
## 2 <split [5013/557]> Fold02
## 3 <split [5013/557]> Fold03
## 4 <split [5013/557]> Fold04
## 5 <split [5013/557]> Fold05
## 6 <split [5013/557]> Fold06
## 7 <split [5013/557]> Fold07
## 8 <split [5013/557]> Fold08
## 9 <split [5013/557]> Fold09
## 10 <split [5013/557]> Fold10
library(usemodels)
usemodels::use_xgboost(total_weeks ~ ., data = data_train)
## xgboost_recipe <-
## recipe(formula = total_weeks ~ ., data = data_train) %>%
## step_zv(all_predictors())
##
## xgboost_spec <-
## boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
## loss_reduction = tune(), sample_size = tune()) %>%
## set_mode("classification") %>%
## set_engine("xgboost")
##
## xgboost_workflow <-
## workflow() %>%
## add_recipe(xgboost_recipe) %>%
## add_model(xgboost_spec)
##
## set.seed(41811)
## xgboost_tune <-
## tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
xgboost_recipe <-
recipe(formula = total_weeks ~ ., data = data_train) %>%
step_tokenize(title) %>%
step_tokenfilter(title, max_tokens = 100) %>%
step_tf(title) %>%
step_other(author, threshold = 0.003) %>%
step_dummy(author) %>%
step_zv(all_predictors())
xgboost_recipe %>% prep() %>% juice() %>% glimpse
## Rows: 5,570
## Columns: 144
## $ debut_rank <dbl> 8, 4, 14, 4, 12, 12, 10, 6, 7…
## $ best_rank <dbl> 10, 6, 13, 1, 6, 4, 7, 5, 8, …
## $ total_weeks <dbl> 2.1972246, 0.6931472, 0.00000…
## $ tf_title_a <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_after <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_all <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_an <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_and <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_are <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_as <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_at <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_big <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_black <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_blood <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_blue <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_bones <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_book <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_cat <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_christmas <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_city <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_cold <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_dark <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_day <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_days <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_dead <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_death <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_die <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_down <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_edge <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_fall <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_fire <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_first <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,…
## $ tf_title_for <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_from <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_game <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_girl <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_girls <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_god <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_golden <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_gone <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_good <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_heart <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_heaven <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_high <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_home <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_honor <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_house <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_i <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_in <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_is <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_island <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_lady <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_last <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_life <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_light <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_little <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_long <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_lost <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_love <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_man <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_me <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_men <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_midnight <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_moon <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_mr <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_murder <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_my <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_new <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_night <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_no <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_not <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_of <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,…
## $ tf_title_on <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_one <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_prey <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_red <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_river <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_road <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_sea <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_second <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_secret <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_shadow <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_son <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_star <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_storm <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_street <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_summer <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_the <int> 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,…
## $ tf_title_this <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_three <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_time <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_to <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,…
## $ tf_title_tree <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_two <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_we <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_white <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_who <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_wife <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_wind <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_with <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_woman <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_world <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tf_title_you <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Anne.Rice <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Barbara.Taylor.Bradford <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Catherine.Coulter <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Danielle.Steel <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_David.Baldacci <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Dean.Koontz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Debbie.Macomber <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Dick.Francis <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Douglas.Preston.and.Lincoln.Child <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Harlan.Coben <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Iris.Johansen <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_J..A..Jance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_J..D..Robb <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Jack.Higgins <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_James.Lee.Burke <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_James.Patterson <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_James.Patterson.and.Maxine.Paetro <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Janet.Evanovich <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Jeffrey.Archer <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_John.Grisham <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_John.le.Carré <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_John.Sandford <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Jonathan.Kellerman <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Ken.Follett <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Lisa.Scottoline <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Mary.Higgins.Clark <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Michael.Connelly <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Nora.Roberts <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Patricia.Cornwell <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Richard.Paul.Evans <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Robert.B..Parker <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Robert.Ludlum <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Robin.Cook <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Sandra.Brown <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Stephen.King <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Stuart.Woods <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Sue.Grafton <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Taylor.Caldwell <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_Terry.Brooks <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,…
## $ author_W..E..B..Griffin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ author_other <dbl> 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,…
rf_spec <-
rand_forest(trees = 500) %>%
set_mode("regression")
rf_spec
## Random Forest Model Specification (regression)
##
## Main Arguments:
## trees = 500
##
## Computational engine: ranger
xgboost_spec <-
boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune())%>%
set_mode("regression") %>%
set_engine("xgboost")
xgboost_workflow <-
workflow() %>%
add_recipe(xgboost_recipe) %>%
add_model(xgboost_spec)
rf_wf <- workflow(xgboost_recipe, rf_spec)
set.seed(12193)
xgboost_tune <-
tune_grid(xgboost_workflow,
resamples = data_cv,
grid = 5)
#evaluate models
tune::show_best(xgboost_tune, metric = "rmse")
## # A tibble: 5 × 10
## trees min_n tree_depth learn_rate .metric .estimator mean n std_err
## <int> <int> <int> <dbl> <chr> <chr> <dbl> <int> <dbl>
## 1 1000 2 1 0.0750 rmse standard 0.828 10 0.00614
## 2 2000 40 8 0.0178 rmse standard 0.834 10 0.00653
## 3 1500 11 11 0.001 rmse standard 0.845 10 0.00478
## 4 500 21 15 0.316 rmse standard 0.899 10 0.00691
## 5 1 30 4 0.00422 rmse standard 1.11 10 0.00741
## # ℹ 1 more variable: .config <chr>
#update the model by selecting best hyperparameters.
xgboost_fw <- tune::finalize_workflow(xgboost_workflow,
tune::select_best(xgboost_tune, metric = "rmse"))
#fit the model on the entire training data and test it on the test data.
data_fit <- tune::last_fit(xgboost_fw, data_split)
contrl_preds <- control_resamples(save_pred = TRUE)
ranger_rs <- fit_resamples(
rf_wf,
resamples = data_cv,
control = contrl_preds)
collect_metrics(ranger_rs)
## # A tibble: 2 × 6
## .metric .estimator mean n std_err .config
## <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 rmse standard 0.828 10 0.00462 pre0_mod0_post0
## 2 rsq standard 0.446 10 0.00814 pre0_mod0_post0
bind_rows(collect_predictions(ranger_rs) %>%
mutate(mod = "ranger")) %>%
ggplot(aes(total_weeks, .pred)) +
geom_point(alpha = 0.3, fill = "midnightblue") +
geom_abline(lty = 2, color = "gray50") +
facet_wrap(vars(mod)) +
coord_fixed()
tune::collect_metrics(data_fit)
## # A tibble: 2 × 4
## .metric .estimator .estimate .config
## <chr> <chr> <dbl> <chr>
## 1 rmse standard 0.816 pre0_mod0_post0
## 2 rsq standard 0.452 pre0_mod0_post0
tune::collect_predictions(data_fit) %>%
ggplot(aes(total_weeks, .pred)) +
geom_point(alpha = 0.3, fill = "midnightblue") +
geom_abline(lty = 2, color = "gray50") +
coord_fixed()