SF Rents: Build a regression model to predict the rent (price). Use the rent dataset.
rent <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-07-05/rent.csv')
## Rows: 200796 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): post_id, nhood, city, county, address, title, descr, details
## dbl (9): date, year, price, beds, baths, sqft, room_in_apt, lat, lon
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
A quick skimming data shows the following:
rent %>% skimr::skim()
Name | Piped data |
Number of rows | 200796 |
Number of columns | 17 |
_______________________ | |
Column type frequency: | |
character | 8 |
numeric | 9 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
post_id | 0 | 1.00 | 9 | 14 | 0 | 200796 | 0 |
nhood | 0 | 1.00 | 4 | 43 | 0 | 167 | 0 |
city | 0 | 1.00 | 5 | 19 | 0 | 104 | 0 |
county | 1394 | 0.99 | 4 | 13 | 0 | 10 | 0 |
address | 196888 | 0.02 | 1 | 38 | 0 | 2869 | 0 |
title | 2517 | 0.99 | 2 | 298 | 0 | 184961 | 0 |
descr | 197542 | 0.02 | 13 | 16975 | 0 | 3025 | 0 |
details | 192780 | 0.04 | 4 | 595 | 0 | 7667 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
date | 0 | 1.00 | 20095718.38 | 44694.07 | 20000902.00 | 20050227.00 | 20110924.00 | 20120805.0 | 20180717.00 | ▁▇▁▆▃ |
year | 0 | 1.00 | 2009.51 | 4.48 | 2000.00 | 2005.00 | 2011.00 | 2012.0 | 2018.00 | ▁▇▁▆▃ |
price | 0 | 1.00 | 2135.36 | 1427.75 | 220.00 | 1295.00 | 1800.00 | 2505.0 | 40000.00 | ▇▁▁▁▁ |
beds | 6608 | 0.97 | 1.89 | 1.08 | 0.00 | 1.00 | 2.00 | 3.0 | 12.00 | ▇▂▁▁▁ |
baths | 158121 | 0.21 | 1.68 | 0.69 | 1.00 | 1.00 | 2.00 | 2.0 | 8.00 | ▇▁▁▁▁ |
sqft | 136117 | 0.32 | 1201.83 | 5000.22 | 80.00 | 750.00 | 1000.00 | 1360.0 | 900000.00 | ▇▁▁▁▁ |
room_in_apt | 0 | 1.00 | 0.00 | 0.04 | 0.00 | 0.00 | 0.00 | 0.0 | 1.00 | ▇▁▁▁▁ |
lat | 193145 | 0.04 | 37.67 | 0.35 | 33.57 | 37.40 | 37.76 | 37.8 | 40.43 | ▁▁▅▇▁ |
lon | 196484 | 0.02 | -122.21 | 0.78 | -123.20 | -122.42 | -122.26 | -122.0 | -74.20 | ▇▁▁▁▁ |
data <- rent %>%
# Treat the target variable with a positively skewed distribution
mutate(price = log(price)) %>%
# Treat missing values
na.omit() %>%
select(post_id, nhood, price, beds, baths, sqft, room_in_apt, title)
# data <- data %>% sample_n(100)
data %>% glimpse()
data %>% skimr::skim()
data %>% select(-post_id) %>% explore()
data %>% describe_all()
data %>% describe_cat(nhood)
data %>% select(-post_id) %>% explore_all(target = price)
spacy_initialize(model = "en_core_web_sm")
## Found 'spacy_condaenv'. spacyr will use this environment
## successfully initialized (spaCy Version: 3.1.3, language model: en_core_web_sm)
## (python options: type = "condaenv", value = "spacy_condaenv")
# process documents and obtain a data.table
tidy_data <- data %>%
# Parse title
mutate(title_parsed = map(.x = .$title, .f = ~spacy_parse(.x))) %>%
unnest(title_parsed) %>%
# Select nouns and adjectives
filter(pos %in% c("ADJ", "NOUN"))
data_filtered <- tidy_data %>%
filter(str_detect(lemma, regex("[a-z]", ignore_case = TRUE))) %>%
group_by(lemma) %>%
summarise(
n = n(),
avg_price = mean(price)
) %>%
filter(n > 150)
data_filtered %>%
ggplot(aes(n, avg_price)) +
# geom_point() +
geom_text(aes(label = lemma), check_overlap = TRUE) +
geom_hline(yintercept = mean(data_filtered$avg_price),
linetype = "dotted", linewidth = 2, color = "darkgray") +
scale_x_log10()
set.seed(1234)
data_split <- initial_split(data)
data_train <- training(data_split)
data_test <- testing(data_split)
set.seed(2345)
data_folds <- rsample::vfold_cv(data_train)
data_folds
## # 10-fold cross-validation
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [1035/115]> Fold01
## 2 <split [1035/115]> Fold02
## 3 <split [1035/115]> Fold03
## 4 <split [1035/115]> Fold04
## 5 <split [1035/115]> Fold05
## 6 <split [1035/115]> Fold06
## 7 <split [1035/115]> Fold07
## 8 <split [1035/115]> Fold08
## 9 <split [1035/115]> Fold09
## 10 <split [1035/115]> Fold10
library(usemodels)
use_xgboost(price ~ sqft + baths, data = data_train)
## xgboost_recipe <-
## recipe(formula = price ~ sqft + baths, data = data_train) %>%
## step_zv(all_predictors())
##
## xgboost_spec <-
## boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
## loss_reduction = tune(), sample_size = tune()) %>%
## set_mode("classification") %>%
## set_engine("xgboost")
##
## xgboost_workflow <-
## workflow() %>%
## add_recipe(xgboost_recipe) %>%
## add_model(xgboost_spec)
##
## set.seed(76431)
## xgboost_tune <-
## tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
# xgboost_recipe <-
# recipe(formula = price ~ ., data = data_train) %>%
# recipes::update_role(post_id, new_role = "id") %>%
# step_tokenize(title, engine = "spacyr") %>%
# step_lemma(title) %>%
# step_pos_filter(title, keep_tags = c("NOUN", "ADJ")) %>%
# step_tokenfilter(title, max_tokens = 150) %>%
# step_tfidf(title) %>%
# step_other(nhood) %>%
# step_dummy(nhood) %>%
# step_log(price, sqft, baths) # To transform variables with skewed distribution
xgboost_recipe <-
recipe(formula = price ~ ., data = data_train) %>%
recipes::update_role(post_id, new_role = "id") %>%
step_tokenize(title) %>%
step_tokenfilter(title, max_tokens = 100) %>%
step_tfidf(title) %>%
step_other(nhood) %>%
step_dummy(nhood) %>%
step_log(sqft, baths) # To transform variables with skewed distribution
xgboost_recipe %>% prep() %>% bake(new_data = NULL) %>% glimpse()
## Rows: 1,150
## Columns: 107
## $ post_id <fct> 4956734544, 4834839287, 4790045716, 597279835…
## $ beds <dbl> 2, 0, 1, 1, 3, 2, 3, 2, 2, 2, 1, 1, 4, 4, 4, …
## $ baths <dbl> 0.6931472, 0.0000000, 0.0000000, 0.0000000, 0…
## $ sqft <dbl> 7.340836, 5.991465, 6.618739, 6.309918, 7.495…
## $ room_in_apt <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ price <dbl> 8.779557, 7.309881, 7.783224, 7.374629, 8.131…
## $ tfidf_title_1 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_1.5 <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_1ba <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_1br <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_2 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_2.5 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_2ba <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_2bd <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_2br <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_3 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_3br <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_4 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_a <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_amp <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_and <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_apartment <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_apt <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_area <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_at <dbl> 1.1229922, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_available <dbl> 0.0000000, 0.0000000, 0.5885586, 0.0000000, 0…
## $ tfidf_title_ba <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_bart <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_bath <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_bay <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_bd <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_beautiful <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.424…
## $ tfidf_title_bed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_bedroom <dbl> 0.0000000, 0.0000000, 0.4088961, 0.0000000, 0…
## $ tfidf_title_br <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_charming <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_city <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_close <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_community <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_condo <dbl> 0.8358416, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_cupertino <dbl> 0.0000000, 1.0431691, 0.0000000, 0.0000000, 0…
## $ tfidf_title_d <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_downtown <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_duplex <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_family <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_floor <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_for <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_free <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_from <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_fully <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_furnished <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_garage <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_garden <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_gorgeous <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_great <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_heart <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_hill <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_home <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_house <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_in <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_large <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_living <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_located <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_location <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_loft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_luxury <dbl> 1.1229922, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_modern <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_month <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_move <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_near <dbl> 0.0000000, 0.8198097, 0.0000000, 0.0000000, 0…
## $ tfidf_title_neighborhood <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_new <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_nice <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_now <dbl> 0.0000000, 0.0000000, 0.6646053, 0.0000000, 0…
## $ tfidf_title_of <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_on <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_one <dbl> 0.0000000, 0.0000000, 0.6356108, 0.0000000, 0…
## $ tfidf_title_open <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_park <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_parking <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_quiet <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_remodeled <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_rent <dbl> 0.0000000, 0.6636341, 0.0000000, 0.0000000, 0…
## $ tfidf_title_san <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_schools <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_see <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_single <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_spacious <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_studio <dbl> 0.0000000, 0.8252187, 0.0000000, 0.0000000, 0…
## $ tfidf_title_the <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_this <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_to <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_today <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_top <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_townhome <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_townhouse <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_two <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_unit <dbl> 0.0000000, 0.0000000, 0.0000000, 3.3008747, 0…
## $ tfidf_title_updated <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_valley <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_view <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_views <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_w <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_walk <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_with <dbl> 0.0000000, 0.0000000, 0.4718561, 0.0000000, 0…
## $ tfidf_title_your <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ nhood_other <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
xgboost_spec <-
boost_tree(trees = tune(), min_n = tune()) %>%
set_mode("regression") %>%
set_engine("xgboost")
xgboost_workflow <-
workflow() %>%
add_recipe(xgboost_recipe) %>%
add_model(xgboost_spec)
set.seed(15793)
doParallel::registerDoParallel()
xgboost_tune <-
tune_grid(xgboost_workflow,
resamples = data_folds,
grid = 5)
show_best(xgboost_tune, metric = "rmse")
## # A tibble: 5 × 8
## trees min_n .metric .estimator mean n std_err .config
## <int> <int> <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 83 14 rmse standard 0.351 10 0.00873 Preprocessor1_Model2
## 2 458 31 rmse standard 0.379 10 0.00938 Preprocessor1_Model4
## 3 923 38 rmse standard 0.393 10 0.0113 Preprocessor1_Model5
## 4 1637 20 rmse standard 0.397 10 0.00744 Preprocessor1_Model3
## 5 1593 4 rmse standard 0.402 10 0.00800 Preprocessor1_Model1
# How did all the possible parameter combinations do?
autoplot(xgboost_tune)
We can finalize our random forest workflow with the best performing parameters.
final_rf <- xgboost_workflow %>%
finalize_workflow(select_best(xgboost_tune, "rmse"))
The function last_fit() fits this finalized random forest one last time to the training data and evaluates one last time on the testing data.
data_fit <- last_fit(final_rf, data_split)
data_fit
## # Resampling results
## # Manual resampling
## # A tibble: 1 × 6
## splits id .metrics .notes .predictions .workflow
## <list> <chr> <list> <list> <list> <list>
## 1 <split [1150/384]> train/test split <tibble> <tibble> <tibble> <workflow>
collect_metrics(data_fit)
## # A tibble: 2 × 4
## .metric .estimator .estimate .config
## <chr> <chr> <dbl> <chr>
## 1 rmse standard 0.322 Preprocessor1_Model1
## 2 rsq standard 0.484 Preprocessor1_Model1
collect_predictions(data_fit)
## # A tibble: 384 × 5
## id .pred .row price .config
## <chr> <dbl> <int> <dbl> <chr>
## 1 train/test split 7.89 1 7.72 Preprocessor1_Model1
## 2 train/test split 7.92 2 7.88 Preprocessor1_Model1
## 3 train/test split 7.73 3 7.58 Preprocessor1_Model1
## 4 train/test split 7.78 5 8.10 Preprocessor1_Model1
## 5 train/test split 8.12 7 8.97 Preprocessor1_Model1
## 6 train/test split 7.35 9 7.72 Preprocessor1_Model1
## 7 train/test split 8.36 12 7.88 Preprocessor1_Model1
## 8 train/test split 7.74 14 7.74 Preprocessor1_Model1
## 9 train/test split 7.72 15 7.70 Preprocessor1_Model1
## 10 train/test split 7.64 18 7.70 Preprocessor1_Model1
## # ℹ 374 more rows
collect_predictions(data_fit) %>%
ggplot(aes(price, .pred)) +
geom_point(alpha = 0.5, fill = "midnightblue") +
geom_abline(lty = 2, color = "gray50") +
coord_fixed()