Apply it to your Data 1

SF Rents: Build a regression model to predict the rent (price). Use the rent dataset.

Import Data

# Read the TidyTuesday (2022-07-05) rental listings CSV straight from GitHub.
rent <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-07-05/rent.csv"
)

## Rows: 200796 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): post_id, nhood, city, county, address, title, descr, details
## dbl (9): date, year, price, beds, baths, sqft, room_in_apt, lat, lon
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Clean data

A quick skim of the data shows the following:

Missing values are present in many variables.
Many numeric variables have a positively skewed distribution.

# Per-column overview of the raw data (missing counts, completeness, quantiles),
# used to decide how to clean it.
rent %>% skimr::skim()

Data summary
Name	Piped data
Number of rows	200796
Number of columns	17
_______________________
Column type frequency:
character	8
numeric	9
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
post_id	0	1.00	9	14	200796
nhood	0	1.00	4	43	167
city	0	1.00	5	19	104
county	1394	0.99	4	13	10
address	196888	0.02	1	38	2869
title	2517	0.99	2	298	184961
descr	197542	0.02	13	16975	3025
details	192780	0.04	4	595	7667

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
date	0	1.00	20095718.38	44694.07	20000902.00	20050227.00	20110924.00	20120805.0	20180717.00	▁▇▁▆▃
year	0	1.00	2009.51	4.48	2000.00	2005.00	2011.00	2012.0	2018.00	▁▇▁▆▃
price	0	1.00	2135.36	1427.75	220.00	1295.00	1800.00	2505.0	40000.00	▇▁▁▁▁
beds	6608	0.97	1.89	1.08	0.00	1.00	2.00	3.0	12.00	▇▂▁▁▁
baths	158121	0.21	1.68	0.69	1.00	1.00	2.00	2.0	8.00	▇▁▁▁▁
sqft	136117	0.32	1201.83	5000.22	80.00	750.00	1000.00	1360.0	900000.00	▇▁▁▁▁
room_in_apt	0	1.00	0.00	0.04	0.00	0.00	0.00	0.0	1.00	▇▁▁▁▁
lat	193145	0.04	37.67	0.35	33.57	37.40	37.76	37.8	40.43	▁▁▅▇▁
lon	196484	0.02	-122.21	0.78	-123.20	-122.42	-122.26	-122.0	-74.20	▇▁▁▁▁

# Build the modelling table: log-transform the target, drop incomplete rows,
# and keep only the id, the predictors and the target.
#
# NOTE(review): na.omit() runs BEFORE select(), so a listing is removed when ANY
# of the 17 raw columns is NA -- including columns that are not kept afterwards
# (address, descr, details, lat and lon are each 96-98% missing according to
# the skim summary). This is why only about 1.5k of the 200,796 listings remain.
# If that shrinkage is not intended, move select() ahead of na.omit(); be aware
# that every downstream result would then change.
data <- rent %>% 
    
    # Log-transform the target because price is strongly right-skewed
    mutate(price = log(price)) %>%
    
    # Drop every row that has a missing value in any column (see note above)
    na.omit() %>%
    
    select(post_id, nhood, price, beds, baths, sqft, room_in_apt, title)


# Optional down-sampling for quick experiments (currently disabled)
# data <- data %>% sample_n(100)

Explore Data

# Column types and first values of the cleaned data
data %>% glimpse()
# Missingness and distribution summary of the cleaned data
data %>% skimr::skim()
# Exploration of all columns except the identifier
# (explore()/describe_*() presumably come from the {explore} package -- confirm)
data %>% select(-post_id) %>% explore()
# One-line description of every variable
data %>% describe_all()
# Frequency description of the neighbourhood variable
data %>% describe_cat(nhood)
# Every remaining variable explored against the target (log price)
data %>% select(-post_id) %>% explore_all(target = price)

# Start the spaCy backend with the small English model; spacy_parse() is used
# on the listing titles further down.
spacy_initialize(model = "en_core_web_sm")

## Found 'spacy_condaenv'. spacyr will use this environment

## successfully initialized (spaCy Version: 3.1.3, language model: en_core_web_sm)

## (python options: type = "condaenv", value = "spacy_condaenv")

# Parse every listing title with spaCy and expand the result so that there is
# one row per token, carrying the listing's own columns alongside the parse.
tidy_data <- data %>%
    mutate(title_parsed = map(title, spacy_parse)) %>%
    unnest(cols = title_parsed) %>%
    # Keep adjectives and nouns only
    filter(pos %in% c("ADJ", "NOUN"))

# Frequency and mean log price per lemma, restricted to common lemmas.
data_filtered <- tidy_data %>%
    # Discard lemmas that contain no letter at all (numbers, symbols)
    filter(str_detect(lemma, regex("[a-z]", ignore_case = TRUE))) %>%
    group_by(lemma) %>%
    summarise(n = n(), avg_price = mean(price), .groups = "drop") %>%
    # Keep lemmas that occur more than 150 times
    filter(n > 150)

# Lemma frequency (log10 x-axis) against the mean log price of the titles that
# use it. The dotted line is the mean of the plotted per-lemma averages.
ggplot(data_filtered, aes(x = n, y = avg_price)) +
    # Labels instead of points (geom_point() intentionally left out)
    geom_text(aes(label = lemma), check_overlap = TRUE) +
    geom_hline(
        yintercept = mean(data_filtered[["avg_price"]]),
        linetype = "dotted",
        linewidth = 2,
        color = "darkgray"
    ) +
    scale_x_log10()

Build a Model

# Seeded train/test split using rsample's default proportion (3/4 training).
set.seed(1234)
data_split <- initial_split(data, prop = 3/4)
data_train <- training(data_split)
data_test  <- testing(data_split)

# Seeded 10-fold cross-validation on the training portion only.
set.seed(2345)
data_folds <- rsample::vfold_cv(data_train, v = 10)
data_folds

## #  10-fold cross-validation 
## # A tibble: 10 × 2
##    splits             id    
##    <list>             <chr> 
##  1 <split [1035/115]> Fold01
##  2 <split [1035/115]> Fold02
##  3 <split [1035/115]> Fold03
##  4 <split [1035/115]> Fold04
##  5 <split [1035/115]> Fold05
##  6 <split [1035/115]> Fold06
##  7 <split [1035/115]> Fold07
##  8 <split [1035/115]> Fold08
##  9 <split [1035/115]> Fold09
## 10 <split [1035/115]> Fold10

# Print boilerplate recipe / model spec / workflow / tuning code for an xgboost
# model of this formula; it serves as a starting template and fits nothing.
library(usemodels)
use_xgboost(price ~ sqft + baths, data = data_train)

## xgboost_recipe <- 
##   recipe(formula = price ~ sqft + baths, data = data_train) %>% 
##   step_zv(all_predictors()) 
## 
## xgboost_spec <- 
##   boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(), 
##     loss_reduction = tune(), sample_size = tune()) %>% 
##   set_mode("classification") %>% 
##   set_engine("xgboost") 
## 
## xgboost_workflow <- 
##   workflow() %>% 
##   add_recipe(xgboost_recipe) %>% 
##   add_model(xgboost_spec) 
## 
## set.seed(76431)
## xgboost_tune <-
##   tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))

# xgboost_recipe <- 
#   recipe(formula = price ~ ., data = data_train) %>%
#     recipes::update_role(post_id, new_role = "id") %>%
#     step_tokenize(title, engine = "spacyr") %>%
#     step_lemma(title) %>%
#     step_pos_filter(title, keep_tags = c("NOUN", "ADJ")) %>%
#     step_tokenfilter(title, max_tokens = 150) %>%
#     step_tfidf(title) %>%
#     step_other(nhood) %>%
#     step_dummy(nhood) %>%
#     step_log(price, sqft, baths) # To transform variables with skewed distribution

# Preprocessing recipe: price is the outcome and every other column starts as a
# predictor; post_id is then demoted to an identifier so it is not modelled.
xgboost_recipe <- recipe(price ~ ., data = data_train) %>%
  recipes::update_role(post_id, new_role = "id") %>%
  # Title text -> tokens -> at most 100 retained tokens -> tf-idf columns
  step_tokenize(title) %>%
  step_tokenfilter(title, max_tokens = 100) %>%
  step_tfidf(title) %>%
  # Pool rare neighbourhoods into "other", then dummy-encode
  step_other(nhood) %>%
  step_dummy(nhood) %>%
  # Log-transform the skewed numeric predictors
  step_log(sqft, baths)

    

# Sanity check: estimate the recipe on the training data, return the processed
# training set and inspect the resulting columns.
xgboost_recipe %>%
  prep() %>%
  bake(new_data = NULL) %>%
  glimpse()

## Rows: 1,150
## Columns: 107
## $ post_id                  <fct> 4956734544, 4834839287, 4790045716, 597279835…
## $ beds                     <dbl> 2, 0, 1, 1, 3, 2, 3, 2, 2, 2, 1, 1, 4, 4, 4, …
## $ baths                    <dbl> 0.6931472, 0.0000000, 0.0000000, 0.0000000, 0…
## $ sqft                     <dbl> 7.340836, 5.991465, 6.618739, 6.309918, 7.495…
## $ room_in_apt              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ price                    <dbl> 8.779557, 7.309881, 7.783224, 7.374629, 8.131…
## $ tfidf_title_1            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_1.5          <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_1ba          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_1br          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_2            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_2.5          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_2ba          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_2bd          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_2br          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_3            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_3br          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_4            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_a            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_amp          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_and          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_apartment    <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_apt          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_area         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_at           <dbl> 1.1229922, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_available    <dbl> 0.0000000, 0.0000000, 0.5885586, 0.0000000, 0…
## $ tfidf_title_ba           <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_bart         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_bath         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_bay          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_bd           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_beautiful    <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.424…
## $ tfidf_title_bed          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_bedroom      <dbl> 0.0000000, 0.0000000, 0.4088961, 0.0000000, 0…
## $ tfidf_title_br           <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_charming     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_city         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_close        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_community    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_condo        <dbl> 0.8358416, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_cupertino    <dbl> 0.0000000, 1.0431691, 0.0000000, 0.0000000, 0…
## $ tfidf_title_d            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_downtown     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_duplex       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_family       <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_floor        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_for          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_free         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_from         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_fully        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_furnished    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_garage       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_garden       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_gorgeous     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_great        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_heart        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_hill         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_home         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_house        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_in           <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_large        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_living       <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_located      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_location     <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_loft         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_luxury       <dbl> 1.1229922, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_modern       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_month        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_move         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_near         <dbl> 0.0000000, 0.8198097, 0.0000000, 0.0000000, 0…
## $ tfidf_title_neighborhood <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_new          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_nice         <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_now          <dbl> 0.0000000, 0.0000000, 0.6646053, 0.0000000, 0…
## $ tfidf_title_of           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_on           <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_one          <dbl> 0.0000000, 0.0000000, 0.6356108, 0.0000000, 0…
## $ tfidf_title_open         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_park         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_parking      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_quiet        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_remodeled    <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_rent         <dbl> 0.0000000, 0.6636341, 0.0000000, 0.0000000, 0…
## $ tfidf_title_san          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_schools      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_see          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_single       <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_spacious     <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_studio       <dbl> 0.0000000, 0.8252187, 0.0000000, 0.0000000, 0…
## $ tfidf_title_the          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_this         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_to           <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_today        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_top          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_townhome     <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_townhouse    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_two          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_unit         <dbl> 0.0000000, 0.0000000, 0.0000000, 3.3008747, 0…
## $ tfidf_title_updated      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_valley       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_view         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_views        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_w            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_walk         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_with         <dbl> 0.0000000, 0.0000000, 0.4718561, 0.0000000, 0…
## $ tfidf_title_your         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ nhood_other              <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …

# Boosted-tree regression on the xgboost engine; the number of trees and the
# minimum node size are left open for tuning.
xgboost_spec <- boost_tree(trees = tune(), min_n = tune()) %>%
  set_engine("xgboost") %>%
  set_mode("regression")

# Bundle the model specification and the preprocessing recipe into one workflow.
xgboost_workflow <- workflow() %>%
  add_model(xgboost_spec) %>%
  add_recipe(xgboost_recipe)

# Seeded tuning run: register a parallel backend, then evaluate 5 candidate
# (trees, min_n) combinations on the cross-validation folds.
set.seed(15793)
doParallel::registerDoParallel()
xgboost_tune <- tune_grid(
  xgboost_workflow,
  resamples = data_folds,
  grid = 5
)

Explore Results

# Rank the tuned candidates by their cross-validated RMSE (lowest first).
show_best(xgboost_tune, metric = "rmse")

## # A tibble: 5 × 8
##   trees min_n .metric .estimator  mean     n std_err .config             
##   <int> <int> <chr>   <chr>      <dbl> <int>   <dbl> <chr>               
## 1    83    14 rmse    standard   0.351    10 0.00873 Preprocessor1_Model2
## 2   458    31 rmse    standard   0.379    10 0.00938 Preprocessor1_Model4
## 3   923    38 rmse    standard   0.393    10 0.0113  Preprocessor1_Model5
## 4  1637    20 rmse    standard   0.397    10 0.00744 Preprocessor1_Model3
## 5  1593     4 rmse    standard   0.402    10 0.00800 Preprocessor1_Model1

# How did all the candidate parameter combinations perform?
# Plots the resampled metrics against the tuned parameters.
autoplot(xgboost_tune)

We can finalize our XGBoost workflow with the best-performing parameters.

# Plug the hyperparameters with the lowest cross-validated RMSE into the
# workflow. `metric` is passed by name: recent tune releases reject a
# positional metric in select_best(), and show_best() in this file already
# names it. The object is still called `final_rf` because later code refers
# to it by that name, although the model is xgboost.
final_rf <- xgboost_workflow %>% 
    finalize_workflow(select_best(xgboost_tune, metric = "rmse"))

The function last_fit() fits this finalized XGBoost workflow one last time to the training data and evaluates it one last time on the testing data.

# Final fit: train the finalized workflow on the training portion of the split
# and evaluate it once on the held-out test portion.
data_fit <- final_rf %>%
    last_fit(split = data_split)
data_fit

## # Resampling results
## # Manual resampling 
## # A tibble: 1 × 6
##   splits             id               .metrics .notes   .predictions .workflow 
##   <list>             <chr>            <list>   <list>   <list>       <list>    
## 1 <split [1150/384]> train/test split <tibble> <tibble> <tibble>     <workflow>

Evaluate model

# Test-set performance of the final model (RMSE and R-squared on log price).
collect_metrics(data_fit)

## # A tibble: 2 × 4
##   .metric .estimator .estimate .config             
##   <chr>   <chr>          <dbl> <chr>               
## 1 rmse    standard       0.322 Preprocessor1_Model1
## 2 rsq     standard       0.484 Preprocessor1_Model1

# Test-set predictions (.pred) next to the observed log price.
collect_predictions(data_fit)

## # A tibble: 384 × 5
##    id               .pred  .row price .config             
##    <chr>            <dbl> <int> <dbl> <chr>               
##  1 train/test split  7.89     1  7.72 Preprocessor1_Model1
##  2 train/test split  7.92     2  7.88 Preprocessor1_Model1
##  3 train/test split  7.73     3  7.58 Preprocessor1_Model1
##  4 train/test split  7.78     5  8.10 Preprocessor1_Model1
##  5 train/test split  8.12     7  8.97 Preprocessor1_Model1
##  6 train/test split  7.35     9  7.72 Preprocessor1_Model1
##  7 train/test split  8.36    12  7.88 Preprocessor1_Model1
##  8 train/test split  7.74    14  7.74 Preprocessor1_Model1
##  9 train/test split  7.72    15  7.70 Preprocessor1_Model1
## 10 train/test split  7.64    18  7.70 Preprocessor1_Model1
## # ℹ 374 more rows

# Predicted vs. observed log price on the test set. Points on the dashed
# identity line (slope 1, intercept 0) are perfect predictions; coord_fixed()
# keeps both axes on the same scale so that line sits at 45 degrees.
collect_predictions(data_fit) %>%
    ggplot(aes(price, .pred)) +
    # The default point shape has no fill, so `fill` was silently ignored;
    # `color` is the aesthetic that actually tints the points.
    geom_point(alpha = 0.5, color = "midnightblue") +
    geom_abline(lty = 2, color = "gray50") +
    coord_fixed()