Goal: predict rental prices in the San Francisco Bay Area rental market.
The data come from the TidyTuesday 2022-07-05 release (`rent.csv`); the full URL appears in the `read_csv()` call below.
# Download the rental listings from the TidyTuesday repository.
rent <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-07-05/rent.csv"
)

# Per-column overview (types, missingness, distribution summaries).
# Called directly rather than piped so the summary is labelled "rent".
skimr::skim(rent)
| Name | rent |
| Number of rows | 200796 |
| Number of columns | 17 |
| _______________________ | |
| Column type frequency: | |
| character | 8 |
| numeric | 9 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| post_id | 0 | 1.00 | 9 | 14 | 0 | 200796 | 0 |
| nhood | 0 | 1.00 | 4 | 43 | 0 | 167 | 0 |
| city | 0 | 1.00 | 5 | 19 | 0 | 104 | 0 |
| county | 1394 | 0.99 | 4 | 13 | 0 | 10 | 0 |
| address | 196888 | 0.02 | 1 | 38 | 0 | 2869 | 0 |
| title | 2517 | 0.99 | 2 | 298 | 0 | 184961 | 0 |
| descr | 197542 | 0.02 | 13 | 16975 | 0 | 3025 | 0 |
| details | 192780 | 0.04 | 4 | 595 | 0 | 7667 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| date | 0 | 1.00 | 20095718.38 | 44694.07 | 20000902.00 | 20050227.00 | 20110924.00 | 20120805.0 | 20180717.00 | ▁▇▁▆▃ |
| year | 0 | 1.00 | 2009.51 | 4.48 | 2000.00 | 2005.00 | 2011.00 | 2012.0 | 2018.00 | ▁▇▁▆▃ |
| price | 0 | 1.00 | 2135.36 | 1427.75 | 220.00 | 1295.00 | 1800.00 | 2505.0 | 40000.00 | ▇▁▁▁▁ |
| beds | 6608 | 0.97 | 1.89 | 1.08 | 0.00 | 1.00 | 2.00 | 3.0 | 12.00 | ▇▂▁▁▁ |
| baths | 158121 | 0.21 | 1.68 | 0.69 | 1.00 | 1.00 | 2.00 | 2.0 | 8.00 | ▇▁▁▁▁ |
| sqft | 136117 | 0.32 | 1201.83 | 5000.22 | 80.00 | 750.00 | 1000.00 | 1360.0 | 900000.00 | ▇▁▁▁▁ |
| room_in_apt | 0 | 1.00 | 0.00 | 0.04 | 0.00 | 0.00 | 0.00 | 0.0 | 1.00 | ▇▁▁▁▁ |
| lat | 193145 | 0.04 | 37.67 | 0.35 | 33.57 | 37.40 | 37.76 | 37.8 | 40.43 | ▁▁▅▇▁ |
| lon | 196484 | 0.02 | -122.21 | 0.78 | -123.20 | -122.42 | -122.26 | -122.0 | -74.20 | ▇▁▁▁▁ |
# Build the analysis table: drop the columns that are not used further,
# then keep only the rows with no missing value in any remaining column.
data <- rent %>%
  select(
    -c(address, descr, details, lat, lon, date, year, room_in_apt)
  ) %>%
  na.omit()
Identify good predictors
sqft
# Scatter plot of price (x) against square footage (y, log10 axis).
ggplot(data, aes(x = price, y = sqft)) +
  geom_point() +
  scale_y_log10()
beds
# Price distribution per bedroom count: one box for each distinct value of
# beds, with price on the x axis.
ggplot(data, aes(x = price, y = as.factor(beds))) +
  geom_boxplot()
title
# Which title words go with the highest rents?
data %>%
  # one row per (listing, word)
  unnest_tokens(word, title) %>%
  # mean price and number of occurrences for every word
  group_by(word) %>%
  summarise(
    price = mean(price),
    n = n()
  ) %>%
  ungroup() %>%
  # keep words seen more than 10 times ...
  filter(n > 10) %>%
  # ... that contain no digit
  filter(str_detect(word, "\\d", negate = TRUE)) %>%
  # 20 words with the highest mean price
  slice_max(price, n = 20)
## # A tibble: 20 × 3
## word price n
## <chr> <dbl> <int>
## 1 atherton 8547. 16
## 2 millennium 8473. 11
## 3 woodside 7867. 12
## 4 j.wavro 7822. 13
## 5 estate 7565. 24
## 6 roof 7464. 18
## 7 gpk 7409. 11
## 8 id 7142. 13
## 9 decks 7135. 20
## 10 relisto 6964. 20
## 11 pano 6759. 17
## 12 cow 6188. 28
## 13 telegraph 6174. 22
## 14 infinity 6158. 11
## 15 hollow 5990. 32
## 16 residence 5897. 16
## 17 tower 5637. 20
## 18 foundation 5626. 21
## 19 doorman 5582. 20
## 20 lumina 5507. 14
EDA shortcut
# step 1: prepare data
# Remove the identifier and the free-text title, then let binarize() turn
# the remaining columns into 0/1 indicator columns.
data_binarized_tbl <- data %>%
  select(-c(post_id, title)) %>%
  binarize()

glimpse(data_binarized_tbl)
## Rows: 14,394
## Columns: 85
## $ nhood__campbell <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__concord_/_pleasant_hill_/_martinez` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__cupertino <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__daly_city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__danville_/_san_ramon` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__dublin_/_pleasanton` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__fairfield_/_vacaville` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__foster_city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__hayward_/_castro_valley` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__milpitas <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__mountain_view <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__napa_county <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__palo_alto <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__petaluma <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__pittsburg_/_antioch` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__rohnert_pk_/_cotati` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_francisco <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_central <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_east <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_north <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_south <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_west <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_mateo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_rafael <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__santa_clara <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__santa_cruz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__santa_rosa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__SOMA_/_south_beach` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__sunnyvale <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__union_city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__vallejo_/_benicia` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__willow_glen_/_cambrian` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__-OTHER` <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ city__cambrian <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__campbell <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__concord <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__cupertino <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__daly_city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__dublin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__fairfield <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__foster_city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__hayward <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__milpitas <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__mountain_view <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__napa_county <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__oakland <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__palo_alto <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__petaluma <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__pittsburg <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__rohnert_park <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_francisco <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_jose <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_mateo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_rafael <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_ramon <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__santa_clara <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__santa_cruz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__santa_rosa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__sunnyvale <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__union_city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__vallejo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `city__-OTHER` <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ county__alameda <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ county__contra_costa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__marin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__napa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__san_francisco <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__san_mateo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__santa_clara <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__santa_cruz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__solano <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__sonoma <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `price__-Inf_1850` <dbl> 0, 1, 0, 1, 0, 1, 1, 0, 0,…
## $ price__1850_2450 <dbl> 0, 0, 1, 0, 0, 0, 0, 1, 1,…
## $ price__2450_3225 <dbl> 1, 0, 0, 0, 1, 0, 0, 0, 0,…
## $ price__3225_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `beds__-Inf_2` <dbl> 0, 1, 0, 1, 1, 1, 0, 0, 1,…
## $ beds__2_3 <dbl> 0, 0, 1, 0, 0, 0, 1, 1, 0,…
## $ beds__3_Inf <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `baths__-Inf_2` <dbl> 0, 1, 1, 1, 1, 1, 0, 0, 1,…
## $ baths__2_Inf <dbl> 1, 0, 0, 0, 0, 0, 1, 1, 0,…
## $ `sqft__-Inf_887` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1,…
## $ sqft__887_1100 <dbl> 0, 0, 0, 1, 0, 0, 0, 1, 0,…
## $ sqft__1100_1500 <dbl> 0, 0, 1, 0, 1, 1, 0, 0, 0,…
## $ sqft__1500_Inf <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0,…
# step 2: correlate
# Correlate every indicator column with the highest price bin.
data_corr_tbl <- correlate(data_binarized_tbl, target = price__3225_Inf)

# step 3: plot
plot_correlation_funnel(data_corr_tbl)
## Warning: ggrepel: 69 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Work on a random subsample of 100 listings.
# NOTE(review): presumably done to keep model tuning fast -- confirm before
# scaling up to the full table.
# The seed is set *before* sampling: previously the subsample was drawn
# unseeded, so the split, the folds and every downstream result changed on
# each run even though the later steps were seeded.
set.seed(1234)
data <- slice_sample(data, n = 100)

# Split into training and test dataset
data_split <- rsample::initial_split(data)
data_train <- rsample::training(data_split)
data_test <- rsample::testing(data_split)

# further split training dataset for cross-validation
set.seed(2345)
data_cv <- rsample::vfold_cv(data_train)
data_cv
## # 10-fold cross-validation
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [67/8]> Fold01
## 2 <split [67/8]> Fold02
## 3 <split [67/8]> Fold03
## 4 <split [67/8]> Fold04
## 5 <split [67/8]> Fold05
## 6 <split [68/7]> Fold06
## 7 <split [68/7]> Fold07
## 8 <split [68/7]> Fold08
## 9 <split [68/7]> Fold09
## 10 <split [68/7]> Fold10
library(usemodels)

# Print a tidymodels code scaffold (recipe, model spec, workflow, tuning
# call) for an xgboost model of price on all other columns.
usemodels::use_xgboost(formula = price ~ ., data = data_train)
## xgboost_recipe <-
## recipe(formula = price ~ ., data = data_train) %>%
## step_zv(all_predictors())
##
## xgboost_spec <-
## boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
## loss_reduction = tune(), sample_size = tune()) %>%
## set_mode("classification") %>%
## set_engine("xgboost")
##
## xgboost_workflow <-
## workflow() %>%
## add_recipe(xgboost_recipe) %>%
## add_model(xgboost_spec)
##
## set.seed(6804)
## xgboost_tune <-
## tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
# specify recipe
# Preprocessing for the xgboost model: price is the outcome, every other
# column starts as a predictor.
xgboost_recipe <-
  recipe(formula = price ~ ., data = data_train) %>%
  # keep post_id in the data but give it a non-predictor role
  recipes::update_role(post_id, new_role = "id variable") %>%
  # title text -> tokens -> at most 100 tokens -> tf-idf columns
  step_tokenize(title) %>%
  # The column selector is required here: without it the step selects no
  # column, nothing is filtered, and every distinct token becomes its own
  # tf-idf column.
  step_tokenfilter(title, max_tokens = 100) %>%
  step_tfidf(title) %>%
  # lump infrequent nhood / city levels together before dummy encoding
  step_other(nhood, city) %>%
  step_dummy(nhood, city, county, one_hot = TRUE) %>%
  step_YeoJohnson(sqft, beds, baths)

# Inspect the processed training data.
xgboost_recipe %>% prep() %>% juice() %>% glimpse()
## Rows: 75
## Columns: 474
## $ post_id <fct> 4611893739, 4935238058, 5830676495, 5…
## $ beds <dbl> 0.7584193, 1.2685806, 1.2685806, 1.26…
## $ baths <dbl> 0.5064741, 0.5064741, 0.6794059, 0.67…
## $ sqft <dbl> 1.657172, 1.661639, 1.664611, 1.66779…
## $ price <dbl> 1950, 2638, 2954, 3500, 2195, 1700, 1…
## $ tfidf_title_1 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ `tfidf_title_1,970` <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ `tfidf_title_1,974` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1.5 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1.5ba <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_10 <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_1000ft2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1016ft2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1025 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1052ft <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1056ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1060ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1085ft2 <dbl> 0.0000000, 0.0000000, 0.1443578, 0.00…
## $ tfidf_title_1088ft2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1100ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1100sf <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1134ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1155ft <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1192ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_12 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1200 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1270ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1272ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1285 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1290ft2 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1295 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1300ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1321 <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_1330ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1350ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_14 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1400 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1407sq <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1450 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1454ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1466 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1485 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1495 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_15 <dbl> 0.0000000, 0.0000000, 0.2433772, 0.00…
## $ tfidf_title_1500ft <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1550 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_15th <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1645 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1650 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1695 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_17 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1700ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1735 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_18 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1800 <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_1800ft2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_19 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1900 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1950 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1970ft2 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1974ft2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1980ft <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1b <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1ba <dbl> 0.2339399, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1bath <dbl> 0.0000000, 0.5413417, 0.0000000, 0.00…
## $ tfidf_title_1bd <dbl> 0.4330733, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1bed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1br <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1st <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2 <dbl> 0.00000000, 0.00000000, 0.08047286, 0…
## $ tfidf_title_2.5 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_2.5ba <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2000 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2050 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_21 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2169 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2195 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_22 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_2290 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_23 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2350 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_237 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2400 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2500 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2565 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_26 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2781 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2789 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_280 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2800 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_29 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2900 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_2954 <dbl> 0.0000000, 0.0000000, 0.1443578, 0.00…
## $ tfidf_title_2997 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2b <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_2ba <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_2bath <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2bathcondo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2bd <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2bed <dbl> 0.0000000, 0.4563323, 0.0000000, 0.00…
## $ tfidf_title_2br <dbl> 0.00000000, 0.00000000, 0.05329519, 0…
## $ tfidf_title_3 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_3.5ba <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_3000 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_3000ft <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_31 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_3184ft2 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_3250 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_3381 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_3600ft <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_366 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_3900 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_3bd <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_3br <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_4 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_4437 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_488ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_4br <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_5 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_525ft2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_5500 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_5850 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_5br <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_6500 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_650ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_650ft2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_665ft2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_6br <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_700 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_773ft2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_784ft <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_785sqft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_8 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_806ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_815ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_816ft2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_85 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_8950 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_9 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_915ft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_950ft <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_980ft2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_a <dbl> 0.2602690, 0.0000000, 0.0000000, 0.26…
## $ tfidf_title_about <dbl> 0.0000000, 0.5413417, 0.0000000, 0.00…
## $ tfidf_title_absolutely <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_access <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_acre <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_adeline <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_al <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_alma <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_almaden <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_amazing <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_and <dbl> 0.2772589, 0.0000000, 0.0000000, 0.27…
## $ tfidf_title_antioch <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_apartment <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_apartmentsemeryville <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_apt <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_ara <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_ask <dbl> 0.0000000, 0.5413417, 0.0000000, 0.00…
## $ tfidf_title_at <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_aug <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_available <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_ave <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_azari <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_b <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_ba <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_back <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_backyard <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_balcony <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_barn <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_bart <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_bascom <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_bath <dbl> 0.00000000, 0.00000000, 0.04023643, 0…
## $ tfidf_title_bathes <dbl> 0.0000000, 0.0000000, 0.0000000, 0.43…
## $ tfidf_title_bathroom <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_baths <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_bd <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_bdr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_beautiful <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_beautifulandbrandnew <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_beautifully <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_bed <dbl> 0.00000000, 0.00000000, 0.00000000, 0…
## $ tfidf_title_bedroom <dbl> 0.00000000, 0.00000000, 0.05474092, 0…
## $ tfidf_title_bedrooms <dbl> 0.0000000, 0.0000000, 0.0000000, 0.36…
## $ tfidf_title_beds <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_big <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_blossom <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_br <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_bth <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_buena <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_ca <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_california <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_car <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_carlos <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_castro <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_centr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_central <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_century <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_charming <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_cheap <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_close <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_cloverdale <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_comfortable <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_community <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_completely <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_condo <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_contemporary <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_control <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_convenient <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_copertino <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_corey <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_county <dbl> 0.0000000, 0.0000000, 0.1443578, 0.00…
## $ tfidf_title_craftsman <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_crown <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_cruz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_cupertino <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_custom <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_d <dbl> 0.3650658, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_daily <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_dec <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_den <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_detached <dbl> 0.4330733, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_dining <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_district <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_dolores <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_downstairs <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_downtown <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_dryer <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_dublin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_duboce <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_duplex <dbl> 0.3650658, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_easy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_edwardian <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_elegant <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_equipped <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_executive <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_expressway <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_fairfield <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_family <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_fantastic <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_favorite <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_fenced <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_find <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_fireplace <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_first <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_flat <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_flexible <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_floor <dbl> 0.0000000, 0.3728942, 0.0000000, 0.00…
## $ tfidf_title_floors <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_foods <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_for <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_francisco <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_free <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_from <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_ft <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_fully <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_garage <dbl> 0.2983153, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_garaged <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_garden <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_gardens <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_gated <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_geri <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_google <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_gorgeous <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_gourmet <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_granite <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_great <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_grind <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_ground <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_gym <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_half <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_har <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_hardwood <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_heights <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_here <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_hide <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_high <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_hill <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_hillside <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_holiday <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_home <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_house <dbl> 0.0000000, 0.0000000, 0.0000000, 0.26…
## $ tfidf_title_hse <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_huge <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_hwys <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_in <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_is <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_jose <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_kit <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_kitchen <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_lafayette <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_lakeshore <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_large <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_laundry <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_lease <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_like <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_livermore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_living <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_location <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_loft <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_lotsofclosets <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_lovely <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_luxurious <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_luxury <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_m <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_map <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_mar <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_market <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_medical <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_mediterranean <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_meridian <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_mid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_mile <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_mins <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_minutes <dbl> 0.0000000, 0.0000000, 0.1443578, 0.00…
## $ tfidf_title_mission <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_modern <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_month <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_montrachet <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_more <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_move <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_must <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_napa <dbl> 0.0000000, 0.0000000, 0.2887156, 0.00…
## $ tfidf_title_near <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_neighborhood <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_new <dbl> 0.0000000, 0.0000000, 0.0000000, 0.32…
## $ tfidf_title_newly <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_next <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_nice <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_northpointe <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_nov <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_now <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_oca <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_occupancy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_of <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_off <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_office <dbl> 0.0000000, 0.0000000, 0.0000000, 0.43…
## $ tfidf_title_on <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_one <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_only <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_open <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_our <dbl> 0.0000000, 0.5413417, 0.0000000, 0.00…
## $ tfidf_title_page <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_park <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_parking <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_parkside <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_patio <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_pets <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_pic <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_pittsburg <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_pleasanton <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_pm <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_pool <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_post <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_posting <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_price <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_quiet <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_rafael <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_ranch <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_ready <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_recently <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_reduction <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_relaxing <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_remodel <dbl> 0.0000000, 0.0000000, 0.0000000, 0.43…
## $ tfidf_title_remodeled <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_rent <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_rental <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_resim <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_restore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_rise <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_roewill <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_room <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_row <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_s <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_s.c <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_san <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sanctuary <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_santa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_santana <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_saratoga <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sausalito <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_school <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_seabright <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_see <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_setting <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_sfr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_shopping <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_should <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_single <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_site <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_size <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_spa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_spacious <dbl> 0.00000000, 0.00000000, 0.07797997, 0…
## $ tfidf_title_spanish <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_specials <dbl> 0.0000000, 0.5413417, 0.0000000, 0.00…
## $ tfidf_title_sq <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_sq.ft <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sqft <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_st <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_stanford <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_studio <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_style <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_stylish <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_şub <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sunday <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sunnyvale <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sunroom <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sunset <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_the <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_this <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_to <dbl> 0.00000000, 0.00000000, 0.08675632, 0…
## $ tfidf_title_today <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_top <dbl> 0.0000000, 0.4072621, 0.0000000, 0.00…
## $ tfidf_title_town <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_townhome <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_townhouse <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_traditional <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_two <dbl> 0.0000000, 0.0000000, 0.0000000, 0.29…
## $ tfidf_title_unit <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_updated <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_vacaville <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_valley <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_very <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_view <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_views <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_w <dbl> 0.2233592, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_walk <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_washer <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_week <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_welcome <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_west <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_whole <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_wifi <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_with <dbl> 0.2233592, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_wood <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_wow <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_yard <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_yerba <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_you <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_your <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_게시물 <dbl> 0.0000000, 0.0000000, 0.2172064, 0.00…
## $ tfidf_title_게시물을 <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_복구 <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_사진 <dbl> 0.00000000, 0.00000000, 0.07797997, 0…
## $ tfidf_title_설정 <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_숨김 <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_월 <dbl> 0.00000000, 0.00000000, 0.07797997, 0…
## $ tfidf_title_이 <dbl> 0.0000000, 0.0000000, 0.3258097, 0.00…
## $ tfidf_title_즐겨찾기로 <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_지도 <dbl> 0.00000000, 0.00000000, 0.07797997, 0…
## $ tfidf_title_표시 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_图片 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_地图 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_月 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ nhood_castro <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ nhood_dublin...pleasanton <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ nhood_mountain.view <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ nhood_san.jose.west <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0…
## $ nhood_other <dbl> 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1…
## $ city_dublin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ city_mountain.view <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ city_san.francisco <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ city_san.jose <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0…
## $ city_other <dbl> 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1…
## $ county_alameda <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ county_contra.costa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ county_marin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county_napa <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county_san.francisco <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county_san.mateo <dbl> 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county_santa.clara <dbl> 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0…
## $ county_santa.cruz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county_solano <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county_sonoma <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
# Model specification: gradient-boosted trees for regression on the xgboost
# engine. The four hyperparameters below are placeholders (`tune()`) whose
# values are chosen later during grid tuning.
xgboost_spec <- boost_tree(
  mtry = tune(),
  trees = tune(),
  min_n = tune(),
  learn_rate = tune()
) %>%
  set_mode("regression") %>%
  set_engine("xgboost")
# Bundle the preprocessing recipe and the model specification into a single
# workflow object so they are tuned and fitted together.
xgboost_workflow <- workflow() %>%
  add_recipe(recipe = xgboost_recipe) %>%
  add_model(spec = xgboost_spec)
# Tune the workflow's hyperparameters over the resamples in `data_cv`,
# evaluating 5 candidate parameter combinations. The seed is fixed right
# before tuning so the candidate grid is reproducible.
set.seed(344)
xgboost_tune <- xgboost_workflow %>%
  tune_grid(resamples = data_cv, grid = 5)

# List the candidates ranked by RMSE.
xgboost_tune %>%
  tune::show_best(metric = "rmse")
## # A tibble: 5 × 10
## mtry trees min_n learn_rate .metric .estimator mean n std_err .config
## <int> <int> <int> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 149 1104 28 0.00484 rmse standard 1242. 10 165. Preproces…
## 2 28 1613 36 0.0290 rmse standard 1276. 10 192. Preproces…
## 3 373 1524 23 0.0836 rmse standard 1324. 10 127. Preproces…
## 4 280 768 12 0.112 rmse standard 1406. 10 180. Preproces…
## 5 422 162 7 0.00108 rmse standard 2595. 10 242. Preproces…
# Replace the `tune()` placeholders in the workflow with the hyperparameter
# combination that achieved the best RMSE during tuning.
xgboost_fw <- tune::finalize_workflow(
  x = xgboost_workflow,
  parameters = tune::select_best(xgboost_tune, metric = "rmse")
)
# Final fit: train the finalized workflow on the training portion of
# `data_split`, evaluate it on the test portion, then report the metrics.
data_fit <- xgboost_fw %>%
  tune::last_fit(split = data_split)

data_fit %>%
  tune::collect_metrics()
## # A tibble: 2 × 4
## .metric .estimator .estimate .config
## <chr> <chr> <dbl> <chr>
## 1 rmse standard 2050. Preprocessor1_Model1
## 2 rsq standard 0.198 Preprocessor1_Model1
# Plot observed price (x) against predicted price (y) for the predictions
# collected from the final fit. Points on the dashed identity line
# (slope 1, intercept 0) are perfect predictions; `coord_fixed()` keeps both
# axes on the same scale so that line sits at 45 degrees.
tune::collect_predictions(data_fit) %>%
  ggplot(aes(price, .pred)) +
  # Use `color`, not `fill`: the default point shape has no fill, so a `fill`
  # setting is silently ignored and the points would stay black.
  geom_point(alpha = 0.3, color = "midnightblue") +
  geom_abline(lty = 2, color = "gray50") +
  coord_fixed()