Goal: predict rental prices in the San Francisco (SF) Bay Area rental market.

Data: the TidyTuesday 2022-07-05 rent.csv dataset (loaded from GitHub below).

Import Data

# Load the rental listings straight from the TidyTuesday GitHub repository.
rent <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-07-05/rent.csv"
)

# Print a per-column summary of the raw data.
skimr::skim(rent)
Data summary
Name rent
Number of rows 200796
Number of columns 17
_______________________
Column type frequency:
character 8
numeric 9
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
post_id 0 1.00 9 14 0 200796 0
nhood 0 1.00 4 43 0 167 0
city 0 1.00 5 19 0 104 0
county 1394 0.99 4 13 0 10 0
address 196888 0.02 1 38 0 2869 0
title 2517 0.99 2 298 0 184961 0
descr 197542 0.02 13 16975 0 3025 0
details 192780 0.04 4 595 0 7667 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
date 0 1.00 20095718.38 44694.07 20000902.00 20050227.00 20110924.00 20120805.0 20180717.00 ▁▇▁▆▃
year 0 1.00 2009.51 4.48 2000.00 2005.00 2011.00 2012.0 2018.00 ▁▇▁▆▃
price 0 1.00 2135.36 1427.75 220.00 1295.00 1800.00 2505.0 40000.00 ▇▁▁▁▁
beds 6608 0.97 1.89 1.08 0.00 1.00 2.00 3.0 12.00 ▇▂▁▁▁
baths 158121 0.21 1.68 0.69 1.00 1.00 2.00 2.0 8.00 ▇▁▁▁▁
sqft 136117 0.32 1201.83 5000.22 80.00 750.00 1000.00 1360.0 900000.00 ▇▁▁▁▁
room_in_apt 0 1.00 0.00 0.04 0.00 0.00 0.00 0.0 1.00 ▇▁▁▁▁
lat 193145 0.04 37.67 0.35 33.57 37.40 37.76 37.8 40.43 ▁▁▅▇▁
lon 196484 0.02 -122.21 0.78 -123.20 -122.42 -122.26 -122.0 -74.20 ▇▁▁▁▁
# Build the modelling table: discard the listed columns, then keep only the
# rows that have no missing value in any remaining column.
data <- rent %>%
  select(
    -c(address, descr, details, lat, lon, date, year, room_in_apt)
  ) %>%
  na.omit()

Explore Data

Identify good predictors

sqft

# Scatter plot with price on the x axis and sqft on a log10-scaled y axis.
ggplot(data, aes(x = price, y = sqft)) +
  geom_point() +
  scale_y_log10()

beds

# Box plot of price for each bedroom count (beds is treated as a factor).
ggplot(data, aes(x = price, y = as.factor(beds))) +
  geom_boxplot()

title

# Rank the words used in listing titles by the mean price across their
# occurrences.
data %>%
  # split each title into one row per word
  unnest_tokens(word, title) %>%
  # mean price and number of occurrences for every word
  group_by(word) %>%
  summarise(
    price = mean(price),
    n = n(),
    .groups = "drop"
  ) %>%
  # keep words that occur more than 10 times and contain no digit
  filter(n > 10) %>%
  filter(!str_detect(word, "\\d")) %>%
  # the 20 words with the highest mean price
  slice_max(price, n = 20)
## # A tibble: 20 × 3
##    word       price     n
##    <chr>      <dbl> <int>
##  1 atherton   8547.    16
##  2 millennium 8473.    11
##  3 woodside   7867.    12
##  4 j.wavro    7822.    13
##  5 estate     7565.    24
##  6 roof       7464.    18
##  7 gpk        7409.    11
##  8 id         7142.    13
##  9 decks      7135.    20
## 10 relisto    6964.    20
## 11 pano       6759.    17
## 12 cow        6188.    28
## 13 telegraph  6174.    22
## 14 infinity   6158.    11
## 15 hollow     5990.    32
## 16 residence  5897.    16
## 17 tower      5637.    20
## 18 foundation 5626.    21
## 19 doorman    5582.    20
## 20 lumina     5507.    14

EDA shortcut

 # step 1: prepare data
# Drop the identifier and free-text columns, then binarize what remains.
data_binarized_tbl <- binarize(select(data, -c(post_id, title)))

# Show the resulting columns.
glimpse(data_binarized_tbl)
## Rows: 14,394
## Columns: 85
## $ nhood__campbell                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__concord_/_pleasant_hill_/_martinez` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__cupertino                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__daly_city                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__danville_/_san_ramon`               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__dublin_/_pleasanton`                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__fairfield_/_vacaville`              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__foster_city                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__hayward_/_castro_valley`            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__milpitas                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__mountain_view                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__napa_county                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__palo_alto                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__petaluma                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__pittsburg_/_antioch`                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__rohnert_pk_/_cotati`                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_francisco                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_central                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_east                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_north                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_south                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_west                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_mateo                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_rafael                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__santa_clara                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__santa_cruz                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__santa_rosa                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__SOMA_/_south_beach`                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__sunnyvale                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__union_city                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__vallejo_/_benicia`                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__willow_glen_/_cambrian`             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__-OTHER`                             <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ city__cambrian                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__campbell                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__concord                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__cupertino                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__daly_city                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__dublin                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__fairfield                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__foster_city                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__hayward                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__milpitas                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__mountain_view                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__napa_county                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__oakland                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__palo_alto                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__petaluma                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__pittsburg                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__rohnert_park                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_francisco                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_jose                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_mateo                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_rafael                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_ramon                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__santa_clara                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__santa_cruz                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__santa_rosa                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__sunnyvale                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__union_city                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__vallejo                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `city__-OTHER`                              <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ county__alameda                             <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ county__contra_costa                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__marin                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__napa                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__san_francisco                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__san_mateo                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__santa_clara                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__santa_cruz                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__solano                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__sonoma                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `price__-Inf_1850`                          <dbl> 0, 1, 0, 1, 0, 1, 1, 0, 0,…
## $ price__1850_2450                            <dbl> 0, 0, 1, 0, 0, 0, 0, 1, 1,…
## $ price__2450_3225                            <dbl> 1, 0, 0, 0, 1, 0, 0, 0, 0,…
## $ price__3225_Inf                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `beds__-Inf_2`                              <dbl> 0, 1, 0, 1, 1, 1, 0, 0, 1,…
## $ beds__2_3                                   <dbl> 0, 0, 1, 0, 0, 0, 1, 1, 0,…
## $ beds__3_Inf                                 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `baths__-Inf_2`                             <dbl> 0, 1, 1, 1, 1, 1, 0, 0, 1,…
## $ baths__2_Inf                                <dbl> 1, 0, 0, 0, 0, 0, 1, 1, 0,…
## $ `sqft__-Inf_887`                            <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1,…
## $ sqft__887_1100                              <dbl> 0, 0, 0, 1, 0, 0, 0, 1, 0,…
## $ sqft__1100_1500                             <dbl> 0, 0, 1, 0, 1, 1, 0, 0, 0,…
## $ sqft__1500_Inf                              <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0,…
 # step 2: correlate
# Correlate every binarized column with the top price bin.
data_corr_tbl <- correlate(data_binarized_tbl, target = price__3225_Inf)

# step 3: plot
plot_correlation_funnel(data_corr_tbl)
## Warning: ggrepel: 69 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Build Models

# Work on a random sample of 100 listings. The seed is set BEFORE sampling so
# the sample, and the split and folds derived from it, are reproducible
# from run to run.
set.seed(123)
data <- slice_sample(data, n = 100)

# Split into training and test dataset
set.seed(1234)
data_split <- rsample::initial_split(data)
data_train <- rsample::training(data_split)
data_test <- rsample::testing(data_split)

# further split training dataset for cross-validation
set.seed(2345)
data_cv <- rsample::vfold_cv(data_train)
data_cv
## #  10-fold cross-validation 
## # A tibble: 10 × 2
##    splits         id    
##    <list>         <chr> 
##  1 <split [67/8]> Fold01
##  2 <split [67/8]> Fold02
##  3 <split [67/8]> Fold03
##  4 <split [67/8]> Fold04
##  5 <split [67/8]> Fold05
##  6 <split [68/7]> Fold06
##  7 <split [68/7]> Fold07
##  8 <split [68/7]> Fold08
##  9 <split [68/7]> Fold09
## 10 <split [68/7]> Fold10
# Attach usemodels and print its suggested xgboost template for the
# training data.
library(usemodels)
use_xgboost(formula = price ~ ., data = data_train)
## xgboost_recipe <- 
##   recipe(formula = price ~ ., data = data_train) %>% 
##   step_zv(all_predictors()) 
## 
## xgboost_spec <- 
##   boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(), 
##     loss_reduction = tune(), sample_size = tune()) %>% 
##   set_mode("classification") %>% 
##   set_engine("xgboost") 
## 
## xgboost_workflow <- 
##   workflow() %>% 
##   add_recipe(xgboost_recipe) %>% 
##   add_model(xgboost_spec) 
## 
## set.seed(6804)
## xgboost_tune <-
##   tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
# specify recipe
xgboost_recipe <-
  recipe(formula = price ~ ., data = data_train) %>%
  # give post_id a non-predictor role so it stays in the data as an identifier
  recipes::update_role(post_id, new_role = "id variable") %>%
  # turn each title into tokens
  step_tokenize(title) %>%
  # The column selector is required: without `title` the step selects no
  # column and the token cap is never applied.
  step_tokenfilter(title, max_tokens = 100) %>%
  step_tfidf(title) %>%
  step_other(nhood, city) %>%
  step_dummy(nhood, city, county, one_hot = TRUE) %>%
  step_YeoJohnson(sqft, beds, baths)

# Inspect the preprocessed training data. bake(new_data = NULL) returns the
# processed training set and replaces the superseded juice().
xgboost_recipe %>%
  prep() %>%
  bake(new_data = NULL) %>%
  glimpse()
## Rows: 75
## Columns: 474
## $ post_id                          <fct> 4611893739, 4935238058, 5830676495, 5…
## $ beds                             <dbl> 0.7584193, 1.2685806, 1.2685806, 1.26…
## $ baths                            <dbl> 0.5064741, 0.5064741, 0.6794059, 0.67…
## $ sqft                             <dbl> 1.657172, 1.661639, 1.664611, 1.66779…
## $ price                            <dbl> 1950, 2638, 2954, 3500, 2195, 1700, 1…
## $ tfidf_title_1                    <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ `tfidf_title_1,970`              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ `tfidf_title_1,974`              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1.5                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1.5ba                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_10                   <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_1000ft2              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1016ft2              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1025                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1052ft               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1056ft               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1060ft               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1085ft2              <dbl> 0.0000000, 0.0000000, 0.1443578, 0.00…
## $ tfidf_title_1088ft2              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1100ft               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1100sf               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1134ft               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1155ft               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1192ft               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_12                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1200                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1270ft               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1272ft               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1285                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1290ft2              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1295                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1300ft               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1321                 <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_1330ft               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1350ft               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_14                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1400                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1407sq               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1450                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1454ft               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1466                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1485                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1495                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_15                   <dbl> 0.0000000, 0.0000000, 0.2433772, 0.00…
## $ tfidf_title_1500ft               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1550                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_15th                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1645                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1650                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1695                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_17                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1700ft               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1735                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_18                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1800                 <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_1800ft2              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_19                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1900                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1950                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1970ft2              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1974ft2              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1980ft               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1b                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1ba                  <dbl> 0.2339399, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1bath                <dbl> 0.0000000, 0.5413417, 0.0000000, 0.00…
## $ tfidf_title_1bd                  <dbl> 0.4330733, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1bed                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1br                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_1st                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2                    <dbl> 0.00000000, 0.00000000, 0.08047286, 0…
## $ tfidf_title_2.5                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_2.5ba                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2000                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2050                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_21                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2169                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2195                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_22                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_2290                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_23                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2350                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_237                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2400                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2500                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2565                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_26                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2781                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2789                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_280                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2800                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_29                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2900                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_2954                 <dbl> 0.0000000, 0.0000000, 0.1443578, 0.00…
## $ tfidf_title_2997                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2b                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_2ba                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_2bath                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2bathcondo           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2bd                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_2bed                 <dbl> 0.0000000, 0.4563323, 0.0000000, 0.00…
## $ tfidf_title_2br                  <dbl> 0.00000000, 0.00000000, 0.05329519, 0…
## $ tfidf_title_3                    <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_3.5ba                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_3000                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_3000ft               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_31                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_3184ft2              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_3250                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_3381                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_3600ft               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_366                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_3900                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_3bd                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_3br                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_4                    <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_4437                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_488ft                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_4br                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_5                    <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_525ft2               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_5500                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_5850                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_5br                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_6500                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_650ft                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_650ft2               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_665ft2               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_6br                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_700                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_773ft2               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_784ft                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_785sqft              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_8                    <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_806ft                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_815ft                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_816ft2               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_85                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_8950                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_9                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_915ft                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_950ft                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_980ft2               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_a                    <dbl> 0.2602690, 0.0000000, 0.0000000, 0.26…
## $ tfidf_title_about                <dbl> 0.0000000, 0.5413417, 0.0000000, 0.00…
## $ tfidf_title_absolutely           <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_access               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_acre                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_adeline              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_al                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_alma                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_almaden              <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_amazing              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_and                  <dbl> 0.2772589, 0.0000000, 0.0000000, 0.27…
## $ tfidf_title_antioch              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_apartment            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_apartmentsemeryville <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_apt                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_ara                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_ask                  <dbl> 0.0000000, 0.5413417, 0.0000000, 0.00…
## $ tfidf_title_at                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_aug                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_available            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_ave                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_azari                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_b                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_ba                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_back                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_backyard             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_balcony              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_barn                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_bart                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_bascom               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_bath                 <dbl> 0.00000000, 0.00000000, 0.04023643, 0…
## $ tfidf_title_bathes               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.43…
## $ tfidf_title_bathroom             <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_baths                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_bd                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_bdr                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_beautiful            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_beautifulandbrandnew <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_beautifully          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_bed                  <dbl> 0.00000000, 0.00000000, 0.00000000, 0…
## $ tfidf_title_bedroom              <dbl> 0.00000000, 0.00000000, 0.05474092, 0…
## $ tfidf_title_bedrooms             <dbl> 0.0000000, 0.0000000, 0.0000000, 0.36…
## $ tfidf_title_beds                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_big                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_blossom              <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_br                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_bth                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_buena                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_ca                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_california           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_car                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_carlos               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_castro               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_centr                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_central              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_century              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_charming             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_cheap                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_close                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_cloverdale           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_comfortable          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_community            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_completely           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_condo                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_contemporary         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_control              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_convenient           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_copertino            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_corey                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_county               <dbl> 0.0000000, 0.0000000, 0.1443578, 0.00…
## $ tfidf_title_craftsman            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_crown                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_cruz                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_cupertino            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_custom               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_d                    <dbl> 0.3650658, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_daily                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_dec                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_den                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_detached             <dbl> 0.4330733, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_dining               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_district             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_dolores              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_downstairs           <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_downtown             <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_dryer                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_dublin               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_duboce               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_duplex               <dbl> 0.3650658, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_easy                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_edwardian            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_elegant              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_equipped             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_executive            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_expressway           <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_fairfield            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_family               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_fantastic            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_favorite             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_fenced               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_find                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_fireplace            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_first                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_flat                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_flexible             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_floor                <dbl> 0.0000000, 0.3728942, 0.0000000, 0.00…
## $ tfidf_title_floors               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_foods                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_for                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_francisco            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_free                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_from                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_ft                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_fully                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_garage               <dbl> 0.2983153, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_garaged              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_garden               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_gardens              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_gated                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_geri                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_google               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_gorgeous             <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_gourmet              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_granite              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_great                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_grind                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_ground               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_gym                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_half                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_har                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_hardwood             <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_heights              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_here                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_hide                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_high                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_hill                 <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_hillside             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_holiday              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_home                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_house                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.26…
## $ tfidf_title_hse                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_huge                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_hwys                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_in                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_is                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_jose                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_kit                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_kitchen              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_lafayette            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_lakeshore            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_large                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_laundry              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_lease                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_like                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_livermore            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_living               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_location             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_loft                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_lotsofclosets        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_lovely               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_luxurious            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_luxury               <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_m                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_map                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_mar                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_market               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_medical              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_mediterranean        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_meridian             <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_mid                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_mile                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_mins                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_minutes              <dbl> 0.0000000, 0.0000000, 0.1443578, 0.00…
## $ tfidf_title_mission              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_modern               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_month                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_montrachet           <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_more                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_move                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_must                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_napa                 <dbl> 0.0000000, 0.0000000, 0.2887156, 0.00…
## $ tfidf_title_near                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_neighborhood         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_new                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.32…
## $ tfidf_title_newly                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_next                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_nice                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_northpointe          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_nov                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_now                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_oca                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_occupancy            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_of                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_off                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_office               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.43…
## $ tfidf_title_on                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_one                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_only                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_open                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_our                  <dbl> 0.0000000, 0.5413417, 0.0000000, 0.00…
## $ tfidf_title_page                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_park                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_parking              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_parkside             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_patio                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_pets                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_pic                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_pittsburg            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_pleasanton           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_pm                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_pool                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_post                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_posting              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_price                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_quiet                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_rafael               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_ranch                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_ready                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_recently             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_reduction            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_relaxing             <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_remodel              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.43…
## $ tfidf_title_remodeled            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_rent                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_rental               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_resim                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_restore              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_rise                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_roewill              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_room                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_row                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_s                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_s.c                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_san                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sanctuary            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_santa                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_santana              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_saratoga             <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sausalito            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_school               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_seabright            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_see                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_setting              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_sfr                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_shopping             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_should               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_single               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_site                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_size                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_spa                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_spacious             <dbl> 0.00000000, 0.00000000, 0.07797997, 0…
## $ tfidf_title_spanish              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_specials             <dbl> 0.0000000, 0.5413417, 0.0000000, 0.00…
## $ tfidf_title_sq                   <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_sq.ft                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sqft                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_st                   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_stanford             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_studio               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_style                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_stylish              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_şub                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sunday               <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sunnyvale            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sunroom              <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_sunset               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_the                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_this                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_to                   <dbl> 0.00000000, 0.00000000, 0.08675632, 0…
## $ tfidf_title_today                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_top                  <dbl> 0.0000000, 0.4072621, 0.0000000, 0.00…
## $ tfidf_title_town                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_townhome             <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_townhouse            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_traditional          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_two                  <dbl> 0.0000000, 0.0000000, 0.0000000, 0.29…
## $ tfidf_title_unit                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_updated              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_vacaville            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_valley               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_very                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_view                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_views                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_w                    <dbl> 0.2233592, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_walk                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_washer               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_week                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_welcome              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_west                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_whole                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_wifi                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_with                 <dbl> 0.2233592, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_wood                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_wow                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_yard                 <dbl> 0.000000, 0.000000, 0.000000, 0.00000…
## $ tfidf_title_yerba                <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_you                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_your                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_게시물               <dbl> 0.0000000, 0.0000000, 0.2172064, 0.00…
## $ tfidf_title_게시물을             <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_복구                 <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_사진                 <dbl> 0.00000000, 0.00000000, 0.07797997, 0…
## $ tfidf_title_설정                 <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_숨김                 <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_월                   <dbl> 0.00000000, 0.00000000, 0.07797997, 0…
## $ tfidf_title_이                   <dbl> 0.0000000, 0.0000000, 0.3258097, 0.00…
## $ tfidf_title_즐겨찾기로           <dbl> 0.0000000, 0.0000000, 0.1086032, 0.00…
## $ tfidf_title_지도                 <dbl> 0.00000000, 0.00000000, 0.07797997, 0…
## $ tfidf_title_표시                 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_图片                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_地图                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_月                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ nhood_castro                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ nhood_dublin...pleasanton        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ nhood_mountain.view              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ nhood_san.jose.west              <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0…
## $ nhood_other                      <dbl> 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1…
## $ city_dublin                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ city_mountain.view               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ city_san.francisco               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ city_san.jose                    <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0…
## $ city_other                       <dbl> 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1…
## $ county_alameda                   <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ county_contra.costa              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ county_marin                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county_napa                      <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county_san.francisco             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county_san.mateo                 <dbl> 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county_santa.clara               <dbl> 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0…
## $ county_santa.cruz                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county_solano                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ county_sonoma                    <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
# Model specification: boosted trees for regression, fit with the xgboost
# engine; the four hyperparameters below are left as placeholders to tune.
xgboost_spec <- boost_tree(
  mtry = tune(),
  trees = tune(),
  min_n = tune(),
  learn_rate = tune()
) %>%
  set_mode("regression") %>%
  set_engine("xgboost")

# Bundle the preprocessing recipe and the model spec into one workflow
xgboost_workflow <- workflow() %>%
  add_recipe(xgboost_recipe) %>%
  add_model(xgboost_spec)

# Evaluate 5 candidate hyperparameter combinations over the resamples in
# `data_cv`; the seed is fixed so the run can be repeated.
set.seed(344)
xgboost_tune <- tune_grid(
  xgboost_workflow,
  resamples = data_cv,
  grid = 5
)

Evaluate Models

# List the top tuning candidates according to RMSE
xgboost_tune %>%
    tune::show_best(metric = "rmse")
## # A tibble: 5 × 10
##    mtry trees min_n learn_rate .metric .estimator  mean     n std_err .config   
##   <int> <int> <int>      <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>     
## 1   149  1104    28    0.00484 rmse    standard   1242.    10    165. Preproces…
## 2    28  1613    36    0.0290  rmse    standard   1276.    10    192. Preproces…
## 3   373  1524    23    0.0836  rmse    standard   1324.    10    127. Preproces…
## 4   280   768    12    0.112   rmse    standard   1406.    10    180. Preproces…
## 5   422   162     7    0.00108 rmse    standard   2595.    10    242. Preproces…
# Update the workflow with the best hyperparameters (lowest cross-validated RMSE).

# Plug the candidate with the best RMSE back into the tunable workflow
xgboost_fw <- tune::finalize_workflow(
    xgboost_workflow,
    tune::select_best(xgboost_tune, metric = "rmse")
)

# Fit the finalized workflow on the training portion of the split and
# score it on the held-out test portion
data_fit <- xgboost_fw %>%
    tune::last_fit(data_split)
data_fit %>%
    tune::collect_metrics()
## # A tibble: 2 × 4
##   .metric .estimator .estimate .config             
##   <chr>   <chr>          <dbl> <chr>               
## 1 rmse    standard    2050.    Preprocessor1_Model1
## 2 rsq     standard       0.198 Preprocessor1_Model1
# Plot observed price (x) against predicted price (y) for the predictions
# collected from `data_fit`; points on the dashed identity line are
# perfect predictions.
tune::collect_predictions(data_fit) %>%
    ggplot(aes(price, .pred)) +
    # The default point shape (19) has no fill, so `fill` was silently
    # ignored; `color` is the aesthetic that actually tints the points.
    geom_point(alpha = 0.3, color = "midnightblue") +
    # With no slope/intercept given, geom_abline() draws y = x
    geom_abline(lty = 2, color = "gray50") +
    # Same scale on both axes so the identity line sits at 45 degrees
    coord_fixed()

Make Predictions