Goal: Predict rental prices in the SF rental market. Click [here for the data](https://github.com/rfordatascience/tidytuesday/tree/master/data/2022/2022-07-05).
# Load the TidyTuesday 2022-07-05 rent data set straight from GitHub
rent <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-07-05/rent.csv"
)
## Rows: 200796 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): post_id, nhood, city, county, address, title, descr, details
## dbl (9): date, year, price, beds, baths, sqft, room_in_apt, lat, lon
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-column summary (type, missingness, distribution) of the raw data
skimr::skim(rent)
Name | rent |
Number of rows | 200796 |
Number of columns | 17 |
_______________________ | |
Column type frequency: | |
character | 8 |
numeric | 9 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
post_id | 0 | 1.00 | 9 | 14 | 0 | 200796 | 0 |
nhood | 0 | 1.00 | 4 | 43 | 0 | 167 | 0 |
city | 0 | 1.00 | 5 | 19 | 0 | 104 | 0 |
county | 1394 | 0.99 | 4 | 13 | 0 | 10 | 0 |
address | 196888 | 0.02 | 1 | 38 | 0 | 2869 | 0 |
title | 2517 | 0.99 | 2 | 298 | 0 | 184961 | 0 |
descr | 197542 | 0.02 | 13 | 16975 | 0 | 3025 | 0 |
details | 192780 | 0.04 | 4 | 595 | 0 | 7667 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
date | 0 | 1.00 | 20095718.38 | 44694.07 | 20000902.00 | 20050227.00 | 20110924.00 | 20120805.0 | 20180717.00 | ▁▇▁▆▃ |
year | 0 | 1.00 | 2009.51 | 4.48 | 2000.00 | 2005.00 | 2011.00 | 2012.0 | 2018.00 | ▁▇▁▆▃ |
price | 0 | 1.00 | 2135.36 | 1427.75 | 220.00 | 1295.00 | 1800.00 | 2505.0 | 40000.00 | ▇▁▁▁▁ |
beds | 6608 | 0.97 | 1.89 | 1.08 | 0.00 | 1.00 | 2.00 | 3.0 | 12.00 | ▇▂▁▁▁ |
baths | 158121 | 0.21 | 1.68 | 0.69 | 1.00 | 1.00 | 2.00 | 2.0 | 8.00 | ▇▁▁▁▁ |
sqft | 136117 | 0.32 | 1201.83 | 5000.22 | 80.00 | 750.00 | 1000.00 | 1360.0 | 900000.00 | ▇▁▁▁▁ |
room_in_apt | 0 | 1.00 | 0.00 | 0.04 | 0.00 | 0.00 | 0.00 | 0.0 | 1.00 | ▇▁▁▁▁ |
lat | 193145 | 0.04 | 37.67 | 0.35 | 33.57 | 37.40 | 37.76 | 37.8 | 40.43 | ▁▁▅▇▁ |
lon | 196484 | 0.02 | -122.21 | 0.78 | -123.20 | -122.42 | -122.26 | -122.0 | -74.20 | ▇▁▁▁▁ |
# Build the modelling table from the raw listings
data <- rent %>%
  # Exclude the mostly-missing columns (address, descr, details, lat, lon)
  # together with date, year and room_in_apt
  select(-c(address, descr, details, lat, lon, date, year, room_in_apt)) %>%
  # Keep only rows that are complete in every remaining column
  na.omit() %>%
  # price is strongly right-skewed, so work on the log scale
  mutate(price = log(price))
Identify good predictors
sqft
# Scatter plot of sqft (log10 y-axis) against price
ggplot(data, aes(x = price, y = sqft)) +
  geom_point() +
  scale_y_log10()
beds
# Box plot of price for each bedroom count
ggplot(data, aes(x = price, y = as.factor(beds))) +
  geom_boxplot()
title
# Which title words go with the highest average price?
data %>%
  # One row per word per listing title
  unnest_tokens(output = word, input = title) %>%
  # Mean price and number of occurrences for each word
  group_by(word) %>%
  summarise(
    price = mean(price),
    n = n(),
    .groups = "drop"
  ) %>%
  # Ignore rare words, then keep the 20 with the highest mean price
  filter(n > 10) %>%
  slice_max(order_by = price, n = 20) %>%
  # Dot plot with words ordered by mean price
  ggplot(aes(x = price, y = fct_reorder(word, price))) +
  geom_point() +
  labs(y = "Words in Title")
EDA Shortcut
# Step 1: Prepare data -- drop the id and free-text columns, then binarize
# the remaining columns into 0/1 indicator features
data_binarized_tbl <- binarize(select(data, -c(post_id, title)))
glimpse(data_binarized_tbl)
## Rows: 14,394
## Columns: 85
## $ nhood__campbell <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__concord_/_pleasant_hill_/_martinez` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__cupertino <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__daly_city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__danville_/_san_ramon` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__dublin_/_pleasanton` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__fairfield_/_vacaville` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__foster_city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__hayward_/_castro_valley` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__milpitas <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__mountain_view <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__napa_county <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__palo_alto <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__petaluma <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__pittsburg_/_antioch` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__rohnert_pk_/_cotati` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_francisco <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_central <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_east <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_north <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_south <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_jose_west <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_mateo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__san_rafael <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__santa_clara <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__santa_cruz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__santa_rosa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__SOMA_/_south_beach` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__sunnyvale <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ nhood__union_city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__vallejo_/_benicia` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__willow_glen_/_cambrian` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `nhood__-OTHER` <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ city__cambrian <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__campbell <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__concord <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__cupertino <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__daly_city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__dublin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__fairfield <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__foster_city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__hayward <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__milpitas <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__mountain_view <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__napa_county <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__oakland <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__palo_alto <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__petaluma <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__pittsburg <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__rohnert_park <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_francisco <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_jose <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_mateo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_rafael <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__san_ramon <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__santa_clara <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__santa_cruz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__santa_rosa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__sunnyvale <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__union_city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ city__vallejo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `city__-OTHER` <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ county__alameda <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ county__contra_costa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__marin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__napa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__san_francisco <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__san_mateo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__santa_clara <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__santa_cruz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__solano <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ county__sonoma <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `price__-Inf_7.52294091807237` <dbl> 0, 1, 0, 1, 0, 1, 1, 0, 0,…
## $ price__7.52294091807237_7.80384330353877 <dbl> 0, 0, 1, 0, 0, 0, 0, 1, 1,…
## $ price__7.80384330353877_8.07868822922987 <dbl> 1, 0, 0, 0, 1, 0, 0, 0, 0,…
## $ price__8.07868822922987_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `beds__-Inf_2` <dbl> 0, 1, 0, 1, 1, 1, 0, 0, 1,…
## $ beds__2_3 <dbl> 0, 0, 1, 0, 0, 0, 1, 1, 0,…
## $ beds__3_Inf <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `baths__-Inf_2` <dbl> 0, 1, 1, 1, 1, 1, 0, 0, 1,…
## $ baths__2_Inf <dbl> 1, 0, 0, 0, 0, 0, 1, 1, 0,…
## $ `sqft__-Inf_887` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1,…
## $ sqft__887_1100 <dbl> 0, 0, 0, 1, 0, 0, 0, 1, 0,…
## $ sqft__1100_1500 <dbl> 0, 0, 1, 0, 1, 1, 0, 0, 0,…
## $ sqft__1500_Inf <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0,…
# Step 2: Correlate every binary feature with the highest price bin
data_corr_tbl <- correlate(
  data_binarized_tbl,
  target = price__8.07868822922987_Inf
)
data_corr_tbl
## # A tibble: 85 × 3
## feature bin correlation
## <fct> <chr> <dbl>
## 1 price 8.07868822922987_Inf 1
## 2 city san_francisco 0.389
## 3 county san_francisco 0.389
## 4 price -Inf_7.52294091807237 -0.342
## 5 price 7.80384330353877_8.07868822922987 -0.330
## 6 price 7.52294091807237_7.80384330353877 -0.328
## 7 sqft 1500_Inf 0.324
## 8 beds -Inf_2 -0.254
## 9 beds 3_Inf 0.241
## 10 sqft -Inf_887 -0.240
## # ℹ 75 more rows
# Step 3: Plot the correlation funnel
plot_correlation_funnel(data_corr_tbl)
## Warning: ggrepel: 69 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Split Data
# Uncomment to iterate quickly on a small random sample:
# data <- sample_n(data, 100)

# Hold out a test set (initial_split() defaults), seeded for reproducibility
set.seed(1234)
data_split <- rsample::initial_split(data)
data_train <- rsample::training(data_split)
data_test <- rsample::testing(data_split)

# Resample the training set (vfold_cv() defaults) for cross-validation
set.seed(12345)
data_cv <- rsample::vfold_cv(data_train)
data_cv
## # 10-fold cross-validation
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [9715/1080]> Fold01
## 2 <split [9715/1080]> Fold02
## 3 <split [9715/1080]> Fold03
## 4 <split [9715/1080]> Fold04
## 5 <split [9715/1080]> Fold05
## 6 <split [9716/1079]> Fold06
## 7 <split [9716/1079]> Fold07
## 8 <split [9716/1079]> Fold08
## 9 <split [9716/1079]> Fold09
## 10 <split [9716/1079]> Fold10
# Print tidymodels boilerplate for an xgboost model as a starting point
library(usemodels)
use_xgboost(formula = price ~ ., data = data_train)
## xgboost_recipe <-
## recipe(formula = price ~ ., data = data_train) %>%
## step_zv(all_predictors())
##
## xgboost_spec <-
## boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
## loss_reduction = tune(), sample_size = tune()) %>%
## set_mode("classification") %>%
## set_engine("xgboost")
##
## xgboost_workflow <-
## workflow() %>%
## add_recipe(xgboost_recipe) %>%
## add_model(xgboost_spec)
##
## set.seed(72471)
## xgboost_tune <-
## tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
# Specify recipe
xgboost_recipe <-
  recipe(formula = price ~ ., data = data_train) %>%
  # post_id is kept in the data but is neither outcome nor predictor
  recipes::update_role(post_id, new_role = "id variable") %>%
  # Listing title -> tf-idf features for at most 100 retained tokens
  step_tokenize(title) %>%
  step_tokenfilter(title, max_tokens = 100) %>%
  step_tfidf(title) %>%
  # Pool infrequent neighbourhoods, then one-hot encode the location columns
  step_other(nhood) %>%
  step_dummy(nhood, city, county, one_hot = TRUE) %>%
  step_log(sqft, baths) %>%
  # beds can be 0, and log(0) is -Inf, which xgboost rejects with
  # "Input data contains `inf` or `nan`". Use log(beds + 1) instead.
  step_log(beds, offset = 1)

# Inspect the processed training data
xgboost_recipe %>% prep() %>% juice() %>% glimpse()
## Rows: 10,795
## Columns: 202
## $ post_id <fct> 4811120754, 4924595622, 6359733516, pre2013_1696…
## $ beds <dbl> 0.6931472, 0.6931472, 1.0986123, 1.0986123, 0.00…
## $ baths <dbl> 0.0000000, 0.6931472, 0.6931472, 1.0986123, 0.00…
## $ sqft <dbl> 7.244228, 6.966967, 7.337588, 7.600902, 6.493754…
## $ price <dbl> 7.374629, 7.945910, 8.341649, 7.495542, 7.130899…
## $ tfidf_title_1 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.48…
## $ tfidf_title_1.5 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1ba <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_1bath <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_1br <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.32…
## $ tfidf_title_2 <dbl> 0.26170848, 0.00000000, 0.16356780, 0.00000000, …
## $ tfidf_title_2.5 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_2ba <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_2bath <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_2bd <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_2br <dbl> 0.00000000, 0.72145594, 0.00000000, 0.00000000, …
## $ tfidf_title_3 <dbl> 0.0000000, 0.0000000, 0.2490204, 0.0000000, 0.00…
## $ tfidf_title_3br <dbl> 0.0000000, 0.0000000, 0.0000000, 0.3722824, 0.00…
## $ tfidf_title_4 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_4br <dbl> 0.0000000, 0.0000000, 0.0000000, 0.5368510, 0.00…
## $ tfidf_title_5 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_6 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_7 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_8 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_9 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_a <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_amp <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_and <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_apartment <dbl> 0.536851, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_apt <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_at <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_available <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_ba <dbl> 0.0000000, 0.0000000, 0.3011678, 0.0000000, 0.00…
## $ tfidf_title_bath <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, …
## $ tfidf_title_bathroom <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_baths <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_bd <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_beautiful <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_bed <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.29…
## $ tfidf_title_bedroom <dbl> 0.3570456, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_bedrooms <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_br <dbl> 0.0000000, 0.0000000, 0.3225057, 0.0000000, 0.00…
## $ tfidf_title_car <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_condo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_downtown <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_family <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_floor <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_for <dbl> 0.0000000, 0.0000000, 0.0000000, 0.5048902, 0.36…
## $ tfidf_title_ft <dbl> 0.0000000, 0.0000000, 0.4319118, 0.0000000, 0.00…
## $ tfidf_title_full <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_garage <dbl> 0.0000000, 0.0000000, 0.3921030, 0.0000000, 0.00…
## $ tfidf_title_great <dbl> 0.6102327, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_har <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_hide <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_hill <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_home <dbl> 0.0000000, 0.0000000, 0.0000000, 0.4027480, 0.00…
## $ tfidf_title_house <dbl> 0.0000000, 0.0000000, 0.3059722, 0.0000000, 0.00…
## $ tfidf_title_in <dbl> 0.00000000, 0.83344405, 0.00000000, 0.00000000, …
## $ tfidf_title_jose <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_large <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_location <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_luxury <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_map <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_move <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_near <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_new <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_nice <dbl> 0.0000000, 0.0000000, 0.0000000, 0.7484949, 0.00…
## $ tfidf_title_now <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_of <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_one <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_open <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_parking <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_pic <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_posting <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_remodeled <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_rent <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_resim <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_restore <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_san <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_santa <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_single <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_spacious <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_sq <dbl> 0.0000000, 0.0000000, 0.4393652, 0.0000000, 0.00…
## $ tfidf_title_şub <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_the <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.464113…
## $ tfidf_title_this <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_to <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_top <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_townhome <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_townhouse <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_two <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_unit <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_valley <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_view <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_views <dbl> 0.6856356, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_w <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_with <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_게시물 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_사진 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_월 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_이 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_지도 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_图片 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_地图 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_月 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ nhood_sunnyvale <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ nhood_other <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ city_alameda <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_atherton <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_belmont <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_belvedere <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_ben.lomond <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_berkeley <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_boulder.cr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_brentwood <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_brisbane <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_burlingame <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_cambrian <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_campbell <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ city_cloverdale <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_colma <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_concord <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_corralitos <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_corte.madera <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_cupertino <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_daly.city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ city_dublin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ city_el.cerrito <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_el.sobrante <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_emeryville <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_fairfax <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_fairfield <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_felton <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_foster.city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_gilroy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_greenbrae <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_half.moon.bay <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_hayward <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_healdsburg <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_inverness <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_kentfield <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_larkspur <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_livermore <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_los.altos <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_los.gatos <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_marin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_menlo.park <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_mill.valley <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_millbrae <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_milpitas <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_morgan.hill <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_mountain.view <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ city_napa.county <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_novato <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_oakland <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_orinda <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_pacifica <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_palo.alto <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_peninsula <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_pescadero <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_petaluma <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_pittsburg <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_redwood.city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_redwood.shores <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_richmond <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_rio.vista <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_rohnert.park <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_russian.river <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_san.anselmo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_san.bruno <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_san.francisco <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ city_san.jose <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_san.leandro <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_san.mateo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_san.rafael <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_san.ramon <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ city_santa.clara <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_santa.cruz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, …
## $ city_santa.rosa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_saratoga <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_sausalito <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_scotts.valley <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_sebastopol <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_sonoma <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ city_soquel <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_sunnyvale <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_union.city <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_vallejo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_walnut.creek <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_watsonville <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_woodside <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ city_yountville <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ county_alameda <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ county_contra.costa <dbl> 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ county_marin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ county_napa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ county_san.francisco <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ county_san.mateo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ county_santa.clara <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, …
## $ county_santa.cruz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, …
## $ county_solano <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ county_sonoma <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
# Specify model: boosted-tree regression on the xgboost engine, with every
# listed hyper-parameter left open for tuning
xgboost_spec <-
  boost_tree(
    trees = tune(),
    min_n = tune(),
    mtry = tune(),
    learn_rate = tune(),
    loss_reduction = tune(),
    sample_size = tune()
  ) %>%
  set_engine("xgboost") %>%
  set_mode("regression")
# Bundle the model specification and the recipe into one workflow
xgboost_workflow <-
  workflow() %>%
  add_model(xgboost_spec) %>%
  add_recipe(xgboost_recipe)
# Tune hyper-parameters: 5 candidate combinations scored on the CV resamples
set.seed(344)
xgboost_tune <- xgboost_workflow %>%
  tune_grid(resamples = data_cv, grid = 5)
## i Creating pre-processing data to finalize unknown parameter: mtry
## → A | error: [14:22:04] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
##
There were issues with some computations A: x1
There were issues with some computations A: x3
→ B | error: [14:22:05] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3
There were issues with some computations A: x3 B: x2
→ C | error: [14:22:06] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2
There were issues with some computations A: x3 B: x2 C: x1
There were issues with some computations A: x3 B: x2 C: x2
There were issues with some computations A: x3 B: x2 C: x4
→ D | error: [14:22:08] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2 C: x4
There were issues with some computations A: x3 B: x2 C: x5 D: x1
There were issues with some computations A: x3 B: x2 C: x5 D: x3
There were issues with some computations A: x3 B: x2 C: x5 D: x5
→ E | error: [14:22:09] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2 C: x5 D: x5
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x1
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x2
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x4
→ F | error: [14:22:10] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x4
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
→ G | error: [14:22:11] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
→ H | error: [14:22:12] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
→ I | error: [14:22:13] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
→ J | error: [14:22:14] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
→ K | error: [14:22:15] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
→ L | error: [14:22:16] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
→ M | error: [14:22:17] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
→ N | error: [14:22:18] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
→ O | error: [14:22:19] src/data/data.cc:1104: Check failed: valid: Input data contains `inf` or `nan`
## Stack trace:
## [bt] (0) 1 xgboost.so 0x000000010fbb2d94 dmlc::LogMessageFatal::~LogMessageFatal() + 116
## [bt] (1) 2 xgboost.so 0x000000010fc44910 unsigned long long xgboost::SparsePage::Push<xgboost::data::DenseAdapterBatch>(xgboost::data::DenseAdapterBatch const&, float, int) + 1152
## [bt] (2) 3 xgboost.so 0x000000010fc374ba xgboost::data::SimpleDMatrix::SimpleDMatrix<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int) + 362
## [bt] (3) 4 xgboost.so 0x000000010fc44165 xgboost::DMatrix* xgboost::DMatrix::Create<xgboost::data::DenseAdapter>(xgboost::data::DenseAdapter*, float, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 53
## [bt] (4) 5 xgboost.so 0x000000010fd77cd1 XGDMatrixCreateFromMat_omp
## There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
There were issues with some computations A: x3 B: x2 C: x5 D: x5 E: x…
## Warning: All models failed. Run `show_notes(.Last.tune.result)` for more
## information.