Click here to read the data manually.
ikea <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-11-03/ikea.csv')
skimr::skim(ikea)
| Name | ikea |
| Number of rows | 3694 |
| Number of columns | 14 |
| _______________________ | |
| Column type frequency: | |
| character | 7 |
| logical | 1 |
| numeric | 6 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| name | 0 | 1 | 3 | 27 | 0 | 607 | 0 |
| category | 0 | 1 | 4 | 36 | 0 | 17 | 0 |
| old_price | 0 | 1 | 4 | 13 | 0 | 365 | 0 |
| link | 0 | 1 | 52 | 163 | 0 | 2962 | 0 |
| other_colors | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| short_description | 0 | 1 | 3 | 63 | 0 | 1706 | 0 |
| designer | 0 | 1 | 3 | 1261 | 0 | 381 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| sellable_online | 0 | 1 | 0.99 | TRU: 3666, FAL: 28 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| …1 | 0 | 1.00 | 1846.50 | 1066.51 | 0 | 923.25 | 1846.5 | 2769.75 | 3693 | ▇▇▇▇▇ |
| item_id | 0 | 1.00 | 48632396.79 | 28887094.10 | 58487 | 20390574.00 | 49288078.0 | 70403572.75 | 99932615 | ▇▇▇▇▇ |
| price | 0 | 1.00 | 1078.21 | 1374.65 | 3 | 180.90 | 544.7 | 1429.50 | 9585 | ▇▁▁▁▁ |
| depth | 1463 | 0.60 | 54.38 | 29.96 | 1 | 38.00 | 47.0 | 60.00 | 257 | ▇▃▁▁▁ |
| height | 988 | 0.73 | 101.68 | 61.10 | 1 | 67.00 | 83.0 | 124.00 | 700 | ▇▂▁▁▁ |
| width | 589 | 0.84 | 104.47 | 71.13 | 1 | 60.00 | 80.0 | 140.00 | 420 | ▇▅▂▁▁ |
data <- ikea %>%
# Treat missing values
filter(!is.na(height), !is.na(width), !is.na(depth)) %>%
mutate(across(is.logical, as.factor)) %>%
# Remove unwanted columns
select(-...1, -link, -old_price, -designer) %>%
# log transform variables with positively skewed distributions
mutate(price = log(price))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(is.logical, as.factor)`.
## Caused by warning:
## ! Use of bare predicate functions was deprecated in tidyselect 1.1.0.
## ℹ Please use wrap predicates in `where()` instead.
## # Was:
## data %>% select(is.logical)
##
## # Now:
## data %>% select(where(is.logical))
Products with larger dimensions, especially those with widths over 161.5 cm and depths over 60 cm, show the strongest positive correlations with higher price ranges. The width variable (161.5 cm and above) shows a correlation of 0.579, while the depth variable (over 60 cm) has a correlation of 0.447, indicating that larger-sized furniture tends to be more expensive.
The category ‘Sofas & Armchairs’ stands out with a correlation of 0.379 with higher prices, suggesting that items in this category are generally priced higher. Similarly, ‘Wardrobes’ also show a positive association with price (0.279). Conversely, categories like ‘Children’s Furniture’ (-0.119) and ‘TV & Media Furniture’ (0.0069) show weaker or negative associations with price, suggesting that products in these categories tend to be less expensive.
Products taller than 171 cm show a somewhat positive correlation with price (0.232), while shorter products (less than 71 cm in height) are negatively correlated with price (-0.277). This suggests that taller products are more likely to be associated with higher prices, reflecting an emphasis on height as a cost factor.
The presence of other colors slightly impacts the price, with products available in multiple colors positively correlated (0.136), indicating that these items might cater to broader consumer preferences and therefore command higher prices.
Products that are sellable online show a slight positive correlation with price (0.048), reinforcing that items with online accessibility might be positioned at higher price points, possibly due to broader market reach and convenience factors.
# Step 1: Prepare data by binarising data
binarized_table <- data %>%
select(price, height, width, depth, category, other_colors, sellable_online) %>%
binarize()
binarized_table %>% glimpse()
## Rows: 1,899
## Columns: 35
## $ `price__-Inf_5.68697535633982` <dbl> 1, 1, 0, 1, 1, 1, 0, …
## $ price__5.68697535633982_6.52209279817015 <dbl> 0, 0, 1, 0, 0, 0, 1, …
## $ price__6.52209279817015_7.37085996851068 <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ price__7.37085996851068_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `height__-Inf_71` <dbl> 0, 1, 0, 0, 0, 0, 0, …
## $ height__71_92 <dbl> 0, 0, 1, 0, 0, 0, 0, …
## $ height__92_171 <dbl> 1, 0, 0, 1, 1, 1, 1, …
## $ height__171_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `width__-Inf_60` <dbl> 1, 0, 1, 1, 1, 1, 1, …
## $ width__60_93 <dbl> 0, 1, 0, 0, 0, 0, 0, …
## $ width__93_161.5 <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ width__161.5_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `depth__-Inf_40` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ depth__40_47 <dbl> 0, 0, 1, 1, 1, 1, 1, …
## $ depth__47_60 <dbl> 1, 1, 0, 0, 0, 0, 0, …
## $ depth__60_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ category__Bar_furniture <dbl> 1, 1, 1, 1, 1, 1, 1, …
## $ category__Beds <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `category__Bookcases_&_shelving_units` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `category__Cabinets_&_cupboards` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ category__Chairs <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `category__Chests_of_drawers_&_drawer_units` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `category__Children's_furniture` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ category__Nursery_furniture <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ category__Outdoor_furniture <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `category__Sideboards,_buffets_&_console_tables` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `category__Sofas_&_armchairs` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `category__Tables_&_desks` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `category__TV_&_media_furniture` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ category__Wardrobes <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ `category__-OTHER` <dbl> 0, 0, 0, 0, 0, 0, 0, …
## $ other_colors__No <dbl> 0, 1, 1, 1, 1, 1, 1, …
## $ other_colors__Yes <dbl> 1, 0, 0, 0, 0, 0, 0, …
## $ sellable_online__TRUE <dbl> 1, 1, 1, 1, 1, 1, 1, …
## $ `sellable_online__-OTHER` <dbl> 0, 0, 0, 0, 0, 0, 0, …
# Step 2: Correlate the data
corr_tbl <- binarized_table %>%
correlate(price__7.37085996851068_Inf)
corr_tbl
## # A tibble: 35 × 3
## feature bin correlation
## <fct> <chr> <dbl>
## 1 price 7.37085996851068_Inf 1
## 2 width 161.5_Inf 0.579
## 3 depth 60_Inf 0.447
## 4 category Sofas_&_armchairs 0.379
## 5 width -Inf_60 -0.374
## 6 price -Inf_5.68697535633982 -0.336
## 7 price 6.52209279817015_7.37085996851068 -0.333
## 8 price 5.68697535633982_6.52209279817015 -0.331
## 9 category Wardrobes 0.279
## 10 height -Inf_71 -0.277
## # ℹ 25 more rows
# Step 3: Plot
corr_tbl %>%
plot_correlation_funnel()
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Based on the boxplot and histogram below, we can see that the category ‘wardrobes’ is on average the most expensive, while Children’s furniture is on average the least expensive. When saying this, we must state that prices greatly fluctuate in some categories, as we can see in the TV & media furnitures category.
data %>%
ggplot(aes(price, as.factor(category))) +
geom_boxplot()
category <- ikea %>%
select(price, category) %>%
mutate(price = log(price))
# Calculate average price per category
category_avg_price <- category %>%
group_by(category) %>%
summarise(avg_price = mean(price, na.rm = TRUE)) %>%
ungroup()
# Plot bar plot of average prices per category
ggplot(category_avg_price, aes(x = reorder(category, avg_price), y = avg_price)) +
geom_bar(stat = "identity", fill = "skyblue", color = "black") +
labs(title = "Average Price per Product Category",
x = "Product Category",
y = "Average Price (log-transformed)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
The next boxplots tells us how the price of a product is correlated to the fact if that product is also sold in different colors. We can see that when a product is also available in different colors, the price on average is slightly more than if not.
data %>%
ggplot(aes(price, as.factor(other_colors))) +
geom_boxplot()
Similar to previous predictor, we can see that when a product is also sold online, and thus more available to consumers, the average price of that product is more expensive. We can also see, in this case, products that are also sold online vary less in price than the products that are not sold online.
data %>%
ggplot(aes(price, sellable_online)) +
geom_boxplot()
Finally, we look at the short description of these products. The diagram shows us that when the word ‘sliding’ is included in the short description of a product, it is likely to be more expensive than when the word ‘conference’ is included.
data %>%
# tokenize title
unnest_tokens(output = word, input = short_description) %>%
# calculate average price per word
group_by(word) %>%
summarise(price = mean(price),
n = n()) %>%
ungroup() %>%
filter(n > 10, !str_detect(word, "\\d")) %>%
slice_max(order_by = price, n = 20) %>%
# Plot
ggplot(aes(price, fct_reorder(word, price))) +
geom_point() +
labs(y = "Words in Short Description",
x = "Average Price when Word is Included")
Split Data
# data <- sample_n(data, 100)
set.seed(1234)
ikea_split <- rsample::initial_split(data, prop = 0.75)
ikea_train <- training(ikea_split)
ikea_test <- testing(ikea_split)
Preprocess Data
# Specify recipe
ikea_recipe <-
recipe(price ~ ., data = ikea_train) %>%
update_role(item_id, new_role = "id variable") %>%
step_tokenize(short_description) %>%
step_tokenfilter(short_description, max_tokens = 100) %>%
step_tfidf(short_description) %>%
step_other(category, name) %>%
step_dummy(all_nominal_predictors(), one_hot = TRUE) %>%
step_YeoJohnson(width, height, depth) %>%
step_impute_knn(all_predictors())
ikea_recipe_prep <- ikea_recipe %>%
prep()
ikea_train_prepped <- ikea_recipe_prep %>%
juice()
ikea_train_prepped %>% glimpse()
## Rows: 1,424
## Columns: 121
## $ item_id <dbl> 19282962, 29320911, 49276549…
## $ depth <dbl> 6.921938, 5.331305, 4.942159…
## $ height <dbl> 12.95187, 21.33670, 14.11837…
## $ width <dbl> 9.034855, 7.442056, 5.426853…
## $ price <dbl> 7.595890, 7.833996, 6.429719…
## $ tfidf_short_description_1 <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_120x40x64 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_140x200 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_147x147 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_150x44x236 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_150x60x236 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_150x66x236 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_2 <dbl> 0.8840660, 0.0000000, 0.5304…
## $ tfidf_short_description_200x66x236 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_25x51x70 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_3 <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_4 <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_41x101 <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_41x61 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_5 <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_50x51x70 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_6 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_60x50x128 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_61x101 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_74 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_75 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_8 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_80x30x202 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_99x44x56 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_and <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_armchair <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_armrest <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_armrests <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_backrest <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_bar <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_baskets <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_bed <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_bench <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_bookcase <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_box <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_cabinet <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_cabinets <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_castors <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_chair <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_chaise <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_changing <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_chest <dbl> 0.0000000, 0.0000000, 0.5867…
## $ `tfidf_short_description_children's` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_clothes <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_cm <dbl> 0.0000000, 0.2905183, 0.1743…
## $ tfidf_short_description_combination <dbl> 0.0000000, 0.6909039, 0.0000…
## $ tfidf_short_description_corner <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_cover <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_desk <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_door <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_doors <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_drawer <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_drawers <dbl> 0.0000000, 0.0000000, 0.5004…
## $ tfidf_short_description_feet <dbl> 0.00000, 0.00000, 0.00000, 0…
## $ tfidf_short_description_foldable <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_for <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_frame <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_glass <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_highchair <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_in <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_inserts <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_junior <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_leg <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_legs <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_lock <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_longue <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_mesh <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_modular <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_module <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_mounted <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_of <dbl> 0.0000000, 0.0000000, 0.5753…
## $ tfidf_short_description_on <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_outdoor <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_panel <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_plinth <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_rail <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_seat <dbl> 0.7549716, 0.0000000, 0.0000…
## $ tfidf_short_description_section <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_sections <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_shelf <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_shelves <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_shelving <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_sliding <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_smart <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_sofa <dbl> 0.7660576, 0.0000000, 0.0000…
## $ tfidf_short_description_step <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_stool <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_storage <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_table <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_top <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_tv <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_underframe <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_unit <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_upright <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_w <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_wall <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_wardrobe <dbl> 0.0000000, 0.7815769, 0.0000…
## $ tfidf_short_description_wire <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_with <dbl> 0.0000000, 0.0000000, 0.0000…
## $ name_BESTÅ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_other <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ category_Bookcases...shelving.units <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ category_Cabinets...cupboards <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 0, 0…
## $ category_Chairs <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Chests.of.drawers...drawer.units <dbl> 0, 0, 1, 0, 1, 0, 1, 0, 1, 0…
## $ category_Sofas...armchairs <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Tables...desks <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_TV...media.furniture <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Wardrobes <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_other <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ sellable_online_FALSE. <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ sellable_online_TRUE. <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ other_colors_No <dbl> 1, 1, 0, 1, 1, 0, 0, 0, 1, 0…
## $ other_colors_Yes <dbl> 0, 0, 1, 0, 0, 1, 1, 1, 0, 1…
# Specify model
xgboost_spec <-
boost_tree(trees = tune(), min_n = tune(), mtry = tune(), learn_rate = tune()) %>%
set_mode("regression") %>%
set_engine("xgboost")
# Combine recipe and model using workflow
xgboost_workflow <-
workflow() %>%
add_recipe(ikea_recipe) %>%
add_model(xgboost_spec)
# Create cross-validation folds
set.seed(2345)
ikea_cv <- vfold_cv(ikea_train, v = 5)
# Perform hyperparameter tuning
set.seed(3456)
xgboost_tune <-
tune_grid(
xgboost_workflow,
resamples = ikea_cv,
grid = 5
)
## Warning: package 'xgboost' was built under R version 4.3.3