# Load the TidyTuesday IKEA furniture data set (2020-11-03).
ikea <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-11-03/ikea.csv"
)

# Clean the data and address missing values:
#   * keep only rows where height, width and depth are all present,
#   * convert logical columns to factors,
#   * drop the row index (`...1`), `link`, `old_price` and `designer`,
#   * log-transform `price` and add a `volume` feature.
data <- ikea %>%
  filter(!is.na(height), !is.na(width), !is.na(depth)) %>%
  # Predicate functions must be wrapped in `where()`; a bare `is.logical`
  # has been deprecated since tidyselect 1.1.0 and raises a warning.
  mutate(across(where(is.logical), as.factor)) %>%
  select(-...1, -link, -old_price, -designer) %>%
  mutate(
    price = log(price), # Log-transform price
    volume = height * width * depth # Add volume feature
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(is.logical, as.factor)`.
## Caused by warning:
## ! Use of bare predicate functions was deprecated in tidyselect 1.1.0.
## ℹ Please use wrap predicates in `where()` instead.
## # Was:
## data %>% select(is.logical)
##
## # Now:
## data %>% select(where(is.logical))
# Print a per-column summary of the cleaned data, grouped by column type
# (missingness, unique counts, and distribution statistics for numerics).
skimr::skim(data)
| Name | data |
| Number of rows | 1899 |
| Number of columns | 11 |
| _______________________ | |
| Column type frequency: | |
| character | 4 |
| factor | 1 |
| numeric | 6 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| name | 0 | 1 | 3 | 21 | 0 | 289 | 0 |
| category | 0 | 1 | 4 | 36 | 0 | 17 | 0 |
| other_colors | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| short_description | 0 | 1 | 3 | 62 | 0 | 992 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| sellable_online | 0 | 1 | FALSE | 2 | TRU: 1886, FAL: 13 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| item_id | 0 | 1 | 48805184.68 | 29150143.77 | 116595.00 | 20343801.50 | 49278591.00 | 70439650.50 | 99903788.00 | ▇▇▆▇▆ |
| price | 0 | 1 | 6.47 | 1.23 | 1.79 | 5.69 | 6.52 | 7.37 | 9.17 | ▁▂▆▇▃ |
| depth | 0 | 1 | 56.16 | 30.58 | 1.00 | 40.00 | 47.00 | 60.00 | 257.00 | ▇▃▁▁▁ |
| height | 0 | 1 | 113.11 | 62.75 | 2.00 | 71.00 | 92.00 | 171.00 | 301.00 | ▃▇▂▃▁ |
| width | 0 | 1 | 119.75 | 77.52 | 2.00 | 60.00 | 93.00 | 161.50 | 420.00 | ▇▆▃▁▁ |
| volume | 0 | 1 | 910690.07 | 1110359.29 | 40.00 | 202656.00 | 467200.00 | 1299817.50 | 13629000.00 | ▇▁▁▁▁ |
#data %>%
#ggplot(aes(price, as.factor(category))) +
#geom_boxplot()
# Correlation funnel
# Binarize the selected features into indicator columns; numeric columns are
# named after their bin limits (e.g. `price__<lower>_Inf`).
binarized_table <- data %>%
  select(
    price, height, width, depth, volume,
    category, other_colors, sellable_online
  ) %>%
  binarize()

# Locate the top price bin by pattern instead of hard-coding its name: the
# lower limit embedded in the column name is a data-dependent cutoff, so a
# literal such as `price__7.37085996851068_Inf` breaks as soon as the data
# (or its floating-point formatting) changes.
price_target_col <- grep(
  "^price__.*_Inf$",
  names(binarized_table),
  value = TRUE
)
stopifnot(length(price_target_col) == 1L)

# Correlate every binarized feature with the top price bin.
corr_tbl <- binarized_table %>%
  correlate(!!as.name(price_target_col))

corr_tbl %>%
  plot_correlation_funnel()
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Preprocessing recipe: model (log-transformed) price from all other columns.
ikea_recipe_improved <- recipe(price ~ ., data = data) %>%
  # Keep item_id in the data but give it a non-predictor role.
  update_role(item_id, new_role = "id variable") %>%
  # Text features from the description: tokenize, keep at most 150 tokens,
  # then weight them by tf-idf.
  step_tokenize(short_description) %>%
  step_tokenfilter(short_description, max_tokens = 150) %>% # Adjust max_tokens
  step_tfidf(short_description) %>%
  # Pool categories below the 5% threshold into a single "other" level.
  step_other(category, threshold = 0.05) %>% # More aggressive threshold
  step_novel(all_nominal_predictors()) %>%
  step_dummy(all_nominal_predictors(), one_hot = TRUE) %>%
  # NOTE(review): rows with missing height/width/depth are filtered out when
  # `data` is built, so this step presumably only matters for new data that
  # contains NAs -- confirm it is still wanted.
  step_impute_knn(height, width, depth) %>% # KNN for missing values
  step_YeoJohnson(all_numeric_predictors()) # Normalize skewed variables

# Inspect the preprocessed training data. `bake(new_data = NULL)` replaces the
# superseded `juice()` and returns the same processed training set.
ikea_recipe_improved %>%
  prep() %>%
  bake(new_data = NULL) %>%
  glimpse()
## Rows: 1,899
## Columns: 462
## $ item_id <dbl> 80155205, 30180504, 10122647…
## $ depth <dbl> 4.691431, 4.945837, 4.546615…
## $ height <dbl> 14.032594, 9.221112, 13.4054…
## $ width <dbl> 5.963386, 6.388163, 5.187576…
## $ volume <dbl> 43.26051, 39.67801, 38.52364…
## $ price <dbl> 4.234107, 5.416100, 5.843544…
## $ tfidf_short_description_1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_120 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_120x40x38 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_120x40x64 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_120x42x74 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_140x200 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_147x147 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_150x44x236 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_150x60x236 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_150x66x236 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_162x37x134 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_165x55x216 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_200x44x236 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_200x60x236 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_200x66x236 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_25x51x70 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_3 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_35x35x35 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_4 <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_41x101 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_41x61 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_46x30x145 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_46x30x94 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_5 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_50x51x70 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_6 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_60x50x128 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_60x50x192 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_60x64 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_61x101 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_63 <dbl> 0.0000000, 0.0000000, 0.8752…
## $ tfidf_short_description_7 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_74 <dbl> 0.7525930, 0.0000000, 0.0000…
## $ tfidf_short_description_75 <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_76 <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_77x147 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_8 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_80x139 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_80x200 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_80x30x202 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_80x50x171 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_80x75 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_89x50x179 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_9 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_90x200 <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_90x50x50 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_94x44x52 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_99x44x56 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_add <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_and <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_armchair <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_armrest <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_armrests <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_backrest <dbl> 0.6388815, 0.0000000, 0.6388…
## $ tfidf_short_description_bar <dbl> 0.6137897, 0.0000000, 0.6137…
## $ tfidf_short_description_baskets <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_bed <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_bedside <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_bench <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_bookcase <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_bookshelf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_box <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_cabinet <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_cabinets <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_castors <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_chair <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_chaise <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_changing <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_chest <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `tfidf_short_description_children's` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_clothes <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_cm <dbl> 0.12664927, 0.17824299, 0.12…
## $ tfidf_short_description_combination <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_corner <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_cover <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_day <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_desk <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_door <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_doors <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_drawer <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_drawers <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_dressing <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_easy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_feet <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_foldable <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_folding <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_footstool <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_for <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_frame <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_glass <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_high <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_highchair <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_in <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_inserts <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_island <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_junior <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_kitchen <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_leg <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_legs <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_lock <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_longue <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_media <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_mesh <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_modular <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_module <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_mounted <dbl> 0.0000000, 0.9702621, 0.0000…
## $ tfidf_short_description_of <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_on <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_open <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_outdoor <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_panel <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_plinth <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_rack <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_rail <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_rails <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_rocking <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_rod <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_seat <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_section <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_sections <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_shelf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_shelves <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_shelving <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_side <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_sideboard <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_sliding <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_smart <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_sofa <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_step <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_stool <dbl> 0.5450549, 0.0000000, 0.5450…
## $ tfidf_short_description_storage <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_table <dbl> 0.0000000, 0.9308459, 0.0000…
## $ tfidf_short_description_tbl <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_three <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_top <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_tray <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_tv <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_two <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_underframe <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_unit <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_upright <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_ut <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_w <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_wall <dbl> 0.0000000, 0.8020801, 0.0000…
## $ tfidf_short_description_wardrobe <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_wire <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_with <dbl> 0.2960224, 0.0000000, 0.2960…
## $ tfidf_short_description_workspace <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ADDE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_AGAM <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_AGEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ALEX <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ALGOT <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ALGOT...BROR <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ALGOT...SKÅDIS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ANGERSBY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ANTILOP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ÄPPLARÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ARKELSTORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ASKEBY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ASKHOLMEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ASKVOLL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BALSBERGET <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BALTSAR <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BEKANT <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BEKVÄM <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BENARP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BERNHARD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BESTÅ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BESTÅ...EKET <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BESTÅ.BURS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BILLY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BILLY...BOTTNA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BILLY...GNEDBY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BILLY...MORLIDEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BILLY...OXBERG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BINGSTA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BJÖRKSNÄS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BLÅMES <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BÖRJE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRÄDA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRÅTHULT <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRIMNES <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BROMMÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BROR <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRORINGE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRUSALI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRUSEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRYGGJA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BUNSÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BUSKBO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BUSUNGE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BYÅS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BYLLAN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_DELAKTIG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_DETOLF <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_DIETMAR <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_EKEDALEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ name_EKENÄS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_EKERÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_EKET <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_EKOLSUND <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_EKTORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ELVARLI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ENETRI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ERIK <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ERNFRID <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FABRIKÖR <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FALHOLMEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FANBYN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FÄRLÖV <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FEJAN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FINNBY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FJÄLLBO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FLISAT <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FLOTTEBO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FÖRSIKTIG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FRANKLIN <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ name_FREKVENS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ name_FRIHETEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GALANT <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GAMLARP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GAMLEBY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GENEVAD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GERSBY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GISTAD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GLENN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GNEDBY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GODISHUS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GODVIN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GRÄLVIKEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GRÖNADAL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GRÖNLID <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GRUNDTAL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GRUVBYN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GULLIVER <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GUNDE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HÄLLAN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HAMMARN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HARRY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HÄSSELBY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HATTEFJÄLL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HAVSTA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HAVSTEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HEJNE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HELMER <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HEMNES <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HENRIKSDAL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HOLMSUND <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HUSARÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HYLLIS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_IDÅSEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_IKEA.PS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_IKEA.PS.GULLHOLMEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_IKEA.PS.LÖMSK <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_IKEA.PS.VÅGÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_INGATORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_INGOLF <dbl> 0, 0, 1, 0, 0, 0, 1, 0, 0, 0…
## $ name_INNAMO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ISBERGET <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_IVAR <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_JANINGE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_JOKKMOKK <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_JONAXEL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KALLAX <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KARLHUGO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KARLJAN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KIVIK <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KLEPPSTAD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KLIMPEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KLIPPAN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KNARREVIK <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KNOPPARP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KNOTTEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KOARP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KOLBJÖRN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KOPPANG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KORNSJÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KULLABERG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KULLEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KUNGSHAMN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KUNGSHOLMEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KYRRE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LACK <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LÄCKÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LAIVA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LANDSKRONA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LÅNGFJÄLL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LANGUR <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LAPPLAND <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LEIFARNE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LENNART <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LERBERG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LERHAMN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LIATORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LIDHULT <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LILLÅSEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LISABO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LIXHULT <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LOMMARP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LOTE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LYCKSELE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LYCKSELE.HÅVET <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LYCKSELE.LÖVÅS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LYCKSELE.MURBO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MACKAPÄR <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MALM <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MALSJÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MAMMUT <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MARKERAD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MARTIN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MÄSTERBY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MASTHOLMEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MICKE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MILSBO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MOSJÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MOSTORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MUREN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NANNARP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NIKKEBY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NILSOVE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NILSOVE...NORNA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NISSE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NOLMYRA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NOMINELL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORBERG <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORDKISA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORDLI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORDMELA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORDVIKEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORRÅKER <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORRARYD <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ name_NORRNÄS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORSBORG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NYHAMN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ODDVALD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ODGER <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_OLAUS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ÖNSKLIG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PÅHL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...AULI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...BERGSBO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...FÄRVIK.AULI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...FORSAND <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...FORSAND.VIKEDAL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...GRIMO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...GRIMO.VIKEDAL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...HASVIK <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...HOKKSUND <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...MEHAMN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...MEHAMN.AULI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...MEHAMN.SEKKEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...SEKKEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...TYSSEDAL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...VIKEDAL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...VINGROM <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...VINTERBRO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PELLO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PLATSA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_POÄNG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PRÄSTHOLM <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_RÅDVIKEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_RAKKESTAD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_RÅSKOG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_RAST <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_REGISSÖR <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_REMSTA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_RÖNNINGE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SAKARIAS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SALTHOLMEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SANDBACKEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SETSKOG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SJÄLLAND <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SKÅDIS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SKARPÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SKOGSTORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SKRUVSTA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SKUBB <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SLÄKT <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SMÅGÖRA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SÖDERHAMN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SOLLERÖN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SONGESAND <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STALLARP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STEFAN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STIG <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STOCKHOLM <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STOCKHOLM.2017 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STOCKSUND <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STOLJAN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STRANDMON <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STUBBARP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STUK <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STUVA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STUVA...FÖLJA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STUVA...FRITIDS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SUNDLANDET <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SUNDVIK <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SVALNÄS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SVANÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SVÄRTA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SVENARNE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SVENBERTIL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SYVDE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TÄRNÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TEODORES <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TERJE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TOBIAS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TORNVIKEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TOSSBERG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TOSTERÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TRANARÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TROFAST <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TROGEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TRYSIL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TULLSTA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TYSSEDAL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ULRIKSBERG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_URBAN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VÄDDÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VADHOLMA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VALLENTUNA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VATTVIKEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VEBERÖD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VEDBO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VESKEN <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VIGGJA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VIKHAMMER <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VILTO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VIMLE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VISTHUS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VITTSJÖ <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VITVAL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VOLFGANG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VUKU <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_YNGVAR <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_new <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Bookcases...shelving.units <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Cabinets...cupboards <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Chairs <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Chests.of.drawers...drawer.units <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Sofas...armchairs <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Tables...desks <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_TV...media.furniture <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Wardrobes <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_other <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ category_new <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ sellable_online_FALSE. <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ sellable_online_TRUE. <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ sellable_online_new <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ other_colors_No <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ other_colors_Yes <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ other_colors_new <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
# Train/test split: 75% of the rows go to training. The seed is fixed so the
# split is reproducible.
set.seed(1234)
ikea_split <- data %>% initial_split(prop = 0.75)
ikea_train <- ikea_split %>% training()
ikea_test <- ikea_split %>% testing()

# 5-fold cross-validation folds built from the training portion only, with
# their own seed.
set.seed(2345)
ikea_cv <- ikea_train %>% vfold_cv(v = 5)
# Model specifications. Every hyperparameter marked `tune()` is left open and
# filled in later by the tuning grids.

# XGBoost Model
xgboost_spec <- boost_tree(
  trees = tune(),
  min_n = tune(),
  mtry = tune(),
  learn_rate = tune()
) %>%
  set_engine("xgboost") %>%
  set_mode("regression")

# Random Forest Model
rf_spec <- rand_forest(
  trees = tune(),
  min_n = tune(),
  mtry = tune()
) %>%
  set_engine("ranger") %>%
  set_mode("regression")

# SVM Model
svm_spec <- svm_rbf(
  cost = tune(),
  rbf_sigma = tune()
) %>%
  set_engine("kernlab") %>%
  set_mode("regression")
# Create workflows: each one pairs the shared preprocessing recipe with one
# of the three model specifications.
xgboost_workflow <- workflow() %>%
  add_model(xgboost_spec) %>%
  add_recipe(ikea_recipe_improved)

rf_workflow <- workflow() %>%
  add_model(rf_spec) %>%
  add_recipe(ikea_recipe_improved)

svm_workflow <- workflow() %>%
  add_model(svm_spec) %>%
  add_recipe(ikea_recipe_improved)
# Finalize the data-dependent upper bound of mtry from the predictor columns
# only. `price` is the outcome being modelled, so counting it (as passing the
# whole training frame did) overstates the number of available predictors.
# NOTE(review): the recipe appears to expand categorical columns into dummy
# variables, so the model may see more predictors than the raw column count;
# this bound is therefore conservative -- confirm whether a wider range is
# wanted.
mtry_range <- finalize(mtry(), x = ikea_train %>% select(-price))
# XGBoost grid: 20 space-filling candidates over four hyperparameters
# NOTE(review): recent dials releases deprecate grid_latin_hypercube() in
# favour of grid_space_filling(); kept as-is to match the installed version.
xgboost_grid <- grid_latin_hypercube(
  trees(),
  min_n(),
  mtry = mtry_range,
  learn_rate(),
  size = 20
)
# Random forest grid: full factorial with 5 levels per hyperparameter
rf_grid <- grid_regular(trees(), min_n(), mtry = mtry_range, levels = 5)
# SVM grid: the SVM has no mtry parameter, so only cost and sigma are varied
svm_grid <- grid_regular(cost(), rbf_sigma(), levels = 5)
# Evaluate every XGBoost candidate on the CV folds (seeded before tuning)
set.seed(3456)
xgboost_tune <- xgboost_workflow %>%
  tune_grid(resamples = ikea_cv, grid = xgboost_grid)
## Warning: package 'xgboost' was built under R version 4.3.3
# Evaluate every random forest candidate on the same folds
set.seed(4567)
rf_tune <- rf_workflow %>%
  tune_grid(resamples = ikea_cv, grid = rf_grid)
# Evaluate every SVM candidate on the same folds
set.seed(5678)
svm_tune <- svm_workflow %>%
  tune_grid(resamples = ikea_cv, grid = svm_grid)
# Collect the cross-validated metrics for every tuned candidate of each model
xgboost_results <- collect_metrics(xgboost_tune)
rf_results <- collect_metrics(rf_tune)
svm_results <- collect_metrics(svm_tune)
# Rank all candidates by mean cross-validated RMSE (lower is better).
# Each row is tagged with its model family: after binding, hyperparameters a
# model does not have are just NA, so the tuning columns alone do not say
# which model a row belongs to.
bind_rows(
  xgboost = xgboost_results,
  rf = rf_results,
  svm = svm_results,
  .id = "model"
) %>%
  filter(.metric == "rmse") %>%
  arrange(mean)
## # A tibble: 170 × 12
## mtry trees min_n learn_rate .metric .estimator mean n std_err .config
## <int> <int> <int> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 NA NA NA NA rmse standard 0.390 5 0.0154 Preproce…
## 2 8 196 10 0.0900 rmse standard 0.511 5 0.0234 Preproce…
## 3 NA NA NA NA rmse standard 0.516 5 0.0151 Preproce…
## 4 11 2000 2 NA rmse standard 0.534 5 0.0180 Preproce…
## 5 11 1000 2 NA rmse standard 0.534 5 0.0186 Preproce…
## 6 11 500 2 NA rmse standard 0.535 5 0.0165 Preproce…
## 7 11 1500 2 NA rmse standard 0.535 5 0.0177 Preproce…
## 8 11 2000 11 NA rmse standard 0.537 5 0.0187 Preproce…
## 9 11 1000 11 NA rmse standard 0.538 5 0.0187 Preproce…
## 10 11 1500 11 NA rmse standard 0.539 5 0.0190 Preproce…
## # ℹ 160 more rows
## # ℹ 2 more variables: cost <dbl>, rbf_sigma <dbl>
# Lock in the XGBoost candidate with the lowest cross-validated RMSE.
# NOTE(review): this picks the best XGBoost candidate, not necessarily the
# best model overall. In the recorded RMSE ranking the top row has no tree
# hyperparameters (mtry/trees/min_n are NA), which suggests an SVM candidate
# scored lower -- confirm that XGBoost is the intended final model.
best_xgboost_params <- select_best(xgboost_tune, metric = "rmse")
xgboost_best <- xgboost_workflow %>%
  finalize_workflow(best_xgboost_params)
# Refit on the training portion of the split and score on the test portion
ikea_fit <- xgboost_best %>%
  last_fit(ikea_split)
# Test-set metrics and row-level predictions from that final fit
test_metrics <- ikea_fit %>% collect_metrics()
test_predictions <- ikea_fit %>% collect_predictions()
# Scatter test-set predictions against the observed values. `price` was
# log-transformed before modelling, so both axes are on the log scale and
# both labels say so (the y label previously omitted this).
test_predictions %>%
  ggplot(aes(x = price, y = .pred)) +
  geom_point(alpha = 0.3, color = "midnightblue") +
  # Dashed reference line; points on it are predicted exactly
  geom_abline(lty = 2, color = "gray50") +
  # Equal scaling on both axes so the reference line sits at 45 degrees
  coord_fixed() +
  labs(
    title = "Predicted vs Actual Prices (Test Data)",
    x = "Actual Price (log-transformed)",
    y = "Predicted Price (log-transformed)"
  )
# Look up the test-set RMSE and R-squared by metric name
rmse_value <- test_metrics$.estimate[test_metrics$.metric == "rmse"]
rsq_value <- test_metrics$.estimate[test_metrics$.metric == "rsq"]
# R-squared expressed as a percentage
variance_explained <- 100 * rsq_value
# Report each value on its own line
print(paste0("RMSE: ", rmse_value))
## [1] "RMSE: 0.462790631109112"
print(paste0("R-squared: ", rsq_value))
## [1] "R-squared: 0.862361131632264"
print(paste0("Variance Explained (%): ", variance_explained))
## [1] "Variance Explained (%): 86.2361131632264"
The final XGBoost model for predicting IKEA product prices performed well on the held-out test set, achieving an RMSE of 0.4628 and an R-squared of 0.8624. Because price was log-transformed before modeling, the RMSE is expressed in log-price units rather than in currency. The R-squared means that the model explains 86.24% of the variance in log price on the test data, indicating a high level of accuracy in predicting product prices. While there is still some room for improvement, the model’s current performance suggests that it is reliable for making price predictions based on features like product dimensions, categories, and text descriptions.
Efforts to improve the model included adding new features like product volume, handling missing data with KNN imputation, and refining the recipe with steps like step_other() and step_novel(). These changes helped the model generalize better, and while the RMSE is slightly higher than ideal, the model demonstrates strong predictive capabilities overall.