Data Import and Cleaning

# Load the TidyTuesday IKEA furniture data set (2020-11-03 release).
ikea <- read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-11-03/ikea.csv"
)

# Clean the data and address missing values:
#   * keep only rows where height, width and depth are all recorded,
#   * convert logical columns to factors,
#   * drop the unnamed first column (`...1`), link, old_price and designer,
#   * put price on the log scale and derive a volume feature.
data <- ikea %>%
  filter(!is.na(height), !is.na(width), !is.na(depth)) %>%
  # Predicate selections must be wrapped in `where()`; a bare `is.logical`
  # has been deprecated since tidyselect 1.1.0 and raises a warning.
  mutate(across(where(is.logical), as.factor)) %>%
  select(-...1, -link, -old_price, -designer) %>%
  mutate(
    price = log(price),              # Log-transform price
    volume = height * width * depth  # Add volume feature
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(is.logical, as.factor)`.
## Caused by warning:
## ! Use of bare predicate functions was deprecated in tidyselect 1.1.0.
## ℹ Please use wrap predicates in `where()` instead.
##   # Was:
##   data %>% select(is.logical)
## 
##   # Now:
##   data %>% select(where(is.logical))
# Summarise every column of the cleaned data (type, missingness, distribution).
skimr::skim(data)
Data summary
Name data
Number of rows 1899
Number of columns 11
_______________________
Column type frequency:
character 4
factor 1
numeric 6
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
name 0 1 3 21 0 289 0
category 0 1 4 36 0 17 0
other_colors 0 1 2 3 0 2 0
short_description 0 1 3 62 0 992 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
sellable_online 0 1 FALSE 2 TRU: 1886, FAL: 13

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
item_id 0 1 48805184.68 29150143.77 116595.00 20343801.50 49278591.00 70439650.50 99903788.00 ▇▇▆▇▆
price 0 1 6.47 1.23 1.79 5.69 6.52 7.37 9.17 ▁▂▆▇▃
depth 0 1 56.16 30.58 1.00 40.00 47.00 60.00 257.00 ▇▃▁▁▁
height 0 1 113.11 62.75 2.00 71.00 92.00 171.00 301.00 ▃▇▂▃▁
width 0 1 119.75 77.52 2.00 60.00 93.00 161.50 420.00 ▇▆▃▁▁
volume 0 1 910690.07 1110359.29 40.00 202656.00 467200.00 1299817.50 13629000.00 ▇▁▁▁▁

Explore Data - Correlation Analysis

#data %>%
  #ggplot(aes(price, as.factor(category))) +
  #geom_boxplot()

# Correlation funnel: binarize the selected features, then rank the resulting
# bins by their correlation with the highest price bin.
binarized_table <- data %>%
  select(
    price, height, width, depth, volume,
    category, other_colors, sellable_online
  ) %>%
  binarize()

# Look the target column up by pattern instead of hard-coding its name: the
# name embeds the bin's lower cut point (e.g. `price__7.37085996851068_Inf`),
# which shifts whenever the rows or the binning change.
top_price_bin <- grep("^price__.*_Inf$", names(binarized_table), value = TRUE)
stopifnot(length(top_price_bin) == 1L)

corr_tbl <- binarized_table %>%
  correlate(target = !!rlang::sym(top_price_bin))

corr_tbl %>%
  plot_correlation_funnel()
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Preprocessing - Recipe Improvements

# Feature-engineering recipe: model (log) price from every remaining column.
ikea_recipe_improved <- recipe(price ~ ., data = data) %>%
  # Keep item_id in the data as an identifier rather than a predictor.
  update_role(item_id, new_role = "id variable") %>%
  # Text features: tokenize the description, cap the vocabulary at 150
  # tokens, and weight the retained tokens by tf-idf.
  step_tokenize(short_description) %>%
  step_tokenfilter(short_description, max_tokens = 150) %>%
  step_tfidf(short_description) %>%
  # Nominal features: pool categories below a 5% share, reserve a level for
  # values unseen when the recipe was prepped, then one-hot encode.
  step_other(category, threshold = 0.05) %>%
  step_novel(all_nominal_predictors()) %>%
  step_dummy(all_nominal_predictors(), one_hot = TRUE) %>%
  # Numeric features: KNN-impute missing dimensions, then apply a
  # Yeo-Johnson transform to reduce skew.
  step_impute_knn(height, width, depth) %>%
  step_YeoJohnson(all_numeric_predictors())

# Prep the recipe and inspect the processed training data.
# `bake(new_data = NULL)` is the current replacement for the superseded
# `juice()` and returns the same processed training set.
ikea_recipe_improved %>%
  prep() %>%
  bake(new_data = NULL) %>%
  glimpse()
## Rows: 1,899
## Columns: 462
## $ item_id                                   <dbl> 80155205, 30180504, 10122647…
## $ depth                                     <dbl> 4.691431, 4.945837, 4.546615…
## $ height                                    <dbl> 14.032594, 9.221112, 13.4054…
## $ width                                     <dbl> 5.963386, 6.388163, 5.187576…
## $ volume                                    <dbl> 43.26051, 39.67801, 38.52364…
## $ price                                     <dbl> 4.234107, 5.416100, 5.843544…
## $ tfidf_short_description_1                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_10                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_120               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_120x40x38         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_120x40x64         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_120x42x74         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_140x200           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_147x147           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_150x44x236        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_150x60x236        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_150x66x236        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_162x37x134        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_165x55x216        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_2                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_200x44x236        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_200x60x236        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_200x66x236        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_25x51x70          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_3                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_35x35x35          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_4                 <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_41x101            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_41x61             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_46x30x145         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_46x30x94          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_5                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_50x51x70          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_6                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_60x50x128         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_60x50x192         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_60x64             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_61x101            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_63                <dbl> 0.0000000, 0.0000000, 0.8752…
## $ tfidf_short_description_7                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_74                <dbl> 0.7525930, 0.0000000, 0.0000…
## $ tfidf_short_description_75                <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_76                <dbl> 0.000000, 0.000000, 0.000000…
## $ tfidf_short_description_77x147            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_8                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_80x139            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_80x200            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_80x30x202         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_80x50x171         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_80x75             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_89x50x179         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_9                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_90x200            <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_90x50x50          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_94x44x52          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_99x44x56          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_add               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_and               <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_armchair          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_armrest           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_armrests          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_backrest          <dbl> 0.6388815, 0.0000000, 0.6388…
## $ tfidf_short_description_bar               <dbl> 0.6137897, 0.0000000, 0.6137…
## $ tfidf_short_description_baskets           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_bed               <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_bedside           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_bench             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_bookcase          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_bookshelf         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_box               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_cabinet           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_cabinets          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_castors           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_chair             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_chaise            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_changing          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_chest             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `tfidf_short_description_children's`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_clothes           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_cm                <dbl> 0.12664927, 0.17824299, 0.12…
## $ tfidf_short_description_combination       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_corner            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_cover             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_day               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_desk              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_door              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_doors             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_drawer            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_drawers           <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_dressing          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_easy              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_feet              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_foldable          <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_folding           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_footstool         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_for               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_frame             <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_glass             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_high              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_highchair         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_in                <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_inserts           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_island            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_junior            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_kitchen           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_leg               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_legs              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_lock              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_longue            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_media             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_mesh              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_modular           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_module            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_mounted           <dbl> 0.0000000, 0.9702621, 0.0000…
## $ tfidf_short_description_of                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_on                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_open              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_outdoor           <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_panel             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_plinth            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_rack              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_rail              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_rails             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_rocking           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_rod               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_seat              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_section           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_sections          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_shelf             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_shelves           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_shelving          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_side              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_sideboard         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_sliding           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_smart             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_sofa              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_step              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_stool             <dbl> 0.5450549, 0.0000000, 0.5450…
## $ tfidf_short_description_storage           <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_table             <dbl> 0.0000000, 0.9308459, 0.0000…
## $ tfidf_short_description_tbl               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_three             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_top               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_tray              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_tv                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_two               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_underframe        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_unit              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_upright           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_ut                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_w                 <dbl> 0.0000000, 0.0000000, 0.0000…
## $ tfidf_short_description_wall              <dbl> 0.0000000, 0.8020801, 0.0000…
## $ tfidf_short_description_wardrobe          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_wire              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_short_description_with              <dbl> 0.2960224, 0.0000000, 0.2960…
## $ tfidf_short_description_workspace         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ADDE                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_AGAM                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_AGEN                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ALEX                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ALGOT                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ALGOT...BROR                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ALGOT...SKÅDIS                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ANGERSBY                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ANTILOP                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ÄPPLARÖ                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ARKELSTORP                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ASKEBY                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ASKHOLMEN                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ASKVOLL                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BALSBERGET                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BALTSAR                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BEKANT                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BEKVÄM                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BENARP                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BERNHARD                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BESTÅ                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BESTÅ...EKET                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BESTÅ.BURS                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BILLY                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BILLY...BOTTNA                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BILLY...GNEDBY                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BILLY...MORLIDEN                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BILLY...OXBERG                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BINGSTA                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BJÖRKSNÄS                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BLÅMES                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BÖRJE                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRÄDA                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRÅTHULT                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRIMNES                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BROMMÖ                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BROR                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRORINGE                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRUSALI                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRUSEN                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BRYGGJA                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BUNSÖ                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BUSKBO                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BUSUNGE                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BYÅS                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_BYLLAN                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_DELAKTIG                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_DETOLF                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_DIETMAR                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_EKEDALEN                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ name_EKENÄS                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_EKERÖ                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_EKET                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_EKOLSUND                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_EKTORP                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ELVARLI                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ENETRI                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ERIK                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ERNFRID                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FABRIKÖR                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FALHOLMEN                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FANBYN                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FÄRLÖV                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FEJAN                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FINNBY                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FJÄLLBO                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FLISAT                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FLOTTEBO                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FÖRSIKTIG                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_FRANKLIN                             <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ name_FREKVENS                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ name_FRIHETEN                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GALANT                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GAMLARP                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GAMLEBY                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GENEVAD                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GERSBY                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GISTAD                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GLENN                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GNEDBY                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GODISHUS                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GODVIN                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GRÄLVIKEN                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GRÖNADAL                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GRÖNLID                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GRUNDTAL                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GRUVBYN                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GULLIVER                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_GUNDE                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HÄLLAN                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HAMMARN                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HARRY                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HÄSSELBY                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HATTEFJÄLL                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HAVSTA                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HAVSTEN                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HEJNE                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HELMER                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HEMNES                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HENRIKSDAL                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HOLMSUND                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HUSARÖ                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_HYLLIS                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_IDÅSEN                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_IKEA.PS                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_IKEA.PS.GULLHOLMEN                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_IKEA.PS.LÖMSK                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_IKEA.PS.VÅGÖ                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_INGATORP                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_INGOLF                               <dbl> 0, 0, 1, 0, 0, 0, 1, 0, 0, 0…
## $ name_INNAMO                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ISBERGET                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_IVAR                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_JANINGE                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_JOKKMOKK                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_JONAXEL                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KALLAX                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KARLHUGO                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KARLJAN                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KIVIK                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KLEPPSTAD                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KLIMPEN                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KLIPPAN                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KNARREVIK                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KNOPPARP                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KNOTTEN                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KOARP                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KOLBJÖRN                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KOPPANG                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KORNSJÖ                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KULLABERG                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KULLEN                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KUNGSHAMN                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KUNGSHOLMEN                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_KYRRE                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LACK                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LÄCKÖ                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LAIVA                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LANDSKRONA                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LÅNGFJÄLL                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LANGUR                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LAPPLAND                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LEIFARNE                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LENNART                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LERBERG                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LERHAMN                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LIATORP                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LIDHULT                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LILLÅSEN                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LISABO                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LIXHULT                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LOMMARP                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LOTE                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LYCKSELE                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LYCKSELE.HÅVET                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LYCKSELE.LÖVÅS                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_LYCKSELE.MURBO                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MACKAPÄR                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MALM                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MALSJÖ                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MAMMUT                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MARKERAD                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MARTIN                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MÄSTERBY                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MASTHOLMEN                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MICKE                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MILSBO                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MOSJÖ                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MOSTORP                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_MUREN                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NANNARP                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NIKKEBY                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NILSOVE                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NILSOVE...NORNA                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NISSE                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NOLMYRA                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NOMINELL                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORBERG                              <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORDKISA                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORDLI                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORDMELA                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORDVIKEN                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORRÅKER                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORRARYD                             <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ name_NORRNÄS                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NORSBORG                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_NYHAMN                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ODDVALD                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ODGER                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_OLAUS                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ÖNSKLIG                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PÅHL                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX                                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...AULI                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...BERGSBO                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...FÄRVIK.AULI                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...FORSAND                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...FORSAND.VIKEDAL                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...GRIMO                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...GRIMO.VIKEDAL                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...HASVIK                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...HOKKSUND                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...MEHAMN                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...MEHAMN.AULI                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...MEHAMN.SEKKEN                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...SEKKEN                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...TYSSEDAL                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...VIKEDAL                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...VINGROM                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PAX...VINTERBRO                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PELLO                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PLATSA                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_POÄNG                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_PRÄSTHOLM                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_RÅDVIKEN                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_RAKKESTAD                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_RÅSKOG                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_RAST                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_REGISSÖR                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_REMSTA                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_RÖNNINGE                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SAKARIAS                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SALTHOLMEN                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SANDBACKEN                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SETSKOG                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SJÄLLAND                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SKÅDIS                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SKARPÖ                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SKOGSTORP                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SKRUVSTA                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SKUBB                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SLÄKT                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SMÅGÖRA                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SÖDERHAMN                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SOLLERÖN                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SONGESAND                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STALLARP                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STEFAN                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STIG                                 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STOCKHOLM                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STOCKHOLM.2017                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STOCKSUND                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STOLJAN                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STRANDMON                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STUBBARP                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STUK                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STUVA                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STUVA...FÖLJA                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_STUVA...FRITIDS                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SUNDLANDET                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SUNDVIK                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SVALNÄS                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SVANÖ                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SVÄRTA                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SVENARNE                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SVENBERTIL                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_SYVDE                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TÄRNÖ                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TEODORES                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TERJE                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TOBIAS                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TORNVIKEN                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TOSSBERG                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TOSTERÖ                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TRANARÖ                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TROFAST                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TROGEN                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TRYSIL                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TULLSTA                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_TYSSEDAL                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_ULRIKSBERG                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_URBAN                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VÄDDÖ                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VADHOLMA                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VALLENTUNA                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VATTVIKEN                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VEBERÖD                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VEDBO                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VESKEN                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VIGGJA                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VIKHAMMER                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VILTO                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VIMLE                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VISTHUS                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VITTSJÖ                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VITVAL                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VOLFGANG                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_VUKU                                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_YNGVAR                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ name_new                                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Bookcases...shelving.units       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Cabinets...cupboards             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Chairs                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Chests.of.drawers...drawer.units <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Sofas...armchairs                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Tables...desks                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_TV...media.furniture             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_Wardrobes                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ category_other                            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ category_new                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ sellable_online_FALSE.                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ sellable_online_TRUE.                     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ sellable_online_new                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ other_colors_No                           <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ other_colors_Yes                          <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ other_colors_new                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…

Data Splitting and Cross-Validation Setup

# Hold out 25% of the rows for the final test; seeding makes the split reproducible
set.seed(1234)
ikea_split <- data %>% initial_split(prop = 0.75)
ikea_train <- ikea_split %>% training()
ikea_test <- ikea_split %>% testing()

# Build 5 cross-validation folds from the training portion only (separately seeded)
set.seed(2345)
ikea_cv <- ikea_train %>% vfold_cv(v = 5)

Model Specification (XGBoost, Random Forest, SVM)

# Gradient-boosted trees (xgboost engine); every listed hyperparameter is a
# tune() placeholder to be filled in during tuning
xgboost_spec <- boost_tree(
  trees = tune(),
  min_n = tune(),
  mtry = tune(),
  learn_rate = tune()
) %>%
  set_mode("regression") %>%
  set_engine("xgboost")

# Random forest (ranger engine) with tunable tree count, node size and mtry
rf_spec <- rand_forest(
  trees = tune(),
  min_n = tune(),
  mtry = tune()
) %>%
  set_mode("regression") %>%
  set_engine("ranger")

# Radial-basis-function SVM (kernlab engine) with tunable cost and kernel width
svm_spec <- svm_rbf(
  cost = tune(),
  rbf_sigma = tune()
) %>%
  set_mode("regression") %>%
  set_engine("kernlab")

# All three models share the same preprocessing recipe, so attach it once
# and then branch off one workflow per model specification
recipe_workflow <- workflow() %>%
  add_recipe(ikea_recipe_improved)

xgboost_workflow <- recipe_workflow %>% add_model(xgboost_spec)

rf_workflow <- recipe_workflow %>% add_model(rf_spec)

svm_workflow <- recipe_workflow %>% add_model(svm_spec)

Evaluate Models

# Pull the performance metrics out of each model's tuning result
xgboost_results <- xgboost_tune %>% collect_metrics()
rf_results <- rf_tune %>% collect_metrics()
svm_results <- svm_tune %>% collect_metrics()

# Stack the three result sets and rank every candidate by mean RMSE, lowest
# first. Only the RMSE rows are kept; R-squared is not compared here.
list(xgboost_results, rf_results, svm_results) %>%
  bind_rows() %>%
  filter(.metric == "rmse") %>%
  arrange(mean)
## # A tibble: 170 × 12
##     mtry trees min_n learn_rate .metric .estimator  mean     n std_err .config  
##    <int> <int> <int>      <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>    
##  1    NA    NA    NA    NA      rmse    standard   0.390     5  0.0154 Preproce…
##  2     8   196    10     0.0900 rmse    standard   0.511     5  0.0234 Preproce…
##  3    NA    NA    NA    NA      rmse    standard   0.516     5  0.0151 Preproce…
##  4    11  2000     2    NA      rmse    standard   0.534     5  0.0180 Preproce…
##  5    11  1000     2    NA      rmse    standard   0.534     5  0.0186 Preproce…
##  6    11   500     2    NA      rmse    standard   0.535     5  0.0165 Preproce…
##  7    11  1500     2    NA      rmse    standard   0.535     5  0.0177 Preproce…
##  8    11  2000    11    NA      rmse    standard   0.537     5  0.0187 Preproce…
##  9    11  1000    11    NA      rmse    standard   0.538     5  0.0187 Preproce…
## 10    11  1500    11    NA      rmse    standard   0.539     5  0.0190 Preproce…
## # ℹ 160 more rows
## # ℹ 2 more variables: cost <dbl>, rbf_sigma <dbl>

Finalize Best Model and Predictions

# Plug the lowest-RMSE XGBoost hyperparameters into the XGBoost workflow.
# NOTE(review): the ranked table printed above lists a candidate with
# mtry/trees/min_n/learn_rate all NA (presumably an SVM configuration) first at
# RMSE 0.390, ahead of the best XGBoost row (0.511) -- confirm that XGBoost is
# the intended final model.
best_xgboost_params <- select_best(xgboost_tune, metric = "rmse")
xgboost_best <- finalize_workflow(xgboost_workflow, best_xgboost_params)

# Refit on the training portion of the split and score once on the test portion
ikea_fit <- last_fit(xgboost_best, ikea_split)

# Held-out metrics and row-level predictions from that final fit
test_metrics <- ikea_fit %>% collect_metrics()
test_predictions <- ikea_fit %>% collect_predictions()

# Plot actual vs predicted prices for the held-out test set.
# Both axes are on the log scale: `price` was log-transformed during cleaning,
# so the model's predictions (.pred) are log prices as well.
test_predictions %>%
  ggplot(aes(x = price, y = .pred)) +
  geom_point(alpha = 0.3, color = "midnightblue") +
  # Dashed y = x reference line: perfect predictions would fall on it
  geom_abline(lty = 2, color = "gray50") +
  # Equal axis scaling so the reference line sits at 45 degrees
  coord_fixed() +
  labs(
    title = "Predicted vs Actual Prices (Test Data)",
    x = "Actual Price (log-transformed)",
    y = "Predicted Price (log-transformed)"
  )

# Look up the test-set RMSE and R-squared in the `.estimate` column
rmse_value <- test_metrics$.estimate[test_metrics$.metric == "rmse"]

rsq_value <- test_metrics$.estimate[test_metrics$.metric == "rsq"]

# Express R-squared as a percentage of variance explained
variance_explained <- 100 * rsq_value

# Report the test-set metrics; RMSE is in log-price units because `price` was
# log-transformed during cleaning
print(paste("RMSE:", rmse_value))
## [1] "RMSE: 0.462790631109112"
print(paste("R-squared:", rsq_value))
## [1] "R-squared: 0.862361131632264"
print(paste("Variance Explained (%):", variance_explained))
## [1] "Variance Explained (%): 86.2361131632264"

Conclusion

The final model for predicting IKEA product prices performed well on the held-out test set, achieving an RMSE of 0.4628 and an R-squared of 0.8624. Because price was log-transformed before modeling, both figures are on the log-price scale: the model explains 86.24% of the variance in log price, and an RMSE of 0.46 corresponds to a typical error of roughly a factor of exp(0.46) ≈ 1.6 in actual price. While there is still some room for improvement, the model’s current performance suggests that it is reliable for making price predictions based on features like product dimensions, categories, and text descriptions.

Efforts to improve the model included adding new features like product volume, handling missing data with KNN imputation, and refining the recipe with steps like step_other() and step_novel(). These changes helped the model generalize better, and while the RMSE is slightly higher than ideal, the model demonstrates strong predictive capabilities overall.