Goal is to predict attrition #import data

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(correlationfunnel)
## ══ Using correlationfunnel? ════════════════════════════════════════════════════
## You might also be interested in applied data science training for business.
## </> Learn more at - www.business-science.io </>
ikea <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2020/2020-11-03/ikea.csv')
## New names:
## Rows: 3694 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (7): name, category, old_price, link, other_colors, short_description, d... dbl
## (6): ...1, item_id, price, depth, height, width lgl (1): sellable_online
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

#clean data

skimr::skim(ikea)
Data summary
Name ikea
Number of rows 3694
Number of columns 14
_______________________
Column type frequency:
character 7
logical 1
numeric 6
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
name 0 1 3 27 0 607 0
category 0 1 4 36 0 17 0
old_price 0 1 4 13 0 365 0
link 0 1 52 163 0 2962 0
other_colors 0 1 2 3 0 2 0
short_description 0 1 3 63 0 1706 0
designer 0 1 3 1261 0 381 0

Variable type: logical

skim_variable n_missing complete_rate mean count
sellable_online 0 1 0.99 TRU: 3666, FAL: 28

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
…1 0 1.00 1846.50 1066.51 0 923.25 1846.5 2769.75 3693 ▇▇▇▇▇
item_id 0 1.00 48632396.79 28887094.10 58487 20390574.00 49288078.0 70403572.75 99932615 ▇▇▇▇▇
price 0 1.00 1078.21 1374.65 3 180.90 544.7 1429.50 9585 ▇▁▁▁▁
depth 1463 0.60 54.38 29.96 1 38.00 47.0 60.00 257 ▇▃▁▁▁
height 988 0.73 101.68 61.10 1 67.00 83.0 124.00 700 ▇▂▁▁▁
width 589 0.84 104.47 71.13 1 60.00 80.0 140.00 420 ▇▅▂▁▁
ikea <- ikea %>% na.omit()
factors_vec <- ikea %>% select(item_id, price, depth, height, width, -...1) %>% names() 

ikea_clean <- ikea %>%
  #mutate(Education = Education %>% as.factor()) %>%
  #address factors imported as numeric
  mutate(across(all_of(factors_vec), as.factor)) 
  
  #drop zero variance variables

#explore data

ikea_clean %>% count(name) 
## # A tibble: 289 × 2
##    name               n
##    <chr>          <int>
##  1 ADDE               1
##  2 AGAM               4
##  3 AGEN               1
##  4 ALEX               7
##  5 ALGOT             24
##  6 ALGOT / BROR       1
##  7 ALGOT / SKÅDIS     1
##  8 ANGERSBY           1
##  9 ANTILOP            4
## 10 ARKELSTORP         2
## # ℹ 279 more rows
ikea_clean %>%
  ggplot(aes(name)) +
  geom_bar()

correlation plot

#step 1
ikea_binarized <- ikea_clean %>%
  select(-item_id) %>%
  binarize()

ikea_binarized %>%glimpse()
## Rows: 1,899
## Columns: 180
## $ `...1__-Inf_799.5`                                                                  <dbl> …
## $ ...1__799.5_1653                                                                    <dbl> …
## $ ...1__1653_2474.5                                                                   <dbl> …
## $ ...1__2474.5_Inf                                                                    <dbl> …
## $ name__ALGOT                                                                         <dbl> …
## $ name__BEKANT                                                                        <dbl> …
## $ name__BESTÅ                                                                         <dbl> …
## $ `name__BILLY_/_OXBERG`                                                              <dbl> …
## $ name__BRIMNES                                                                       <dbl> …
## $ name__BROR                                                                          <dbl> …
## $ name__EKET                                                                          <dbl> …
## $ name__GRÖNLID                                                                       <dbl> …
## $ name__HAVSTA                                                                        <dbl> …
## $ name__HAVSTEN                                                                       <dbl> …
## $ name__HEMNES                                                                        <dbl> …
## $ name__IVAR                                                                          <dbl> …
## $ name__JONAXEL                                                                       <dbl> …
## $ name__KALLAX                                                                        <dbl> …
## $ name__LIDHULT                                                                       <dbl> …
## $ name__LIXHULT                                                                       <dbl> …
## $ name__NORDLI                                                                        <dbl> …
## $ name__PAX                                                                           <dbl> …
## $ name__PLATSA                                                                        <dbl> …
## $ `name__STUVA_/_FRITIDS`                                                             <dbl> …
## $ name__TROFAST                                                                       <dbl> …
## $ name__VALLENTUNA                                                                    <dbl> …
## $ name__VIMLE                                                                         <dbl> …
## $ `name__-OTHER`                                                                      <dbl> …
## $ category__Bar_furniture                                                             <dbl> …
## $ category__Beds                                                                      <dbl> …
## $ `category__Bookcases_&_shelving_units`                                              <dbl> …
## $ `category__Cabinets_&_cupboards`                                                    <dbl> …
## $ category__Chairs                                                                    <dbl> …
## $ `category__Chests_of_drawers_&_drawer_units`                                        <dbl> …
## $ `category__Children's_furniture`                                                    <dbl> …
## $ category__Nursery_furniture                                                         <dbl> …
## $ category__Outdoor_furniture                                                         <dbl> …
## $ `category__Sideboards,_buffets_&_console_tables`                                    <dbl> …
## $ `category__Sofas_&_armchairs`                                                       <dbl> …
## $ `category__Tables_&_desks`                                                          <dbl> …
## $ `category__TV_&_media_furniture`                                                    <dbl> …
## $ category__Wardrobes                                                                 <dbl> …
## $ `category__-OTHER`                                                                  <dbl> …
## $ price__175                                                                          <dbl> …
## $ price__195                                                                          <dbl> …
## $ price__225                                                                          <dbl> …
## $ price__275                                                                          <dbl> …
## $ price__295                                                                          <dbl> …
## $ price__345                                                                          <dbl> …
## $ price__375                                                                          <dbl> …
## $ price__395                                                                          <dbl> …
## $ price__495                                                                          <dbl> …
## $ price__595                                                                          <dbl> …
## $ price__695                                                                          <dbl> …
## $ price__995                                                                          <dbl> …
## $ `price__-OTHER`                                                                     <dbl> …
## $ old_price__No_old_price                                                             <dbl> …
## $ `old_price__-OTHER`                                                                 <dbl> …
## $ sellable_online__1                                                                  <dbl> …
## $ `sellable_online__-OTHER`                                                           <dbl> …
## $ `link__https://www.ikea.com/sa/en/p/besta-burs-tv-bench-high-gloss-white-30269129/` <dbl> …
## $ `link__-OTHER`                                                                      <dbl> …
## $ other_colors__No                                                                    <dbl> …
## $ other_colors__Yes                                                                   <dbl> …
## $ `short_description__3-seat_sofa`                                                    <dbl> …
## $ `short_description__3-seat_sofa-bed`                                                <dbl> …
## $ short_description__Armchair                                                         <dbl> …
## $ short_description__Chair                                                            <dbl> …
## $ `short_description__Wardrobe,__________150x66x236_cm`                               <dbl> …
## $ `short_description__-OTHER`                                                         <dbl> …
## $ designer__Andreas_Fredriksson                                                       <dbl> …
## $ designer__Carina_Bengs                                                              <dbl> …
## $ designer__Carl_Öjerstam                                                             <dbl> …
## $ designer__Ebba_Strandmark                                                           <dbl> …
## $ designer__Ehlén_Johansson                                                           <dbl> …
## $ `designer__Ehlén_Johansson/IKEA_of_Sweden`                                          <dbl> …
## $ designer__Eva_Lilja_Löwenhielm                                                      <dbl> …
## $ designer__Francis_Cayouette                                                         <dbl> …
## $ designer__Gillis_Lundgren                                                           <dbl> …
## $ designer__Henrik_Preutz                                                             <dbl> …
## $ designer__IKEA_of_Sweden                                                            <dbl> …
## $ `designer__IKEA_of_Sweden/Ehlén_Johansson`                                          <dbl> …
## $ `designer__IKEA_of_Sweden/Jon_Karlsson`                                             <dbl> …
## $ designer__Johan_Kroon                                                               <dbl> …
## $ designer__Jon_Karlsson                                                              <dbl> …
## $ `designer__Jon_Karlsson/IKEA_of_Sweden`                                             <dbl> …
## $ `designer__K_Hagberg/M_Hagberg`                                                     <dbl> …
## $ designer__Mia_Lagerman                                                              <dbl> …
## $ designer__Nike_Karlsson                                                             <dbl> …
## $ designer__Ola_Wihlborg                                                              <dbl> …
## $ designer__Studio_Copenhagen                                                         <dbl> …
## $ designer__Tord_Björklund                                                            <dbl> …
## $ `designer__-OTHER`                                                                  <dbl> …
## $ depth__25                                                                           <dbl> …
## $ depth__28                                                                           <dbl> …
## $ depth__30                                                                           <dbl> …
## $ depth__35                                                                           <dbl> …
## $ depth__36                                                                           <dbl> …
## $ depth__37                                                                           <dbl> …
## $ depth__38                                                                           <dbl> …
## $ depth__39                                                                           <dbl> …
## $ depth__40                                                                           <dbl> …
## $ depth__41                                                                           <dbl> …
## $ depth__42                                                                           <dbl> …
## $ depth__44                                                                           <dbl> …
## $ depth__45                                                                           <dbl> …
## $ depth__47                                                                           <dbl> …
## $ depth__48                                                                           <dbl> …
## $ depth__49                                                                           <dbl> …
## $ depth__50                                                                           <dbl> …
## $ depth__51                                                                           <dbl> …
## $ depth__52                                                                           <dbl> …
## $ depth__55                                                                           <dbl> …
## $ depth__57                                                                           <dbl> …
## $ depth__58                                                                           <dbl> …
## $ depth__60                                                                           <dbl> …
## $ depth__66                                                                           <dbl> …
## $ depth__82                                                                           <dbl> …
## $ depth__94                                                                           <dbl> …
## $ depth__98                                                                           <dbl> …
## $ depth__164                                                                          <dbl> …
## $ `depth__-OTHER`                                                                     <dbl> …
## $ height__35                                                                          <dbl> …
## $ height__38                                                                          <dbl> …
## $ height__45                                                                          <dbl> …
## $ height__64                                                                          <dbl> …
## $ height__70                                                                          <dbl> …
## $ height__74                                                                          <dbl> …
## $ height__75                                                                          <dbl> …
## $ height__76                                                                          <dbl> …
## $ height__78                                                                          <dbl> …
## $ height__79                                                                          <dbl> …
## $ height__80                                                                          <dbl> …
## $ height__83                                                                          <dbl> …
## $ height__84                                                                          <dbl> …
## $ height__85                                                                          <dbl> …
## $ height__87                                                                          <dbl> …
## $ height__90                                                                          <dbl> …
## $ height__91                                                                          <dbl> …
## $ height__95                                                                          <dbl> …
## $ height__100                                                                         <dbl> …
## $ height__101                                                                         <dbl> …
## $ height__102                                                                         <dbl> …
## $ height__104                                                                         <dbl> …
## $ height__128                                                                         <dbl> …
## $ height__147                                                                         <dbl> …
## $ height__176                                                                         <dbl> …
## $ height__179                                                                         <dbl> …
## $ height__190                                                                         <dbl> …
## $ height__192                                                                         <dbl> …
## $ height__197                                                                         <dbl> …
## $ height__201                                                                         <dbl> …
## $ height__202                                                                         <dbl> …
## $ height__236                                                                         <dbl> …
## $ `height__-OTHER`                                                                    <dbl> …
## $ width__35                                                                           <dbl> …
## $ width__40                                                                           <dbl> …
## $ width__41                                                                           <dbl> …
## $ width__42                                                                           <dbl> …
## $ width__44                                                                           <dbl> …
## $ width__45                                                                           <dbl> …
## $ width__46                                                                           <dbl> …
## $ width__50                                                                           <dbl> …
## $ width__56                                                                           <dbl> …
## $ width__60                                                                           <dbl> …
## $ width__70                                                                           <dbl> …
## $ width__80                                                                           <dbl> …
## $ width__89                                                                           <dbl> …
## $ width__90                                                                           <dbl> …
## $ width__99                                                                           <dbl> …
## $ width__100                                                                          <dbl> …
## $ width__120                                                                          <dbl> …
## $ width__140                                                                          <dbl> …
## $ width__150                                                                          <dbl> …
## $ width__160                                                                          <dbl> …
## $ width__180                                                                          <dbl> …
## $ width__200                                                                          <dbl> …
## $ width__240                                                                          <dbl> …
## $ width__300                                                                          <dbl> …
## $ `width__-OTHER`                                                                     <dbl> …
#step 2L correlation
ikea_correlation <- ikea_binarized %>%
  correlate(sellable_online__1)
## Warning: Expected 2 pieces. Additional pieces discarded in 1 rows [69].
ikea_correlation
## # A tibble: 180 × 3
##    feature         bin                  correlation
##    <fct>           <chr>                      <dbl>
##  1 sellable_online 1                          1    
##  2 sellable_online -OTHER                    -1    
##  3 name            TROFAST                   -0.332
##  4 width           42                        -0.327
##  5 depth           30                        -0.235
##  6 category        Children's_furniture      -0.144
##  7 category        Nursery_furniture         -0.128
##  8 height          101                       -0.113
##  9 designer        Studio_Copenhagen         -0.112
## 10 price           275                       -0.110
## # ℹ 170 more rows
#step 3: plot
ikea_correlation %>%
  correlationfunnel::plot_correlation_funnel()
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the correlationfunnel package.
##   Please report the issue at
##   <https://github.com/business-science/correlationfunnel/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the correlationfunnel package.
##   Please report the issue at
##   <https://github.com/business-science/correlationfunnel/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: ggrepel: 162 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps