dummy variables in R

ifelse

# Binary dummy for sex built with ifelse(): "female" -> 0, any other level -> 1.
# A missing sex makes the comparison NA, so the dummy is NA for those rows too.
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  mutate(dummy_sex = ifelse(sex == "female", 0, 1)) %>%
  select(sex, dummy_sex) %>%
  # spell out FALSE: `F` is an ordinary variable that can be reassigned
  sample_n(6, replace = FALSE)
## # A tibble: 6 x 2
##   sex    dummy_sex
##   <fct>      <dbl>
## 1 <NA>          NA
## 2 male           1
## 3 female         0
## 4 male           1
## 5 male           1
## 6 female         0
# One 0/1 indicator per non-reference species, built by hand with ifelse().
# Rows where both indicators are 0 belong to the remaining (reference) species.
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  mutate(
    dummy_Chinstrap = ifelse(species == "Chinstrap", 1, 0),
    dummy_Gentoo = ifelse(species == "Gentoo", 1, 0)
  ) %>%
  select(species, starts_with("dummy_")) %>%
  # spell out FALSE: `F` is an ordinary variable that can be reassigned
  sample_n(6, replace = FALSE)
## # A tibble: 6 x 3
##   species   dummy_Chinstrap dummy_Gentoo
##   <fct>               <dbl>        <dbl>
## 1 Gentoo                  0            1
## 2 Adelie                  0            0
## 3 Gentoo                  0            1
## 4 Chinstrap               1            0
## 5 Adelie                  0            0
## 6 Chinstrap               1            0

model.matrix

# Treatment-coded dummies from model.matrix(), bound back onto the data.
# Under the default na.action, model.matrix() drops rows whose formula
# variables are missing, so the complete-case filter is applied once, up front,
# and the SAME rows feed both the design matrix and bind_cols(). Filtering the
# two sides independently only lines up when they happen to drop identical rows.
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  na.omit() %>%
  {
    bind_cols(
      # [, -1] removes the first design-matrix column (the intercept);
      # drop = FALSE keeps a matrix even if only one dummy column remains
      model.matrix(~ species + sex, data = .)[, -1, drop = FALSE],
      .
    )
  } %>%
  sample_n(6, replace = FALSE)
## # A tibble: 6 x 11
##   species~1 speci~2 sexmale species island bill_~3 bill_~4 flipp~5 body_~6 sex  
##       <dbl>   <dbl>   <dbl> <fct>   <fct>    <dbl>   <dbl>   <int>   <int> <fct>
## 1         0       1       1 Gentoo  Biscoe    59.6    17       230    6050 male 
## 2         0       0       0 Adelie  Torge~    34.4    18.4     184    3325 fema~
## 3         0       1       1 Gentoo  Biscoe    45.2    15.8     215    5300 male 
## 4         1       0       1 Chinst~ Dream     49      19.5     210    3950 male 
## 5         0       0       1 Adelie  Torge~    41.4    18.5     202    3875 male 
## 6         1       0       1 Chinst~ Dream     51      18.8     203    4100 male 
## # ... with 1 more variable: year <int>, and abbreviated variable names
## #   1: speciesChinstrap, 2: speciesGentoo, 3: bill_length_mm, 4: bill_depth_mm,
## #   5: flipper_length_mm, 6: body_mass_g

fastDummies::dummy_cols

library(fastDummies)
# n categories -> n dummies: one indicator column per level of each variable
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  drop_na() %>% # keep complete cases only
  dummy_cols(select_columns = c("species", "sex")) %>%
  select(starts_with("species"), starts_with("sex")) %>%
  # spell out FALSE: `F` is an ordinary variable that can be reassigned
  sample_n(6, replace = FALSE)
## # A tibble: 6 x 7
##   species   species_Adelie species_Chinstrap species_Gen~1 sex   sex_f~2 sex_m~3
##   <fct>              <int>             <int>         <int> <fct>   <int>   <int>
## 1 Gentoo                 0                 0             1 male        0       1
## 2 Adelie                 1                 0             0 fema~       1       0
## 3 Gentoo                 0                 0             1 male        0       1
## 4 Chinstrap              0                 1             0 male        0       1
## 5 Adelie                 1                 0             0 male        0       1
## 6 Chinstrap              0                 1             0 male        0       1
## # ... with abbreviated variable names 1: species_Gentoo, 2: sex_female,
## #   3: sex_male
# n categories -> (n - 1) dummies: the first level of each variable acts as the
# reference and gets no indicator column
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  drop_na() %>% # keep complete cases only
  dummy_cols(
    select_columns = c("species", "sex"),
    remove_first_dummy = TRUE
  ) %>%
  select(starts_with("species"), starts_with("sex")) %>%
  # TRUE/FALSE spelled out: `T` and `F` are ordinary, reassignable variables
  sample_n(6, replace = FALSE)
## # A tibble: 6 x 5
##   species   species_Chinstrap species_Gentoo sex    sex_male
##   <fct>                 <int>          <int> <fct>     <int>
## 1 Gentoo                    0              1 male          1
## 2 Adelie                    0              0 female        0
## 3 Gentoo                    0              1 male          1
## 4 Chinstrap                 1              0 male          1
## 5 Adelie                    0              0 male          1
## 6 Chinstrap                 1              0 male          1
# n categories -> (n - 1) dummies, and the original factor columns are removed
# so only the indicator columns remain
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  drop_na() %>% # keep complete cases only
  dummy_cols(
    select_columns = c("species", "sex"),
    remove_first_dummy = TRUE,
    remove_selected_columns = TRUE
  ) %>%
  select(starts_with("species"), starts_with("sex")) %>%
  # TRUE/FALSE spelled out: `T` and `F` are ordinary, reassignable variables
  sample_n(6, replace = FALSE)
## # A tibble: 6 x 3
##   species_Chinstrap species_Gentoo sex_male
##               <int>          <int>    <int>
## 1                 0              1        1
## 2                 0              0        0
## 3                 0              1        1
## 4                 1              0        1
## 5                 0              0        1
## 6                 1              0        1

caret::dummyVars

library(caret)
# caret: dummyVars() builds the encoding from the formula, predict() applies it
# to pg, and the result is bound back onto the full data.
# fullRank = TRUE gives (n - 1) dummies per factor; sep = "_" produces names
# such as species_Gentoo. Rows with a missing sex are kept, with an NA dummy.
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  dummyVars(~ species + sex, data = ., sep = "_", fullRank = TRUE) %>%
  predict(newdata = pg) %>%
  bind_cols(pg) %>%
  # TRUE/FALSE spelled out: `T` and `F` are ordinary, reassignable variables
  sample_n(6, replace = FALSE)
## # A tibble: 6 x 11
##   species~1 speci~2 sex_m~3 species island bill_~4 bill_~5 flipp~6 body_~7 sex  
##       <dbl>   <dbl>   <dbl> <fct>   <fct>    <dbl>   <dbl>   <int>   <int> <fct>
## 1         0       1      NA Gentoo  Biscoe    44.5    14.3     216    4100 <NA> 
## 2         0       0       1 Adelie  Torge~    38.6    21.2     191    3800 male 
## 3         0       1       0 Gentoo  Biscoe    45.3    13.7     210    4300 fema~
## 4         1       0       1 Chinst~ Dream     52.8    20       205    4550 male 
## 5         0       0       1 Adelie  Torge~    37.3    20.5     199    3775 male 
## 6         1       0       0 Chinst~ Dream     43.2    16.6     187    2900 fema~
## # ... with 1 more variable: year <int>, and abbreviated variable names
## #   1: species_Chinstrap, 2: species_Gentoo, 3: sex_male, 4: bill_length_mm,
## #   5: bill_depth_mm, 6: flipper_length_mm, 7: body_mass_g

sjmisc::to_dummy

library(sjmisc)
# sjmisc: one dummy per level of each listed variable; suffix = "label" names
# the columns <variable>_<level label>. Only the first six rows are shown.
pg %>%
  to_dummy(species, sex, suffix = "label") %>%
  head(n = 6)
##   species_Adelie species_Chinstrap species_Gentoo sex_female sex_male
## 1              1                 0              0          0        1
## 2              1                 0              0          1        0
## 3              1                 0              0          1        0
## 4              1                 0              0         NA       NA
## 5              1                 0              0          1        0
## 6              1                 0              0          0        1

modeldb::add_dummy_variables

library(modeldb)
# modeldb: add a 0/1 indicator column for the listed value of species while
# keeping the original species column. Only the first six rows are shown.
# NOTE(review): `values` is supplied explicitly, so `auto_values = TRUE` looks
# redundant here; confirm against the modeldb documentation before removing it.
pg %>%
  add_dummy_variables(
    x = species,
    values = "Gentoo",
    # TRUE/FALSE spelled out: `T` and `F` are ordinary, reassignable variables
    remove_original = FALSE,
    auto_values = TRUE
  ) %>%
  head()
## # A tibble: 6 x 9
##   species island    bill_length_mm bill_de~1 flipp~2 body_~3 sex    year speci~4
##   <fct>   <fct>              <dbl>     <dbl>   <int>   <int> <fct> <int>   <dbl>
## 1 Adelie  Torgersen           39.1      18.7     181    3750 male   2007       0
## 2 Adelie  Torgersen           39.5      17.4     186    3800 fema~  2007       0
## 3 Adelie  Torgersen           40.3      18       195    3250 fema~  2007       0
## 4 Adelie  Torgersen           NA        NA        NA      NA <NA>   2007       0
## 5 Adelie  Torgersen           36.7      19.3     193    3450 fema~  2007       0
## 6 Adelie  Torgersen           39.3      20.6     190    3650 male   2007       0
## # ... with abbreviated variable names 1: bill_depth_mm, 2: flipper_length_mm,
## #   3: body_mass_g, 4: species_Gentoo