dummy variables in R
ifelse
# Binary dummy built by hand with ifelse(): "female" -> 0, any other level -> 1.
# Rows where sex is NA stay NA, because the comparison itself evaluates to NA.
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  mutate(dummy_sex = ifelse(sex == "female", 0, 1)) %>%
  select(sex, dummy_sex) %>%
  sample_n(6, replace = FALSE) # FALSE, not F: F is an ordinary, reassignable variable
## # A tibble: 6 x 2
## sex dummy_sex
## <fct> <dbl>
## 1 <NA> NA
## 2 male 1
## 3 female 0
## 4 male 1
## 5 male 1
## 6 female 0
# Hand-made dummies for a three-level factor: one ifelse() per non-reference
# level. Adelie acts as the reference level (both dummies are 0 for it).
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  mutate(
    dummy_Chinstrap = ifelse(species == "Chinstrap", 1, 0),
    dummy_Gentoo = ifelse(species == "Gentoo", 1, 0)
  ) %>%
  select(species, starts_with("dummy_")) %>%
  sample_n(6, replace = FALSE) # FALSE, not F: F is an ordinary, reassignable variable
## # A tibble: 6 x 3
## species dummy_Chinstrap dummy_Gentoo
## <fct> <dbl> <dbl>
## 1 Gentoo 0 1
## 2 Adelie 0 0
## 3 Gentoo 0 1
## 4 Chinstrap 1 0
## 5 Adelie 0 0
## 6 Chinstrap 1 0
model.matrix
# Let model.matrix() build the (n - 1) treatment-coded dummies for species and
# sex, then attach them to the original columns.
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  model.matrix(~ species + sex, data = .) %>%
  .[, -1] %>% # drop the "(Intercept)" column, keep only the dummies
  # model.matrix() silently drops rows with NA in the *modelled* variables only,
  # so bind against pg filtered on those same variables. Filtering on every
  # column (na.omit(pg)) only lines up when no other column has extra NAs.
  bind_cols(drop_na(pg, species, sex)) %>%
  sample_n(6, replace = FALSE) # FALSE, not F: F is an ordinary, reassignable variable
## # A tibble: 6 x 11
## species~1 speci~2 sexmale species island bill_~3 bill_~4 flipp~5 body_~6 sex
## <dbl> <dbl> <dbl> <fct> <fct> <dbl> <dbl> <int> <int> <fct>
## 1 0 1 1 Gentoo Biscoe 59.6 17 230 6050 male
## 2 0 0 0 Adelie Torge~ 34.4 18.4 184 3325 fema~
## 3 0 1 1 Gentoo Biscoe 45.2 15.8 215 5300 male
## 4 1 0 1 Chinst~ Dream 49 19.5 210 3950 male
## 5 0 0 1 Adelie Torge~ 41.4 18.5 202 3875 male
## 6 1 0 1 Chinst~ Dream 51 18.8 203 4100 male
## # ... with 1 more variable: year <int>, and abbreviated variable names
## # 1: speciesChinstrap, 2: speciesGentoo, 3: bill_length_mm, 4: bill_depth_mm,
## # 5: flipper_length_mm, 6: body_mass_g
fastDummies::dummy_cols
library(fastDummies)
# n categories -> n dummies: one indicator column per level of each factor.
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  drop_na() %>% # remove rows with missing values before encoding
  dummy_cols(select_columns = c("species", "sex")) %>%
  select(starts_with("species"), starts_with("sex")) %>%
  sample_n(6, replace = FALSE) # FALSE, not F: F is an ordinary, reassignable variable
## # A tibble: 6 x 7
## species species_Adelie species_Chinstrap species_Gen~1 sex sex_f~2 sex_m~3
## <fct> <int> <int> <int> <fct> <int> <int>
## 1 Gentoo 0 0 1 male 0 1
## 2 Adelie 1 0 0 fema~ 1 0
## 3 Gentoo 0 0 1 male 0 1
## 4 Chinstrap 0 1 0 male 0 1
## 5 Adelie 1 0 0 male 0 1
## 6 Chinstrap 0 1 0 male 0 1
## # ... with abbreviated variable names 1: species_Gentoo, 2: sex_female,
## # 3: sex_male
# n categories -> (n - 1) dummies: the first level of each factor is dropped
# and serves as the reference category.
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  drop_na() %>% # remove rows with missing values before encoding
  dummy_cols(
    select_columns = c("species", "sex"),
    remove_first_dummy = TRUE # TRUE, not T: T is an ordinary, reassignable variable
  ) %>%
  select(starts_with("species"), starts_with("sex")) %>%
  sample_n(6, replace = FALSE)
## # A tibble: 6 x 5
## species species_Chinstrap species_Gentoo sex sex_male
## <fct> <int> <int> <fct> <int>
## 1 Gentoo 0 1 male 1
## 2 Adelie 0 0 female 0
## 3 Gentoo 0 1 male 1
## 4 Chinstrap 1 0 male 1
## 5 Adelie 0 0 male 1
## 6 Chinstrap 1 0 male 1
# n categories -> (n - 1) dummies, and the original factor columns are removed
# so that only the indicator columns remain.
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  drop_na() %>% # remove rows with missing values before encoding
  dummy_cols(
    select_columns = c("species", "sex"),
    remove_first_dummy = TRUE, # TRUE, not T: T is an ordinary, reassignable variable
    remove_selected_columns = TRUE
  ) %>%
  select(starts_with("species"), starts_with("sex")) %>%
  sample_n(6, replace = FALSE)
## # A tibble: 6 x 3
## species_Chinstrap species_Gentoo sex_male
## <int> <int> <int>
## 1 0 1 1
## 2 0 0 0
## 3 0 1 1
## 4 1 0 1
## 5 0 0 1
## 6 1 0 1
caret::dummyVars
library(caret)
# caret: dummyVars() builds the encoding specification and predict() applies it.
# fullRank = TRUE yields (n - 1) dummies per factor; sep = "_" gives names such
# as species_Gentoo.
set.seed(123) # fixed seed so the sampled rows are reproducible
pg %>%
  dummyVars(~ species + sex, data = ., sep = "_", fullRank = TRUE) %>%
  # Rows with a missing sex are kept (their dummy is NA), so the result has the
  # same rows as pg and can be bound to it directly.
  predict(newdata = pg) %>%
  bind_cols(pg) %>%
  sample_n(6, replace = FALSE) # FALSE, not F: F is an ordinary, reassignable variable
## # A tibble: 6 x 11
## species~1 speci~2 sex_m~3 species island bill_~4 bill_~5 flipp~6 body_~7 sex
## <dbl> <dbl> <dbl> <fct> <fct> <dbl> <dbl> <int> <int> <fct>
## 1 0 1 NA Gentoo Biscoe 44.5 14.3 216 4100 <NA>
## 2 0 0 1 Adelie Torge~ 38.6 21.2 191 3800 male
## 3 0 1 0 Gentoo Biscoe 45.3 13.7 210 4300 fema~
## 4 1 0 1 Chinst~ Dream 52.8 20 205 4550 male
## 5 0 0 1 Adelie Torge~ 37.3 20.5 199 3775 male
## 6 1 0 0 Chinst~ Dream 43.2 16.6 187 2900 fema~
## # ... with 1 more variable: year <int>, and abbreviated variable names
## # 1: species_Chinstrap, 2: species_Gentoo, 3: sex_male, 4: bill_length_mm,
## # 5: bill_depth_mm, 6: flipper_length_mm, 7: body_mass_g
sjmisc::to_dummy
library(sjmisc)
# sjmisc: one indicator column per level of species and sex. suffix = "label"
# names the new columns after the level (species_Adelie, sex_male, ...).
# Rows with a missing sex get NA in both sex dummies.
pg %>%
  to_dummy(species, sex, suffix = "label") %>%
  head()
## species_Adelie species_Chinstrap species_Gentoo sex_female sex_male
## 1 1 0 0 0 1
## 2 1 0 0 1 0
## 3 1 0 0 1 0
## 4 1 0 0 NA NA
## 5 1 0 0 1 0
## 6 1 0 0 0 1
modeldb::add_dummy_variables
library(modeldb)
# modeldb: append an indicator column for the listed value(s) of species while
# keeping the original column (remove_original = FALSE).
# NOTE(review): values is given explicitly, so auto_values = TRUE looks
# redundant here -- confirm against the modeldb documentation.
pg %>%
  add_dummy_variables(
    x = species,
    values = "Gentoo",
    remove_original = FALSE, # FALSE/TRUE, not F/T: those are reassignable variables
    auto_values = TRUE
  ) %>%
  head()
## # A tibble: 6 x 9
## species island bill_length_mm bill_de~1 flipp~2 body_~3 sex year speci~4
## <fct> <fct> <dbl> <dbl> <int> <int> <fct> <int> <dbl>
## 1 Adelie Torgersen 39.1 18.7 181 3750 male 2007 0
## 2 Adelie Torgersen 39.5 17.4 186 3800 fema~ 2007 0
## 3 Adelie Torgersen 40.3 18 195 3250 fema~ 2007 0
## 4 Adelie Torgersen NA NA NA NA <NA> 2007 0
## 5 Adelie Torgersen 36.7 19.3 193 3450 fema~ 2007 0
## 6 Adelie Torgersen 39.3 20.6 190 3650 male 2007 0
## # ... with abbreviated variable names 1: bill_depth_mm, 2: flipper_length_mm,
## # 3: body_mass_g, 4: species_Gentoo