# Tally the missing values in every column of `pg`.
pg %>%
  is.na() %>%
  colSums()
## species island bill_length_mm bill_depth_mm
## 0 0 2 2
## flipper_length_mm body_mass_g sex year
## 2 2 11 0
# Remove incomplete rows twice -- once with tidyr, once with base R -- and
# compare the two results cell by cell, showing the first six rows.
pg_drop_na <- drop_na(pg)
pg_drop_na_base <- pg[complete.cases(pg), ]
head(pg_drop_na == pg_drop_na_base, n = 6L)
## species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE
## [2,] TRUE TRUE TRUE TRUE TRUE TRUE
## [3,] TRUE TRUE TRUE TRUE TRUE TRUE
## [4,] TRUE TRUE TRUE TRUE TRUE TRUE
## [5,] TRUE TRUE TRUE TRUE TRUE TRUE
## [6,] TRUE TRUE TRUE TRUE TRUE TRUE
## sex year
## [1,] TRUE TRUE
## [2,] TRUE TRUE
## [3,] TRUE TRUE
## [4,] TRUE TRUE
## [5,] TRUE TRUE
## [6,] TRUE TRUE
# Per-column count of cells that agree between the tidyr and base R results.
# The `==` comparison already yields a logical matrix, so it is summed
# directly instead of being compared with TRUE a second time.
colSums(pg_drop_na == pg_drop_na_base)
## species island bill_length_mm bill_depth_mm
## 333 333 333 333
## flipper_length_mm body_mass_g sex year
## 333 333 333 333
# Per-column count of cells that differ between the tidyr and base R results.
# Negating the logical comparison matrix replaces the `== FALSE` test.
colSums(!(pg_drop_na == pg_drop_na_base))
## species island bill_length_mm bill_depth_mm
## 0 0 0 0
## flipper_length_mm body_mass_g sex year
## 0 0 0 0
# Impute from neighbouring rows: `.direction = "updown"` fills every column
# upwards first and then downwards (per the tidyr docs). Afterwards count the
# NAs remaining in each column.
pg %>%
  fill(everything(), .direction = "updown") %>%
  summarise(across(everything(), function(col) sum(is.na(col))))
## # A tibble: 1 x 8
## species island bill_length_mm bill_depth_mm flipper_leng~1 body_~2 sex year
## <int> <int> <int> <int> <int> <int> <int> <int>
## 1 0 0 0 0 0 0 0 0
## # ... with abbreviated variable names 1: flipper_length_mm, 2: body_mass_g
# Impute missing values -- numeric columns with the column mean, factor
# columns with the value returned by rstatix::get_mode() -- then show only the
# rows that were incomplete in the original `pg`.
pg %>%
  # Turn every numeric column (integer ones included) into double first,
  # presumably so that a fractional mean can be stored in it.
  mutate(across(where(is.numeric), ~as.numeric(.))) %>%
  # Spell out TRUE: `T` is an ordinary variable and can be reassigned.
  mutate(across(where(is.numeric), ~replace_na(., mean(., na.rm = TRUE)))) %>%
  mutate(across(where(is.factor), ~replace_na(., rstatix::get_mode(.)))) %>%
  # The row mask comes from the original `pg`, not from the piped data, which
  # has already been imputed. This relies on the steps above keeping the row
  # order and row count unchanged.
  filter(!complete.cases(pg))
## # A tibble: 11 x 8
## species island bill_length_mm bill_depth_mm flipper_~1 body_~2 sex year
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <fct> <dbl>
## 1 Adelie Torgersen 43.9 17.2 201. 4202. male 2007
## 2 Adelie Torgersen 34.1 18.1 193 3475 male 2007
## 3 Adelie Torgersen 42 20.2 190 4250 male 2007
## 4 Adelie Torgersen 37.8 17.1 186 3300 male 2007
## 5 Adelie Torgersen 37.8 17.3 180 3700 male 2007
## 6 Adelie Dream 37.5 18.9 179 2975 male 2007
## 7 Gentoo Biscoe 44.5 14.3 216 4100 male 2007
## 8 Gentoo Biscoe 46.2 14.4 214 4650 male 2008
## 9 Gentoo Biscoe 47.3 13.8 216 4725 male 2009
## 10 Gentoo Biscoe 44.5 15.7 217 4875 male 2009
## 11 Gentoo Biscoe 43.9 17.2 201. 4202. male 2009
## # ... with abbreviated variable names 1: flipper_length_mm, 2: body_mass_g
# Original rows of `pg` that contain at least one missing value.
pg %>%
  filter(!complete.cases(.))
## # A tibble: 11 x 8
## species island bill_length_mm bill_depth_mm flipper_~1 body_~2 sex year
## <fct> <fct> <dbl> <dbl> <int> <int> <fct> <int>
## 1 Adelie Torgersen NA NA NA NA <NA> 2007
## 2 Adelie Torgersen 34.1 18.1 193 3475 <NA> 2007
## 3 Adelie Torgersen 42 20.2 190 4250 <NA> 2007
## 4 Adelie Torgersen 37.8 17.1 186 3300 <NA> 2007
## 5 Adelie Torgersen 37.8 17.3 180 3700 <NA> 2007
## 6 Adelie Dream 37.5 18.9 179 2975 <NA> 2007
## 7 Gentoo Biscoe 44.5 14.3 216 4100 <NA> 2007
## 8 Gentoo Biscoe 46.2 14.4 214 4650 <NA> 2008
## 9 Gentoo Biscoe 47.3 13.8 216 4725 <NA> 2009
## 10 Gentoo Biscoe 44.5 15.7 217 4875 <NA> 2009
## 11 Gentoo Biscoe NA NA NA NA <NA> 2009
## # ... with abbreviated variable names 1: flipper_length_mm, 2: body_mass_g
.EOF.