Import your data
# Read the TidyTuesday (2019-03-05) jobs_gender CSV straight from GitHub.
# Column types are left for readr to guess.
jobs_gender <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv"
)
## Rows: 2088 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): occupation, major_category, minor_category
## dbl (9): year, total_workers, workers_male, workers_female, percent_female, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the three worker-count columns; the reshaping examples that
# follow all operate on this narrower table.
employees <- select(
  jobs_gender,
  total_workers, workers_male, workers_female
)
Pivoting
Long to wide form
# Spread workers_male into one column per distinct total_workers value;
# the remaining column (workers_female) identifies the rows.
# NOTE(review): `employees` has no categorical name column, so this yields a
# very wide, mostly-NA tibble (2,072 columns in the printed result). A
# long-to-wide demo would normally take names from a categorical column —
# confirm this is the intended example.
pivot_wider(
  employees,
  names_from = total_workers,
  values_from = workers_male
)
## # A tibble: 2,005 × 2,072
## workers_fem…¹ 10242…² 97728…³ `14815` `43015` 75451…⁴ `44198` 10970…⁵ 48904…⁶
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 241859 782400 NA NA NA NA NA NA NA
## 2 295657 NA 681627 NA NA NA NA NA NA
## 3 6440 NA NA 8375 NA NA NA NA NA
## 4 25240 NA NA NA 17775 NA NA NA NA
## 5 314436 NA NA NA NA 440078 NA NA NA
## 6 28057 NA NA NA NA NA 16141 NA NA
## 7 36830 NA NA NA NA NA NA 72873 NA
## 8 134679 NA NA NA NA NA NA NA 354369
## 9 529769 NA NA NA NA NA NA NA NA
## 10 11269 NA NA NA NA NA NA NA NA
## # … with 1,995 more rows, 2,063 more variables: `990611` <dbl>, `14656` <dbl>,
## # `307004` <dbl>, `41185` <dbl>, `218198` <dbl>, `175540` <dbl>,
## # `185512` <dbl>, `499775` <dbl>, `469706` <dbl>, `708239` <dbl>,
## # `132575` <dbl>, `740359` <dbl>, `5439` <dbl>, `17294` <dbl>, `96419` <dbl>,
## # `546042` <dbl>, `16766` <dbl>, `28282` <dbl>, `414232` <dbl>,
## # `279952` <dbl>, `6422` <dbl>, `3034121` <dbl>, `28413` <dbl>, `7184` <dbl>,
## # `170412` <dbl>, `220927` <dbl>, `257730` <dbl>, `193020` <dbl>, …
# Drop the 10th row, then sort ascending by total_workers, using
# workers_male to break ties.
arrange(
  slice(employees, -10),
  total_workers, workers_male
)
## # A tibble: 2,087 × 3
## total_workers workers_male workers_female
## <dbl> <dbl> <dbl>
## 1 658 658 0
## 2 732 722 10
## 3 747 747 0
## 4 836 358 478
## 5 847 847 0
## 6 929 493 436
## 7 947 338 609
## 8 972 972 0
## 9 1043 706 337
## 10 1209 1105 104
## # … with 2,077 more rows
Separating and Uniting
Separate a column
Unite two columns
# Paste the columns of `employees` into a single character column named
# `population`, with values separated by "/". No columns are listed, and the
# printed result shows all three being combined.
unite(employees, col = population, sep = "/")
## # A tibble: 2,088 × 1
## population
## <chr>
## 1 1024259/782400/241859
## 2 977284/681627/295657
## 3 14815/8375/6440
## 4 43015/17775/25240
## 5 754514/440078/314436
## 6 44198/16141/28057
## 7 109703/72873/36830
## 8 489048/354369/134679
## 9 990611/460842/529769
## 10 14656/3387/11269
## # … with 2,078 more rows
Missing Values