Load data

# Read the plastics CSV (TidyTuesday, 2021-01-26 folder) directly from GitHub.
plastic_raw <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-26/plastics.csv",
  # Silence the column-specification message so only the tibble is shown.
  show_col_types = FALSE
)

# Auto-print a preview of the raw table.
plastic_raw
## # A tibble: 13,380 × 14
##    country   year parent_company empty  hdpe  ldpe     o   pet    pp    ps   pvc
##    <chr>    <dbl> <chr>          <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 Argenti…  2019 Grand Total        0   215    55   607  1376   281   116    18
##  2 Argenti…  2019 Unbranded          0   155    50   532   848   122   114    17
##  3 Argenti…  2019 The Coca-Cola…     0     0     0     0   222    35     0     0
##  4 Argenti…  2019 Secco              0     0     0     0    39     4     0     0
##  5 Argenti…  2019 Doble Cola         0     0     0     0    38     0     0     0
##  6 Argenti…  2019 Pritty             0     0     0     0    22     7     0     0
##  7 Argenti…  2019 PepsiCo            0     0     0     0    21     6     0     0
##  8 Argenti…  2019 Casoni             0     0     0     0    26     0     0     0
##  9 Argenti…  2019 Villa Del Sur…     0     0     0     0    19     1     0     0
## 10 Argenti…  2019 Manaos             0     0     0     0    14     4     0     0
## # ℹ 13,370 more rows
## # ℹ 3 more variables: grand_total <dbl>, num_events <dbl>, volunteers <dbl>

Tidy data

# Reshape from wide to long: every listed column becomes one row, with the
# column name stored in `plastic_type` and its value in `count`.
# NOTE(review): `grand_total` (the row sum of the type columns) and
# `num_events` are stacked next to the real plastic-type columns, so any later
# sum over `count` mixes them in with the per-type counts.
plastic_tidy <- tidyr::pivot_longer(
  data = plastic_raw,
  cols = c(
    empty, hdpe, ldpe, o, pet, pp, ps, pvc,
    grand_total, num_events
  ),
  names_to = "plastic_type",
  values_to = "count"
)

plastic_tidy
## # A tibble: 133,800 × 6
##    country    year parent_company volunteers plastic_type count
##    <chr>     <dbl> <chr>               <dbl> <chr>        <dbl>
##  1 Argentina  2019 Grand Total           243 empty            0
##  2 Argentina  2019 Grand Total           243 hdpe           215
##  3 Argentina  2019 Grand Total           243 ldpe            55
##  4 Argentina  2019 Grand Total           243 o              607
##  5 Argentina  2019 Grand Total           243 pet           1376
##  6 Argentina  2019 Grand Total           243 pp             281
##  7 Argentina  2019 Grand Total           243 ps             116
##  8 Argentina  2019 Grand Total           243 pvc             18
##  9 Argentina  2019 Grand Total           243 grand_total   2668
## 10 Argentina  2019 Grand Total           243 num_events       4
## # ℹ 133,790 more rows

Select and filter

# Keep the identifying columns plus the long-format type/count pair
# (this leaves out `volunteers`).
plastic_simple <- select(
  plastic_tidy,
  country, year, parent_company, plastic_type, count
)

# Argentina, 2019, and only rows where a count was actually recorded.
plastic_arg_2019 <- filter(
  plastic_simple,
  country == "Argentina" & year == 2019 & !is.na(count)
)

plastic_simple
## # A tibble: 133,800 × 5
##    country    year parent_company plastic_type count
##    <chr>     <dbl> <chr>          <chr>        <dbl>
##  1 Argentina  2019 Grand Total    empty            0
##  2 Argentina  2019 Grand Total    hdpe           215
##  3 Argentina  2019 Grand Total    ldpe            55
##  4 Argentina  2019 Grand Total    o              607
##  5 Argentina  2019 Grand Total    pet           1376
##  6 Argentina  2019 Grand Total    pp             281
##  7 Argentina  2019 Grand Total    ps             116
##  8 Argentina  2019 Grand Total    pvc             18
##  9 Argentina  2019 Grand Total    grand_total   2668
## 10 Argentina  2019 Grand Total    num_events       4
## # ℹ 133,790 more rows
# Show the Argentina 2019 subset (rows with a non-missing count).
plastic_arg_2019
## # A tibble: 2,750 × 5
##    country    year parent_company plastic_type count
##    <chr>     <dbl> <chr>          <chr>        <dbl>
##  1 Argentina  2019 Grand Total    empty            0
##  2 Argentina  2019 Grand Total    hdpe           215
##  3 Argentina  2019 Grand Total    ldpe            55
##  4 Argentina  2019 Grand Total    o              607
##  5 Argentina  2019 Grand Total    pet           1376
##  6 Argentina  2019 Grand Total    pp             281
##  7 Argentina  2019 Grand Total    ps             116
##  8 Argentina  2019 Grand Total    pvc             18
##  9 Argentina  2019 Grand Total    grand_total   2668
## 10 Argentina  2019 Grand Total    num_events       4
## # ℹ 2,740 more rows

Group by country and year

# One row per country/year with the sum of every `count` in that group.
# NOTE(review): the sum runs over all `plastic_type` values, including the
# "grand_total" and "num_events" rows, and over "Grand Total" company rows.
total_by_country_year <- plastic_simple %>%
  group_by(country, year) %>%
  summarise(
    total_count = sum(count, na.rm = TRUE),
    .groups = "drop"
  )

total_by_country_year
## # A tibble: 107 × 3
##    country     year total_count
##    <chr>      <dbl>       <dbl>
##  1 Argentina   2019       11772
##  2 Argentina   2020        7596
##  3 Armenia     2020          20
##  4 Australia   2019          14
##  5 Australia   2020        3957
##  6 Bangladesh  2019          90
##  7 Bangladesh  2020        4908
##  8 Benin       2019       19954
##  9 Benin       2020         688
## 10 Bhutan      2019       14005
## # ℹ 97 more rows

Mutate example

# Add two derived columns to the Argentina 2019 subset:
#   large_count   - TRUE when the count is above 100
#   country_lower - the country name in lower case
plastic_mutated <- mutate(
  plastic_arg_2019,
  large_count = count > 100,
  country_lower = tolower(country)
)

plastic_mutated
## # A tibble: 2,750 × 7
##    country    year parent_company plastic_type count large_count country_lower
##    <chr>     <dbl> <chr>          <chr>        <dbl> <lgl>       <chr>        
##  1 Argentina  2019 Grand Total    empty            0 FALSE       argentina    
##  2 Argentina  2019 Grand Total    hdpe           215 TRUE        argentina    
##  3 Argentina  2019 Grand Total    ldpe            55 FALSE       argentina    
##  4 Argentina  2019 Grand Total    o              607 TRUE        argentina    
##  5 Argentina  2019 Grand Total    pet           1376 TRUE        argentina    
##  6 Argentina  2019 Grand Total    pp             281 TRUE        argentina    
##  7 Argentina  2019 Grand Total    ps             116 TRUE        argentina    
##  8 Argentina  2019 Grand Total    pvc             18 FALSE       argentina    
##  9 Argentina  2019 Grand Total    grand_total   2668 TRUE        argentina    
## 10 Argentina  2019 Grand Total    num_events       4 FALSE       argentina    
## # ℹ 2,740 more rows

Summary for 2019

# Keep only the 2019 rows of the per-country/year totals.
plastic_2019_summary <- filter(total_by_country_year, year == 2019)

plastic_2019_summary
## # A tibble: 52 × 3
##    country       year total_count
##    <chr>        <dbl>       <dbl>
##  1 Argentina     2019       11772
##  2 Australia     2019          14
##  3 Bangladesh    2019          90
##  4 Benin         2019       19954
##  5 Bhutan        2019       14005
##  6 Brazil        2019       19338
##  7 Bulgaria      2019          69
##  8 Burkina Faso  2019       23299
##  9 Cameroon      2019       35900
## 10 Canada        2019          30
## # ℹ 42 more rows

Histogram: distribution of plastic counts per record

# Histogram of positive per-type counts on a log10 x-axis.
# `plastic_type` also holds "grand_total" and "num_events" (they were pivoted
# together with the plastic-type columns), and the data contains "Grand Total"
# parent_company rows (e.g. Argentina 2019) that appear to be per-country
# aggregates. None of these are individual plastic-type records, so they are
# dropped to keep the chart consistent with its subtitle.
plastic_simple %>%
  filter(
    !plastic_type %in% c("grand_total", "num_events"),
    # `%in%` never yields NA, so rows with a missing company are kept.
    !parent_company %in% "Grand Total",
    !is.na(count),
    # Zeros cannot be placed on a log scale.
    count > 0
  ) %>%
  ggplot(aes(x = count)) +
  geom_histogram(bins = 40, fill = "#3182bd", color = "white") +
  scale_x_log10() +
  labs(
    title = "Distribution of plastic counts",
    subtitle = "Positive counts per plastic type entry across all countries and years",
    x = "Count",
    y = "Number of records"
  ) +
  theme_minimal()

Bar chart: top parent companies in Argentina (2019)

# Horizontal bar chart of the ten parent companies with the largest total
# plastic count in Argentina, 2019.
plastic_arg_2019 %>%
  filter(
    # "grand_total" already equals the sum of the per-type rows and
    # "num_events" is not a plastic count; summing them with the per-type rows
    # would inflate every total.
    !plastic_type %in% c("grand_total", "num_events"),
    # "Grand Total" is an aggregate row, not a company; `%in%` keeps NA names.
    !parent_company %in% "Grand Total"
  ) %>%
  group_by(parent_company) %>%
  summarize(total_count = sum(count, na.rm = TRUE), .groups = "drop") %>%
  # with_ties = FALSE caps the chart at exactly ten bars even when totals tie.
  slice_max(total_count, n = 10, with_ties = FALSE) %>%
  # Order the bars by total so the largest ends up at the top after the flip.
  mutate(parent_company = fct_reorder(parent_company, total_count)) %>%
  ggplot(aes(x = parent_company, y = total_count)) +
  geom_col(fill = "#31a354") +
  coord_flip() +
  labs(
    title = "Top parent companies by plastic count in Argentina, 2019",
    x = "Parent company",
    y = "Total plastic count"
  ) +
  theme_minimal()

Bar chart: top PET counts by parent company and year

# Total PET count per year and parent company, keeping only positive totals.
pet_by_company <- plastic_tidy %>%
  filter(plastic_type == "pet") %>%
  group_by(year, parent_company) %>%
  summarize(total_pet = sum(count, na.rm = TRUE), .groups = "drop") %>%
  filter(total_pet > 0)

# Horizontal stacked bar chart of the ten largest PET totals in each year,
# coloured by year.
pet_by_company %>%
  # "Grand Total" is an aggregate row rather than a company; without this
  # filter it would take one of the top-ten slots. `%in%` keeps NA names.
  filter(!parent_company %in% "Grand Total") %>%
  group_by(year) %>%
  # with_ties = FALSE guarantees at most ten companies per year.
  slice_max(total_pet, n = 10, with_ties = FALSE) %>%
  ungroup() %>%
  ggplot(aes(
    # A company present in both years gets one stacked bar whose length is the
    # sum of its yearly totals, so order by the sum (reorder() defaults to the
    # mean, which would misplace those bars).
    x = reorder(parent_company, total_pet, FUN = sum),
    y = total_pet,
    fill = factor(year)
  )) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Top Parent Companies Producing PET Waste Each Year",
    subtitle = "PET is among the most commonly recycled but still environmentally persistent plastics",
    x = "Parent Company",
    y = "Total PET Count",
    fill = "Year"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

dplyr and data.table summaries

# dplyr: one row per `plastic_type` value with the sum of its counts
# (missing counts are ignored).
summ_dplyr <- plastic_tidy %>%
  group_by(plastic_type) %>%
  summarise(
    total = sum(count, na.rm = TRUE),
    .groups = "drop"
  )

summ_dplyr
## # A tibble: 10 × 2
##    plastic_type   total
##    <chr>          <dbl>
##  1 empty           4174
##  2 grand_total  1204956
##  3 hdpe           35742
##  4 ldpe          116640
##  5 num_events    446488
##  6 o             650583
##  7 pet           275700
##  8 pp             97696
##  9 ps             21243
## 10 pvc             3169
# data.table: the same per-type totals, computed with `[i, j, by]` syntax.
plastic_dt <- as.data.table(plastic_tidy)

summ_dt <- plastic_dt[
  ,
  list(total = sum(count, na.rm = TRUE)),
  by = "plastic_type"
]

summ_dt
##     plastic_type   total
##           <char>   <num>
##  1:        empty    4174
##  2:         hdpe   35742
##  3:         ldpe  116640
##  4:            o  650583
##  5:          pet  275700
##  6:           pp   97696
##  7:           ps   21243
##  8:          pvc    3169
##  9:  grand_total 1204956
## 10:   num_events  446488

Top plastic types

# dplyr: per-type totals, largest first.
arrange(summ_dplyr, desc(total))
## # A tibble: 10 × 2
##    plastic_type   total
##    <chr>          <dbl>
##  1 grand_total  1204956
##  2 o             650583
##  3 num_events    446488
##  4 pet           275700
##  5 ldpe          116640
##  6 pp             97696
##  7 hdpe           35742
##  8 ps             21243
##  9 empty           4174
## 10 pvc             3169
# data.table: per-type totals, largest first.
summ_dt[order(total, decreasing = TRUE)]
##     plastic_type   total
##           <char>   <num>
##  1:  grand_total 1204956
##  2:            o  650583
##  3:   num_events  446488
##  4:          pet  275700
##  5:         ldpe  116640
##  6:           pp   97696
##  7:         hdpe   35742
##  8:           ps   21243
##  9:        empty    4174
## 10:          pvc    3169