Load data
# Read the TidyTuesday (2021-01-26) plastics CSV straight from GitHub.
# `show_col_types = FALSE` turns off readr's column-specification message.
plastic_raw <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-26/plastics.csv",
  show_col_types = FALSE
)

# Auto-print the tibble to inspect the raw columns.
plastic_raw
## # A tibble: 13,380 × 14
## country year parent_company empty hdpe ldpe o pet pp ps pvc
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Argenti… 2019 Grand Total 0 215 55 607 1376 281 116 18
## 2 Argenti… 2019 Unbranded 0 155 50 532 848 122 114 17
## 3 Argenti… 2019 The Coca-Cola… 0 0 0 0 222 35 0 0
## 4 Argenti… 2019 Secco 0 0 0 0 39 4 0 0
## 5 Argenti… 2019 Doble Cola 0 0 0 0 38 0 0 0
## 6 Argenti… 2019 Pritty 0 0 0 0 22 7 0 0
## 7 Argenti… 2019 PepsiCo 0 0 0 0 21 6 0 0
## 8 Argenti… 2019 Casoni 0 0 0 0 26 0 0 0
## 9 Argenti… 2019 Villa Del Sur… 0 0 0 0 19 1 0 0
## 10 Argenti… 2019 Manaos 0 0 0 0 14 4 0 0
## # ℹ 13,370 more rows
## # ℹ 3 more variables: grand_total <dbl>, num_events <dbl>, volunteers <dbl>
Tidy data
# Reshape the wide table to long form: each listed measurement column becomes
# one row, with the column name in `plastic_type` and its value in `count`.
# NOTE(review): `grand_total` and `num_events` are pivoted alongside the
# plastic types, so any later sum over `plastic_type` also adds these two
# non-plastic measurements -- confirm this is intended.
plastic_tidy <- tidyr::pivot_longer(
  data = plastic_raw,
  cols = c(
    empty, hdpe, ldpe, o, pet, pp, ps, pvc,
    grand_total, num_events
  ),
  names_to = "plastic_type",
  values_to = "count"
)

plastic_tidy
## # A tibble: 133,800 × 6
## country year parent_company volunteers plastic_type count
## <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 Argentina 2019 Grand Total 243 empty 0
## 2 Argentina 2019 Grand Total 243 hdpe 215
## 3 Argentina 2019 Grand Total 243 ldpe 55
## 4 Argentina 2019 Grand Total 243 o 607
## 5 Argentina 2019 Grand Total 243 pet 1376
## 6 Argentina 2019 Grand Total 243 pp 281
## 7 Argentina 2019 Grand Total 243 ps 116
## 8 Argentina 2019 Grand Total 243 pvc 18
## 9 Argentina 2019 Grand Total 243 grand_total 2668
## 10 Argentina 2019 Grand Total 243 num_events 4
## # ℹ 133,790 more rows
Select and filter
# Keep only the identifying columns plus the long-form measurement pair.
plastic_simple <- select(
  plastic_tidy,
  country, year, parent_company, plastic_type, count
)

# Argentina's 2019 rows that have a recorded (non-missing) count.
plastic_arg_2019 <- filter(
  plastic_simple,
  country == "Argentina" & year == 2019 & !is.na(count)
)

plastic_simple
## # A tibble: 133,800 × 5
## country year parent_company plastic_type count
## <chr> <dbl> <chr> <chr> <dbl>
## 1 Argentina 2019 Grand Total empty 0
## 2 Argentina 2019 Grand Total hdpe 215
## 3 Argentina 2019 Grand Total ldpe 55
## 4 Argentina 2019 Grand Total o 607
## 5 Argentina 2019 Grand Total pet 1376
## 6 Argentina 2019 Grand Total pp 281
## 7 Argentina 2019 Grand Total ps 116
## 8 Argentina 2019 Grand Total pvc 18
## 9 Argentina 2019 Grand Total grand_total 2668
## 10 Argentina 2019 Grand Total num_events 4
## # ℹ 133,790 more rows
# Inspect the Argentina 2019 subset (rows with a non-missing count).
plastic_arg_2019
## # A tibble: 2,750 × 5
## country year parent_company plastic_type count
## <chr> <dbl> <chr> <chr> <dbl>
## 1 Argentina 2019 Grand Total empty 0
## 2 Argentina 2019 Grand Total hdpe 215
## 3 Argentina 2019 Grand Total ldpe 55
## 4 Argentina 2019 Grand Total o 607
## 5 Argentina 2019 Grand Total pet 1376
## 6 Argentina 2019 Grand Total pp 281
## 7 Argentina 2019 Grand Total ps 116
## 8 Argentina 2019 Grand Total pvc 18
## 9 Argentina 2019 Grand Total grand_total 2668
## 10 Argentina 2019 Grand Total num_events 4
## # ℹ 2,740 more rows
Group by country and year
# Sum `count` within every country/year pair. Missing counts are ignored and
# the result comes back ungrouped (`.groups = "drop"`).
# NOTE(review): `plastic_simple` still holds the `grand_total` and
# `num_events` rows created by the pivot, so `total_count` adds them on top
# of the individual plastic-type counts -- confirm this is intended.
total_by_country_year <- summarize(
  group_by(plastic_simple, country, year),
  total_count = sum(count, na.rm = TRUE),
  .groups = "drop"
)

total_by_country_year
## # A tibble: 107 × 3
## country year total_count
## <chr> <dbl> <dbl>
## 1 Argentina 2019 11772
## 2 Argentina 2020 7596
## 3 Armenia 2020 20
## 4 Australia 2019 14
## 5 Australia 2020 3957
## 6 Bangladesh 2019 90
## 7 Bangladesh 2020 4908
## 8 Benin 2019 19954
## 9 Benin 2020 688
## 10 Bhutan 2019 14005
## # ℹ 97 more rows
Mutate example
# Add two derived columns to the Argentina 2019 subset:
#   large_count   - TRUE when the row's count exceeds 100
#   country_lower - the country name in lower case
plastic_mutated <- mutate(
  plastic_arg_2019,
  large_count = count > 100,
  country_lower = tolower(country)
)

plastic_mutated
## # A tibble: 2,750 × 7
## country year parent_company plastic_type count large_count country_lower
## <chr> <dbl> <chr> <chr> <dbl> <lgl> <chr>
## 1 Argentina 2019 Grand Total empty 0 FALSE argentina
## 2 Argentina 2019 Grand Total hdpe 215 TRUE argentina
## 3 Argentina 2019 Grand Total ldpe 55 FALSE argentina
## 4 Argentina 2019 Grand Total o 607 TRUE argentina
## 5 Argentina 2019 Grand Total pet 1376 TRUE argentina
## 6 Argentina 2019 Grand Total pp 281 TRUE argentina
## 7 Argentina 2019 Grand Total ps 116 TRUE argentina
## 8 Argentina 2019 Grand Total pvc 18 FALSE argentina
## 9 Argentina 2019 Grand Total grand_total 2668 TRUE argentina
## 10 Argentina 2019 Grand Total num_events 4 FALSE argentina
## # ℹ 2,740 more rows
Summary for 2019
# Restrict the per-country/year totals to the 2019 rows.
plastic_2019_summary <- filter(total_by_country_year, year == 2019)

plastic_2019_summary
## # A tibble: 52 × 3
## country year total_count
## <chr> <dbl> <dbl>
## 1 Argentina 2019 11772
## 2 Australia 2019 14
## 3 Bangladesh 2019 90
## 4 Benin 2019 19954
## 5 Bhutan 2019 14005
## 6 Brazil 2019 19338
## 7 Bulgaria 2019 69
## 8 Burkina Faso 2019 23299
## 9 Cameroon 2019 35900
## 10 Canada 2019 30
## # ℹ 42 more rows
Histogram: distribution of plastic counts per record
# Histogram of positive per-record plastic counts on a log10 x-axis.
#
# `plastic_simple` comes from a pivot that stored `grand_total` and
# `num_events` as `plastic_type` values. Those rows are a row total and an
# event tally rather than counts of a plastic type, so they are removed here;
# otherwise the "plastic counts" distribution mixes in non-plastic numbers.
# Zero and missing counts are dropped because log10 cannot display them.
# NOTE(review): rows whose parent_company is "Grand Total" are still included;
# decide whether those aggregate rows belong in the distribution.
plastic_simple %>%
  filter(
    !plastic_type %in% c("grand_total", "num_events"),
    !is.na(count),
    count > 0
  ) %>%
  ggplot(aes(x = count)) +
  geom_histogram(bins = 40, fill = "#3182bd", color = "white") +
  scale_x_log10() +
  labs(
    title = "Distribution of plastic counts",
    subtitle = "Positive counts per plastic type entry across all countries and years",
    x = "Count",
    y = "Number of records"
  ) +
  theme_minimal()

Bar chart: top parent companies in Argentina (2019)
# Horizontal bar chart of the 10 parent companies with the largest total
# plastic count in Argentina, 2019.
#
# Aggregate rows are removed before summing, otherwise they distort the
# ranking:
#   * rows labelled "Grand Total" in `parent_company` are totals, not a
#     company, and would always rank first;
#   * the `grand_total` and `num_events` values of `plastic_type` are a row
#     total and an event tally, so summing them together with the individual
#     plastic types double counts every company.
# `%in%` is used instead of `!=` so rows with a missing parent_company are
# not silently dropped by the filter.
plastic_arg_2019 %>%
  filter(
    !parent_company %in% "Grand Total",
    !plastic_type %in% c("grand_total", "num_events")
  ) %>%
  group_by(parent_company) %>%
  summarize(total_count = sum(count, na.rm = TRUE), .groups = "drop") %>%
  slice_max(total_count, n = 10) %>%
  # Order the factor by total so the bars are sorted in the plot.
  mutate(parent_company = fct_reorder(parent_company, total_count)) %>%
  ggplot(aes(x = parent_company, y = total_count)) +
  geom_col(fill = "#31a354") +
  coord_flip() +
  labs(
    title = "Top parent companies collecting plastic in Argentina, 2019",
    x = "Parent company",
    y = "Total plastic count"
  ) +
  theme_minimal()

Bar chart: top parent companies by total PET count, per year
# Total PET count per parent company and year (companies with no PET removed).
#
# Rows labelled "Grand Total" in `parent_company` are totals rather than a
# company; they are excluded so they cannot appear in the company ranking.
# `%in%` keeps rows with a missing parent_company, which `!=` would drop.
pet_by_company <- plastic_tidy %>%
  filter(
    plastic_type == "pet",
    !parent_company %in% "Grand Total"
  ) %>%
  group_by(year, parent_company) %>%
  summarize(total_pet = sum(count, na.rm = TRUE), .groups = "drop") %>%
  filter(total_pet > 0)

# Horizontal bars for the 10 largest PET totals within each year, coloured by
# year. The grouping is only needed for the per-year `slice_max()`, so it is
# dropped again before the data reaches ggplot.
pet_by_company %>%
  group_by(year) %>%
  slice_max(total_pet, n = 10) %>%
  ungroup() %>%
  ggplot(aes(
    x = reorder(parent_company, total_pet),
    y = total_pet,
    fill = factor(year)
  )) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Top Parent Companies Producing PET Waste Each Year",
    subtitle = "PET is among the most commonly recycled but still environmentally persistent plastics",
    x = "Parent Company",
    y = "Total PET Count",
    fill = "Year"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

dplyr and data.table summaries
# dplyr: one row per `plastic_type` with the sum of its counts (NAs ignored).
# NOTE(review): `grand_total` and `num_events` appear here as if they were
# plastic types because the pivot included them -- confirm this is intended.
summ_dplyr <- summarize(
  group_by(plastic_tidy, plastic_type),
  total = sum(count, na.rm = TRUE),
  .groups = "drop"
)

summ_dplyr
## # A tibble: 10 × 2
## plastic_type total
## <chr> <dbl>
## 1 empty 4174
## 2 grand_total 1204956
## 3 hdpe 35742
## 4 ldpe 116640
## 5 num_events 446488
## 6 o 650583
## 7 pet 275700
## 8 pp 97696
## 9 ps 21243
## 10 pvc 3169
# data.table: the same per-type totals, computed with `[i, j, by]` syntax.
# `list()` is the long form of data.table's `.()` alias.
plastic_dt <- as.data.table(plastic_tidy)
summ_dt <- plastic_dt[
  ,
  list(total = sum(count, na.rm = TRUE)),
  by = "plastic_type"
]

summ_dt
## plastic_type total
## <char> <num>
## 1: empty 4174
## 2: hdpe 35742
## 3: ldpe 116640
## 4: o 650583
## 5: pet 275700
## 6: pp 97696
## 7: ps 21243
## 8: pvc 3169
## 9: grand_total 1204956
## 10: num_events 446488
Top plastic types
# dplyr: list the per-type totals from largest to smallest.
arrange(summ_dplyr, desc(total))
## # A tibble: 10 × 2
## plastic_type total
## <chr> <dbl>
## 1 grand_total 1204956
## 2 o 650583
## 3 num_events 446488
## 4 pet 275700
## 5 ldpe 116640
## 6 pp 97696
## 7 hdpe 35742
## 8 ps 21243
## 9 empty 4174
## 10 pvc 3169
# data.table: list the per-type totals from largest to smallest.
summ_dt[order(total, decreasing = TRUE)]
## plastic_type total
## <char> <num>
## 1: grand_total 1204956
## 2: o 650583
## 3: num_events 446488
## 4: pet 275700
## 5: ldpe 116640
## 6: pp 97696
## 7: hdpe 35742
## 8: ps 21243
## 9: empty 4174
## 10: pvc 3169