About the Data

“Plastic pollution is a major and growing problem, negatively affecting oceans and wildlife health. Our World in Data has a lot of great data at the various levels including globally, per country, and over time.”

library("tidyverse")

Load Data

# Download the three TidyTuesday (2019-05-21) CSV files straight from GitHub.
# `read_csv()` comes from readr, which is attached by `library("tidyverse")`.

# Coastal population vs. mismanaged plastic.
coast_vs_waste <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-21/coastal-population-vs-mismanaged-plastic.csv"
)

# Per-capita mismanaged plastic waste vs. GDP per capita.
mismanaged_vs_gdp <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-21/per-capita-mismanaged-plastic-waste-vs-gdp-per-capita.csv"
)

# Per-capita plastic waste vs. GDP per capita.
waste_vs_gdp <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-21/per-capita-plastic-waste-vs-gdp-per-capita.csv"
)

Pare Data

Most of the waste measurements come from the year 2010; across all years, the waste columns are missing for over 99 percent of observations. Here we pare down the data to keep only the observations where a waste measurement was recorded.

# Pare each table down to the rows that actually carry a waste measurement.

# Keep rows with a recorded mismanaged-waste total, then remove the rows
# coded "OWID_WRL" (presumably the Our World in Data world aggregate --
# TODO confirm). Note that `filter()` also drops rows whose `Code` is NA,
# because the comparison evaluates to NA for them.
coast_vs_waste_tidy <- coast_vs_waste %>%
  drop_na(`Mismanaged plastic waste (tonnes)`) %>%
  filter(Code != "OWID_WRL")

# Keep rows with a recorded per-capita mismanaged-waste value.
mismanaged_vs_gdp_tidy <- mismanaged_vs_gdp %>%
  drop_na(`Per capita mismanaged plastic waste (kilograms per person per day)`)

# Keep rows with a recorded per-capita plastic-waste value.
waste_vs_gdp_tidy <- waste_vs_gdp %>%
  drop_na(`Per capita plastic waste (kilograms per person per day)`)

Merge Data

Rather than assume that the same countries are represented in both cleaned data sets, we merge them, keeping only the countries that appear in both.

# Combine the two pared-down per-capita tables, keeping only entities that
# appear in both. The join key is `Entity` alone, so any other column name
# shared by the two tables is kept twice with `.x` (left table,
# mismanaged_vs_gdp_tidy) and `.y` (right table, waste_vs_gdp_tidy) suffixes.
joint_data <- mismanaged_vs_gdp_tidy %>%
  inner_join(waste_vs_gdp_tidy, by = "Entity")

Sample Statistics

# Minimum, quartiles, mean and maximum of per-capita mismanaged plastic waste.
joint_data %>%
  pull(`Per capita mismanaged plastic waste (kilograms per person per day)`) %>%
  summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00100 0.00600 0.03200 0.04523 0.06600 0.29900
# The same summary for per-capita plastic waste.
joint_data %>%
  pull(`Per capita plastic waste (kilograms per person per day)`) %>%
  summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0100  0.1030  0.1440  0.1976  0.2520  3.6000

Plot the Data

# Scatter plot of per-capita mismanaged plastic waste against per-capita
# plastic waste for every row of `joint_data`. Each point is labelled with
# `Code.x`, the `Code` column that came from the left-hand table of the join.
ggplot(
  data = joint_data,
  mapping = aes(
    x = `Per capita plastic waste (kilograms per person per day)`,
    y = `Per capita mismanaged plastic waste (kilograms per person per day)`,
    label = Code.x
  )
) +
  geom_point() +
  # Shift each label up by 0.015 y-units so it does not sit on its point.
  geom_text(nudge_y = 0.015) +
  # Straight-line (linear model) fit, drawn without a confidence band.
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle(
    label = "Plastic Waste Around the World",
    subtitle = "Year 2010"
  ) +
  xlab("per capita plastic waste") +
  ylab("per capita mismanaged plastic waste") +
  labs(caption = "Source: Our World in Data") +
  theme_minimal()

A More Focused Example

# Scatter plot restricted to the 10 rows of `joint_data` with the highest
# per-capita plastic waste. Each point is labelled with `Code.x`, the `Code`
# column that came from the left-hand table of the join.
joint_data %>%
  # `slice_max()` is the maintained replacement for the superseded `top_n()`.
  # `with_ties = TRUE` keeps rows tied at the cut-off, as `top_n()` did, so
  # the same rows are selected.
  slice_max(
    order_by = `Per capita plastic waste (kilograms per person per day)`,
    n = 10,
    with_ties = TRUE
  ) %>%
  ggplot(aes(x = `Per capita plastic waste (kilograms per person per day)`,
             y = `Per capita mismanaged plastic waste (kilograms per person per day)`,
             label = Code.x)) +
  geom_point() +
  # Shift each label up by 0.015 y-units so it does not sit on its point.
  geom_text(nudge_y = 0.015) +
  labs(title = "Plastic Waste Around the World",
       subtitle = "Year 2010",
       caption = "Source: Our World in Data",
       x = "per capita plastic waste",
       y = "per capita mismanaged plastic waste") +
  theme_minimal()