Intro - What a Waste

This report analyzes the relationship between Country Income and garbage collection / recycling. As noted by the World Bank, What a Waste is a global project to aggregate data on solid waste management from around the world; it currently covers over 330 cities, encompassing most countries. The data used is based on the latest release of the datasets at the time of download.

Analysis

# Libraries
library(ggplot2)
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Variables
# Country-level dataset; the path is resolved against the working directory.
waste_by_country <- read_csv(file = "Country level dataset.csv")
## Rows: 217 Columns: 51
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (10): iso3c, region_id, country_name, income_id, other_information_infor...
## dbl (41): gdp, composition_food_organic_waste_percent, composition_glass_per...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Sanitize (improve legibility): map the short income codes to readable labels.
# The lookup is listed in the order the brackets should appear in tables and
# legends.
income_labels <- c(
  HIC = "High Income",
  UMC = "Upper Middle Income",
  LMC = "Lower Middle Income",
  LIC = "Low Income"
)

# Only rows holding one of the known codes are rewritten; anything else is
# left as-is at this step.
has_known_code <- waste_by_country$income_id %in% names(income_labels)
waste_by_country$income_id[has_known_code] <-
  income_labels[waste_by_country$income_id[has_known_code]]

# Store the labels as an ordered factor so output follows the bracket order
# above rather than alphabetical order. Values outside the four labels
# become NA.
waste_by_country$income_id <- factor(
  waste_by_country$income_id,
  levels = unname(income_labels),
  ordered = TRUE
)

# Subsets (remove NA): keep only the countries that report the metric
# each analysis is based on.
clean_waste_coverage <- waste_by_country %>%
  filter(!is.na(waste_collection_coverage_total_percent_of_waste))
clean_recycle_coverage <- waste_by_country %>%
  filter(!is.na(waste_treatment_recycling_percent))

Summarize

# View countries in alphabetical order.
# Plain row/column indexing is used instead of summarize(): summarize() is
# meant to return one row per group, and returning one row per country from it
# is deprecated in dplyr >= 1.1.0. Indexing a tibble by column name keeps the
# result a one-column tibble, and order() keeps the same sort order as before.
waste_by_country[order(waste_by_country$country_name), "country_name"]
## # A tibble: 217 x 1
##    country_name       
##    <chr>              
##  1 Afghanistan        
##  2 Albania            
##  3 Algeria            
##  4 American Samoa     
##  5 Andorra            
##  6 Angola             
##  7 Antigua and Barbuda
##  8 Argentina          
##  9 Armenia            
## 10 Aruba              
## # ... with 207 more rows

Count

# Number of countries in each income bracket
waste_by_country %>%
  count(income_id, name = "countries")
## # A tibble: 4 x 2
##   income_id           countries
##   <ord>                   <int>
## 1 High Income                81
## 2 Upper Middle Income        56
## 3 Lower Middle Income        47
## 4 Low Income                 33

Group By

# Aggregate by income_id and summarize
# Waste Collection: per income bracket, the number of reporting countries,
# the mean of the gdp column and the mean collection coverage (1 decimal).
clean_waste_coverage %>%
  group_by(income_id) %>%
  summarize(
    countries = n(),
    # The subset only guarantees that the coverage column is non-NA, so skip
    # missing gdp values instead of letting one NA blank the bracket's mean.
    gdp_per_capita = round(mean(gdp, na.rm = TRUE), 1),
    waste_collection_pct = round(
      mean(waste_collection_coverage_total_percent_of_waste),
      1
    )
  )
## # A tibble: 4 x 4
##   income_id           countries gdp_per_capita waste_collection_pct
##   <ord>                   <int>          <dbl>                <dbl>
## 1 High Income                28         45999.                 96.7
## 2 Upper Middle Income        12         20326.                 84  
## 3 Lower Middle Income         5          4594.                 59.8
## 4 Low Income                  5          1959.                 47.4
# Recycling: per income bracket, the number of reporting countries, the mean
# of the gdp column and the mean recycling percentage (1 decimal).
clean_recycle_coverage %>%
  group_by(income_id) %>%
  summarize(
    countries = n(),
    # The subset only guarantees that the recycling column is non-NA, so skip
    # missing gdp values instead of letting one NA blank the bracket's mean.
    gdp_per_capita = round(mean(gdp, na.rm = TRUE), 1),
    recycling_coverage_pct = round(mean(waste_treatment_recycling_percent), 1)
  )
## # A tibble: 4 x 4
##   income_id           countries gdp_per_capita recycling_coverage_pct
##   <ord>                   <int>          <dbl>                  <dbl>
## 1 High Income                60         49994.                   25.5
## 2 Upper Middle Income        31         15001.                    9.7
## 3 Lower Middle Income        22          6839.                   10.7
## 4 Low Income                 11          2956.                    7.9

Plots

# Waste Collection: one box per income bracket (via the group aesthetic),
# filled by bracket, with gdp on the x axis and collection coverage on the y.
ggplot(
  data = clean_waste_coverage,
  mapping = aes(
    x = gdp,
    y = waste_collection_coverage_total_percent_of_waste,
    group = income_id,
    fill = income_id
  )
) +
  geom_boxplot() +
  labs(
    x = "GDP per Capita",
    y = "% Waste Collected",
    title = "GDP per Capita and % Waste Collected grouped by Income ID"
  )

# Recycling: one box per income bracket (via the group aesthetic), filled by
# bracket, with gdp on the x axis and the recycling percentage on the y.
ggplot(
  data = clean_recycle_coverage,
  mapping = aes(
    x = gdp,
    y = waste_treatment_recycling_percent,
    group = income_id,
    fill = income_id
  )
) +
  geom_boxplot() +
  labs(
    x = "GDP per Capita",
    y = "% Waste Recycled",
    title = "GDP per Capita and % Waste Recycled grouped by Income ID"
  )

Conclusion

When visualized, the relationship between higher Country Income and garbage collection / recycling is clear: wealthier countries tend to collect and recycle significantly more garbage. However, even in the High Income bracket, recycling rates on average barely surpass 25 percent. This is especially concerning given the large gap (~24 percentage points: 84.0% vs. 59.8% on average) in garbage collection between the Upper Middle Income and the Lower Middle Income groups. As more countries continue to industrialize and produce larger amounts of waste, how much of that garbage is collected and subsequently recycled will be an important metric to follow.