Introduction

Questions

Variation

Visualizing distributions

# Read the workbook, then pass it through clean_names() to standardise
# the column names
data <- clean_names(read_excel("GDP_vs_PlasticWaste_Analysis.xlsx"))

# Histogram: spread of GDP per capita, counted in 5,000-USD bins
ggplot(data = data, mapping = aes(gdp_per_capita_usd)) +
  geom_histogram(color = "white", fill = "steelblue", binwidth = 5000) +
  labs(x = "GDP per Capita (USD)", title = "Distribution of GDP per Capita")

Typical values

# Min, quartiles, median, mean and max of GDP per capita
summary(data[["gdp_per_capita_usd"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     660    4406   12065   19184   29528  125141
# Min, quartiles, median, mean and max of plastic waste per capita
summary(data[["plastic_waste_per_capita_kg"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01000 0.08775 0.14400 0.17282 0.22825 0.68600

Unusual values

# Rows flagged as extreme on either measure.
# NOTE(review): the summary of plastic_waste_per_capita_kg printed earlier
# peaks at 0.686, so the `> 150` test matches nothing there and only the GDP
# condition can select rows. The waste cut-off may be in the wrong units --
# confirm the intended threshold.
filter(
  data,
  gdp_per_capita_usd > 100000 | plastic_waste_per_capita_kg > 150
)
## # A tibble: 1 × 6
##   country gdp_per_capita_usd plastic_waste_per_capita_kg total_plastic_waste_g…¹
##   <chr>                <dbl>                       <dbl>                   <dbl>
## 1 Qatar               125141                        0.16                  103933
## # ℹ abbreviated name: ¹​total_plastic_waste_generation_tonnes
## # ℹ 2 more variables: mismanaged_plastic_waste_tonnes <dbl>,
## #   managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills <dbl>

Missing values

# Number of NA entries in each column
data %>%
  is.na() %>%
  colSums()
##                                                            country 
##                                                                  0 
##                                                 gdp_per_capita_usd 
##                                                                  0 
##                                        plastic_waste_per_capita_kg 
##                                                                  0 
##                              total_plastic_waste_generation_tonnes 
##                                                                  0 
##                                    mismanaged_plastic_waste_tonnes 
##                                                                  0 
## managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills 
##                                                                  0

Covariation

A categorical and continuous variable

# Derive a categorical income band from GDP per capita. case_when() uses the
# first branch whose condition is TRUE, so thresholds run from highest to
# lowest. A missing GDP is kept as NA instead of falling through the catch-all
# branch into "Low income"; this agrees with cut(), which also yields NA for
# missing input when gdp_bracket is built from the same column.
data <- data %>%
  mutate(gdp_level = case_when(
    is.na(gdp_per_capita_usd) ~ NA_character_,
    gdp_per_capita_usd > 40000 ~ "High income",
    gdp_per_capita_usd > 15000 ~ "Middle income",
    TRUE ~ "Low income"
  ))

# Box plots comparing per-capita plastic waste across the GDP levels
ggplot(
  data,
  aes(gdp_level, plastic_waste_per_capita_kg, fill = gdp_level)
) +
  geom_boxplot() +
  labs(
    y = "Plastic Waste per Capita (kg)",
    title = "Plastic Waste per Capita by GDP Level"
  )

Two categorical variables

# Band each row by its tonnage of managed plastic waste. A missing tonnage is
# kept as NA rather than being counted as "Low" by the catch-all branch.
# NOTE(review): the bands are absolute tonnes, not a rate, and the cross-tab
# printed below places every row in "Low" -- the cut-offs may be on the wrong
# scale for this column; confirm before interpreting the categories.
data <- data %>%
  mutate(recycling_efficiency = case_when(
    is.na(managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills) ~ NA_character_,
    managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills > 1000000 ~ "High",
    managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills > 100000 ~ "Moderate",
    TRUE ~ "Low"
  ))

# Cross-tabulate GDP level (rows) against recycling efficiency (columns)
table(data[["gdp_level"]], data[["recycling_efficiency"]])
##                
##                 Low
##   High income    21
##   Low income     83
##   Middle income  46

Two continuous variables

# Scatter of per-capita plastic waste against GDP per capita, with a red
# linear-model fit overlaid and its confidence band switched off
ggplot(data, aes(gdp_per_capita_usd, plastic_waste_per_capita_kg)) +
  geom_point(alpha = 0.7) +
  geom_smooth(color = "red", se = FALSE, method = "lm") +
  labs(title = "GDP per Capita vs Plastic Waste per Capita")
## `geom_smooth()` using formula = 'y ~ x'

Patterns and models

# Bin GDP per capita into a three-level factor (Low / Middle / High).
# cut() closes intervals on the right by default, giving
# (0, 15000], (15000, 40000] and (40000, Inf].
data <- mutate(
  data,
  gdp_bracket = cut(
    gdp_per_capita_usd,
    breaks = c(0, 15000, 40000, Inf),
    labels = c("Low", "Middle", "High")
  )
)

# Mean per-capita plastic waste within each GDP bracket, drawn as one bar
# per bracket (NA waste values are dropped before averaging)
summarise(
  group_by(data, gdp_bracket),
  avg_waste = mean(plastic_waste_per_capita_kg, na.rm = TRUE)
) %>%
  ggplot(aes(gdp_bracket, avg_waste, fill = gdp_bracket)) +
  geom_col() +
  theme_minimal() +
  labs(
    x = "GDP Bracket",
    y = "Avg Plastic Waste per Capita (kg)",
    title = "Average Plastic Waste per Capita by GDP Bracket"
  )

This bar chart helps illustrate the pattern between economic level and average plastic waste, highlighting whether wealthier countries produce more plastic waste per person.