# Load and clean data
# Read the Excel workbook, then pass the result through clean_names() so the
# columns can be referenced by the normalised names used throughout the script.
data <- clean_names(read_excel("GDP_vs_PlasticWaste_Analysis.xlsx"))
# Histogram of GDP per capita
# Bins are 5,000 units wide on the gdp_per_capita_usd axis.
data %>%
  ggplot(mapping = aes(x = gdp_per_capita_usd)) +
  geom_histogram(binwidth = 5000, fill = "steelblue", color = "white") +
  labs(
    title = "Distribution of GDP per Capita",
    x = "GDP per Capita (USD)"
  )
# Quartiles, mean and range of the two per-capita measures
summary(data[["gdp_per_capita_usd"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 660 4406 12065 19184 29528 125141
summary(data[["plastic_waste_per_capita_kg"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01000 0.08775 0.14400 0.17282 0.22825 0.68600
# Outliers
# GDP keeps the explicit 100,000 USD cutoff. The plastic-waste cutoff is the
# upper Tukey fence (Q3 + 1.5 * IQR) computed from the column itself: the
# earlier fixed cutoff of 150 could never match, because
# plastic_waste_per_capita_kg peaks at 0.686 in this data set.
data %>%
  filter(
    gdp_per_capita_usd > 100000 |
      plastic_waste_per_capita_kg >
        unname(quantile(plastic_waste_per_capita_kg, 0.75, na.rm = TRUE)) +
          1.5 * IQR(plastic_waste_per_capita_kg, na.rm = TRUE)
  )
## (recorded output omitted - re-run to regenerate. With the earlier cutoffs the
## only match was Qatar, via gdp_per_capita_usd = 125141; that row still
## matches, and rows above the plastic-waste fence are now listed as well.)
# Count missing values
# is.na() yields a logical matrix; colSums() then counts the TRUEs per column.
data %>%
  is.na() %>%
  colSums()
## country
## 0
## gdp_per_capita_usd
## 0
## plastic_waste_per_capita_kg
## 0
## total_plastic_waste_generation_tonnes
## 0
## mismanaged_plastic_waste_tonnes
## 0
## managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills
## 0
# Add GDP level as categorical
# Tiers by GDP per capita (USD): above 40,000 is "High income", above 15,000 is
# "Middle income", anything else is "Low income".
data <- data %>%
  mutate(
    gdp_level = case_when(
      # Without this guard a missing GDP would fall through to the catch-all
      # and be reported as "Low income"; keep it missing instead.
      is.na(gdp_per_capita_usd) ~ NA_character_,
      gdp_per_capita_usd > 40000 ~ "High income",
      gdp_per_capita_usd > 15000 ~ "Middle income",
      TRUE ~ "Low income"
    )
  )
# Boxplot of plastic waste by GDP level
# gdp_level is converted to a factor with explicit levels for this plot only,
# so the boxes and legend run Low -> Middle -> High instead of the alphabetical
# High / Low / Middle order a plain character column would produce.
data %>%
  mutate(
    gdp_level = factor(
      gdp_level,
      levels = c("Low income", "Middle income", "High income")
    )
  ) %>%
  ggplot(aes(x = gdp_level, y = plastic_waste_per_capita_kg, fill = gdp_level)) +
  geom_boxplot() +
  labs(
    title = "Plastic Waste per Capita by GDP Level",
    x = "GDP Level",
    y = "Plastic Waste per Capita (kg)",
    fill = "GDP Level"
  )
# Categorize recycling rates
# Efficiency is based on the managed share of each country's total plastic
# waste rather than on raw managed tonnage: the earlier tonnage cutoffs
# (> 1,000,000 / > 100,000) placed every country in "Low", and absolute tonnage
# is not a rate.
# NOTE(review): assumes the managed and total columns share the same unit, as
# their names suggest - confirm. The 0.75 / 0.40 cutoffs are analyst choices;
# adjust them to the definition the report should use.
data <- data %>%
  mutate(
    managed_waste_share =
      managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills /
        total_plastic_waste_generation_tonnes,
    recycling_efficiency = factor(
      case_when(
        # A zero or missing total gives a non-finite share; leave it unclassified.
        !is.finite(managed_waste_share) ~ NA_character_,
        managed_waste_share >= 0.75 ~ "High",
        managed_waste_share >= 0.40 ~ "Moderate",
        TRUE ~ "Low"
      ),
      # Fixed level order; table() also shows levels that have no countries.
      levels = c("Low", "Moderate", "High")
    )
  )
# Table of GDP level vs recycling efficiency
table(data$gdp_level, data$recycling_efficiency)
## (recorded output omitted - re-run to regenerate. With the earlier tonnage
## cutoffs the table had a single "Low" column: High income 21, Low income 83,
## Middle income 46.)
# Scatterplot: GDP vs Plastic Waste per Capita
# Semi-transparent points with a red linear fit drawn without a confidence band.
data %>%
  ggplot(aes(gdp_per_capita_usd, plastic_waste_per_capita_kg)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(title = "GDP per Capita vs Plastic Waste per Capita")
## `geom_smooth()` using formula = 'y ~ x'
# Categorize GDP to show model effect
# cut() bins are right-closed by default: (0, 15000], (15000, 40000],
# (40000, Inf]; the result is a factor ordered Low, Middle, High.
data[["gdp_bracket"]] <- cut(
  data[["gdp_per_capita_usd"]],
  breaks = c(0, 15000, 40000, Inf),
  labels = c("Low", "Middle", "High")
)
# Bar chart showing average plastic waste per capita by GDP bracket
# The per-bracket means are computed first (missing values ignored) and the
# resulting one-row-per-bracket table is what gets plotted.
ggplot(
  data = summarise(
    group_by(data, gdp_bracket),
    avg_waste = mean(plastic_waste_per_capita_kg, na.rm = TRUE)
  ),
  mapping = aes(x = gdp_bracket, y = avg_waste, fill = gdp_bracket)
) +
  geom_col() +
  labs(
    title = "Average Plastic Waste per Capita by GDP Bracket",
    x = "GDP Bracket",
    y = "Avg Plastic Waste per Capita (kg)"
  ) +
  theme_minimal()
This bar chart compares average plastic waste per capita across the Low, Middle, and High GDP brackets, showing whether wealthier countries generate more plastic waste per person.