# Load the GDP / plastic-waste workbook into `data`.
data <- read_excel("GDP_vs_PlasticWaste_Analysis.xlsx")

# Overwrite the workbook's headers with short names, assigned by position.
# NOTE(review): the preview printed below reports the columns labelled
# "Continent" and "Income" as <dbl>, so these labels may not match the real
# headers -- confirm against the spreadsheet before relying on them.
names(data) <- c(
  "Country", "GDP", "Waste",
  "Recycling", "Continent", "Income"
)

# Preview the first rows
head(data)
## # A tibble: 6 × 6
## Country GDP Waste Recycling Continent Income
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Albania 9927 0.069 73364 69833 0.0481
## 2 Algeria 12871 0.144 1898343 764578 0.597
## 3 Angola 5898 0.062 528843 236946 0.552
## 4 Antigua and Barbuda 19213 0.66 2753550 627 1.00
## 5 Argentina 18712 0.183 2753550 465808 0.831
## 6 Aruba 35974 0.252 9352 3967 0.576
# Keep only the rows where GDP exceeds 40,000 and Waste exceeds 100.
# NOTE(review): the result printed below has 0 rows, and every Waste value
# visible in the previews is below 1 -- confirm the intended Waste
# threshold / unit.
high_gdp_waste <- data %>%
  filter(GDP > 40000 & Waste > 100)
high_gdp_waste
## # A tibble: 0 × 6
## # ℹ 6 variables: Country <chr>, GDP <dbl>, Waste <dbl>, Recycling <dbl>,
## # Continent <dbl>, Income <dbl>
# Sort the rows from the largest to the smallest value of Recycling.
# NOTE(review): the Recycling values printed below are in the millions, which
# does not look like a rate -- confirm what this column actually holds.
arranged_data <- data %>%
  arrange(desc(Recycling))
arranged_data
## # A tibble: 150 × 6
## Country GDP Waste Recycling Continent Income
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China 9526 0.121 59079741 12272200 0.792
## 2 United States 49374 0.335 37825550 267469 0.993
## 3 Germany 40429 0.485 14476561 50676 0.996
## 4 Brazil 14538 0.165 11852055 3296700 0.722
## 5 Japan 35750 0.171 7993489 35684 0.996
## 6 Pakistan 4284 0.103 6412210 1346460 0.790
## 7 Russia 23108 0.112 5839685 363389 0.938
## 8 Turkey 17959 0.212 5596657 1656110 0.704
## 9 Egypt 9857 0.178 5464471 1435510 0.737
## 10 Indonesia 8433 0.057 5045714 824234 0.837
## # ℹ 140 more rows
# Reduce the table to three columns: Country, GDP and Waste.
selected_data <- data %>%
  select(Country, GDP, Waste)
selected_data
## # A tibble: 150 × 3
## Country GDP Waste
## <chr> <dbl> <dbl>
## 1 Albania 9927 0.069
## 2 Algeria 12871 0.144
## 3 Angola 5898 0.062
## 4 Antigua and Barbuda 19213 0.66
## 5 Argentina 18712 0.183
## 6 Aruba 35974 0.252
## 7 Australia 41464 0.112
## 8 Bahamas 29222 0.39
## 9 Bahrain 40571 0.132
## 10 Bangladesh 2443 0.034
## # ℹ 140 more rows
# Append Waste_per_GDP: Waste divided by GDP, then scaled by 1000.
data_with_ratio <- data %>%
  mutate(Waste_per_GDP = Waste / GDP * 1000)
data_with_ratio
## # A tibble: 150 × 7
## Country GDP Waste Recycling Continent Income Waste_per_GDP
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Albania 9927 0.069 73364 69833 0.0481 0.00695
## 2 Algeria 12871 0.144 1898343 764578 0.597 0.0112
## 3 Angola 5898 0.062 528843 236946 0.552 0.0105
## 4 Antigua and Barbuda 19213 0.66 2753550 627 1.00 0.0344
## 5 Argentina 18712 0.183 2753550 465808 0.831 0.00978
## 6 Aruba 35974 0.252 9352 3967 0.576 0.00701
## 7 Australia 41464 0.112 900658 5266 0.994 0.00270
## 8 Bahamas 29222 0.39 51364 2212 0.957 0.0133
## 9 Bahrain 40571 0.132 59785 1043 0.983 0.00325
## 10 Bangladesh 2443 0.034 1888170 1021990 0.459 0.0139
## # ℹ 140 more rows
# Per-Continent summary: mean Recycling, mean Waste (missing values ignored)
# and the number of rows in each group.
# NOTE(review): the result printed below has 144 groups for 150 rows and a
# <dbl> Continent column, so "Continent" is probably not a categorical
# region label -- confirm the column mapping.
group_summary <- summarise(
  group_by(data, Continent),
  Avg_Recycling = mean(Recycling, na.rm = TRUE),
  Avg_Waste = mean(Waste, na.rm = TRUE),
  Countries = n()
)
group_summary
## # A tibble: 144 × 4
## Continent Avg_Recycling Avg_Waste Countries
## <dbl> <dbl> <dbl> <int>
## 1 0 18291 0.242 6
## 2 3 3263 0.252 1
## 3 16 18116. 0.168 2
## 4 33 11730 0.358 1
## 5 60 43134 0.322 1
## 6 74 3859 0.103 1
## 7 97 12280 0.654 1
## 8 116 1076 0.144 1
## 9 151 32620 0.281 1
## 10 259 32377 0.214 1
## # ℹ 134 more rows
# GDP vs Plastic Waste
# Static scatter plot of Waste against GDP, one point per row of `data`,
# with point colour mapped to the Continent column.
# NOTE(review): Continent is printed as <dbl> in the tibble previews, so the
# colour mapping is likely to be continuous rather than one colour per
# region -- confirm the column mapping.
ggplot(data, aes(x = GDP, y = Waste, color = Continent)) +
  geom_point(size = 3, alpha = 0.8) +
  labs(
    title = "GDP vs Plastic Waste per Capita",
    x = "GDP per Capita (USD)",
    y = "Plastic Waste per Capita (kg)"
  ) +
  theme_minimal()
# GDP vs Recycling Rate
# Static scatter plot of Recycling against GDP, one point per row of `data`,
# with point colour mapped to the Continent column.
# NOTE(review): the Recycling values shown in the tibble previews are in the
# thousands to millions, which does not match the "(%)" axis label, and
# Continent is printed as <dbl> -- confirm the column mapping.
ggplot(data, aes(x = GDP, y = Recycling, color = Continent)) +
  geom_point(size = 3, alpha = 0.8) +
  labs(
    title = "GDP vs Recycling Rate",
    x = "GDP per Capita (USD)",
    y = "Recycling Rate (%)"
  ) +
  theme_minimal()
# Interactive Plot: GDP vs Waste
# plotly scatter of Waste against GDP using markers of size 10, coloured by
# the Continent column, with the Country value supplied as hover text.
# NOTE(review): Continent is printed as <dbl> in the tibble previews --
# confirm it is the intended colour variable.
plot_ly(
  data,
  x = ~GDP,
  y = ~Waste,
  type = "scatter",
  mode = "markers",
  color = ~Continent,
  hovertext = ~Country,
  marker = list(size = 10)
) %>%
  layout(
    title = "GDP vs Plastic Waste (Interactive)",
    xaxis = list(title = "GDP per Capita (USD)"),
    yaxis = list(title = "Plastic Waste per Capita (kg)")
  )