Import data

# Read the workbook into `data`
data <- read_excel(path = "GDP_vs_PlasticWaste_Analysis.xlsx")

# Replace the six column headers positionally with short names.
# NOTE(review): the preview printed in this document shows "Continent" and
# "Income" as <dbl> columns, so these labels may not match the workbook's
# real headers -- confirm against the file before relying on them.
names(data) <- c(
  "Country", "GDP", "Waste", "Recycling", "Continent", "Income"
)

# Preview the first rows
head(data)
## # A tibble: 6 × 6
##   Country               GDP Waste Recycling Continent Income
##   <chr>               <dbl> <dbl>     <dbl>     <dbl>  <dbl>
## 1 Albania              9927 0.069     73364     69833 0.0481
## 2 Algeria             12871 0.144   1898343    764578 0.597 
## 3 Angola               5898 0.062    528843    236946 0.552 
## 4 Antigua and Barbuda 19213 0.66    2753550       627 1.00  
## 5 Argentina           18712 0.183   2753550    465808 0.831 
## 6 Aruba               35974 0.252      9352      3967 0.576

Filter rows

# Keep only the rows where GDP exceeds 40,000 and Waste exceeds 100.
# NOTE(review): the printed result has 0 rows, and every Waste value shown
# in this document is below 1 -- the cutoff of 100 looks out of scale for
# this column; confirm the intended threshold.
high_gdp_waste <- data %>%
  filter(GDP > 40000 & Waste > 100)
high_gdp_waste
## # A tibble: 0 × 6
## # ℹ 6 variables: Country <chr>, GDP <dbl>, Waste <dbl>, Recycling <dbl>,
## #   Continent <dbl>, Income <dbl>

Arrange rows

# Sort the rows from the largest to the smallest value of Recycling.
# NOTE(review): the Recycling values printed in this document are in the
# millions, which does not look like a rate -- confirm what this column
# actually holds.
arranged_data <- data %>%
  arrange(desc(Recycling))
arranged_data
## # A tibble: 150 × 6
##    Country         GDP Waste Recycling Continent Income
##    <chr>         <dbl> <dbl>     <dbl>     <dbl>  <dbl>
##  1 China          9526 0.121  59079741  12272200  0.792
##  2 United States 49374 0.335  37825550    267469  0.993
##  3 Germany       40429 0.485  14476561     50676  0.996
##  4 Brazil        14538 0.165  11852055   3296700  0.722
##  5 Japan         35750 0.171   7993489     35684  0.996
##  6 Pakistan       4284 0.103   6412210   1346460  0.790
##  7 Russia        23108 0.112   5839685    363389  0.938
##  8 Turkey        17959 0.212   5596657   1656110  0.704
##  9 Egypt          9857 0.178   5464471   1435510  0.737
## 10 Indonesia      8433 0.057   5045714    824234  0.837
## # ℹ 140 more rows

Select columns

# Keep just three columns: Country, GDP and Waste (in that order)
selected_data <- data %>%
  select(Country, GDP, Waste)
selected_data
## # A tibble: 150 × 3
##    Country               GDP Waste
##    <chr>               <dbl> <dbl>
##  1 Albania              9927 0.069
##  2 Algeria             12871 0.144
##  3 Angola               5898 0.062
##  4 Antigua and Barbuda 19213 0.66 
##  5 Argentina           18712 0.183
##  6 Aruba               35974 0.252
##  7 Australia           41464 0.112
##  8 Bahamas             29222 0.39 
##  9 Bahrain             40571 0.132
## 10 Bangladesh           2443 0.034
## # ℹ 140 more rows

Add columns

# Append Waste_per_GDP: Waste divided by GDP, scaled up by a factor of 1000
data_with_ratio <- data %>%
  mutate(Waste_per_GDP = (Waste / GDP) * 1000)
data_with_ratio
## # A tibble: 150 × 7
##    Country               GDP Waste Recycling Continent Income Waste_per_GDP
##    <chr>               <dbl> <dbl>     <dbl>     <dbl>  <dbl>         <dbl>
##  1 Albania              9927 0.069     73364     69833 0.0481       0.00695
##  2 Algeria             12871 0.144   1898343    764578 0.597        0.0112 
##  3 Angola               5898 0.062    528843    236946 0.552        0.0105 
##  4 Antigua and Barbuda 19213 0.66    2753550       627 1.00         0.0344 
##  5 Argentina           18712 0.183   2753550    465808 0.831        0.00978
##  6 Aruba               35974 0.252      9352      3967 0.576        0.00701
##  7 Australia           41464 0.112    900658      5266 0.994        0.00270
##  8 Bahamas             29222 0.39      51364      2212 0.957        0.0133 
##  9 Bahrain             40571 0.132     59785      1043 0.983        0.00325
## 10 Bangladesh           2443 0.034   1888170   1021990 0.459        0.0139 
## # ℹ 140 more rows

Summarize by groups

# For each distinct Continent value, compute the mean of Recycling, the mean
# of Waste (both ignoring NA) and the number of rows in the group.
# NOTE(review): the printed result has 144 groups for a 150-row table and
# Continent is a <dbl> column, so this is close to one group per row --
# confirm that Continent really holds a grouping variable.
group_summary <- summarise(
  group_by(data, Continent),
  Avg_Recycling = mean(Recycling, na.rm = TRUE),
  Avg_Waste = mean(Waste, na.rm = TRUE),
  Countries = n()
)
group_summary
## # A tibble: 144 × 4
##    Continent Avg_Recycling Avg_Waste Countries
##        <dbl>         <dbl>     <dbl>     <int>
##  1         0        18291      0.242         6
##  2         3         3263      0.252         1
##  3        16        18116.     0.168         2
##  4        33        11730      0.358         1
##  5        60        43134      0.322         1
##  6        74         3859      0.103         1
##  7        97        12280      0.654         1
##  8       116         1076      0.144         1
##  9       151        32620      0.281         1
## 10       259        32377      0.214         1
## # ℹ 134 more rows

5. Visualizations

GDP vs Plastic Waste

# Scatter plot of Waste against GDP, one point per row, coloured by the
# Continent column.
# NOTE(review): Continent prints as a numeric (<dbl>) column elsewhere in
# this document -- confirm that colouring by it is meaningful.
ggplot(data = data, mapping = aes(x = GDP, y = Waste, colour = Continent)) +
  geom_point(alpha = 0.8, size = 3) +
  theme_minimal() +
  ggtitle("GDP vs Plastic Waste per Capita") +
  xlab("GDP per Capita (USD)") +
  ylab("Plastic Waste per Capita (kg)")

GDP vs Recycling Rate

# Scatter plot of Recycling against GDP, one point per row, coloured by the
# Continent column.
# NOTE(review): the y axis is labelled as a percentage rate, but the
# Recycling values printed in this document are in the millions, and
# Continent prints as a numeric column -- confirm both mappings.
ggplot(
  data = data,
  mapping = aes(x = GDP, y = Recycling, colour = Continent)
) +
  geom_point(alpha = 0.8, size = 3) +
  theme_minimal() +
  ggtitle("GDP vs Recycling Rate") +
  xlab("GDP per Capita (USD)") +
  ylab("Recycling Rate (%)")

Interactive Plot: GDP vs Waste

# Interactive scatter of Waste against GDP; markers are coloured by the
# Continent column and carry the Country value as hover text.
# NOTE(review): Continent prints as a numeric (<dbl>) column elsewhere in
# this document -- confirm that colouring by it is meaningful.
data %>%
  plot_ly(
    x = ~GDP,
    y = ~Waste,
    type = "scatter",
    mode = "markers",
    color = ~Continent,
    hovertext = ~Country,
    marker = list(size = 10)
  ) %>%
  layout(
    title = "GDP vs Plastic Waste (Interactive)",
    xaxis = list(title = "GDP per Capita (USD)"),
    yaxis = list(title = "Plastic Waste per Capita (kg)")
  )