# Load the GDP vs. plastic-waste workbook into `df`
df <- read_excel(path = "GDP_vs_PlasticWaste_Analysis.xlsx")
# Preview the first six rows
head(df, n = 6L)
## # A tibble: 6 × 6
## Country `GDP per Capita (USD)` Plastic Waste per Ca…¹ Total Plastic Waste …²
## <chr> <dbl> <dbl> <dbl>
## 1 Albania 9927 0.069 73364
## 2 Algeria 12871 0.144 1898343
## 3 Angola 5898 0.062 528843
## 4 Antigua … 19213 0.66 2753550
## 5 Argentina 18712 0.183 2753550
## 6 Aruba 35974 0.252 9352
## # ℹ abbreviated names: ¹`Plastic Waste per Capita (kg)`,
## # ²`Total Plastic Waste Generation (tonnes)`
## # ℹ 2 more variables: `Mismanaged Plastic Waste (tonnes)` <dbl>,
## # `Managed Plastic Waste (tonnes) (recycled, incinerated, sealed landfills)` <dbl>
Case 1 — numeric variables: compute the mean of each numeric column
# Keep just the numeric columns of df
numeric_df <- select(df, where(is.numeric))
# One mean per column, ignoring missing values; map_dbl returns a named
# double vector keyed by column name
col_means <- map_dbl(numeric_df, function(column) mean(column, na.rm = TRUE))
col_means
## GDP per Capita (USD)
## 1.918362e+04
## Plastic Waste per Capita (kg)
## 1.728200e-01
## Total Plastic Waste Generation (tonnes)
## 1.778347e+06
## Mismanaged Plastic Waste (tonnes)
## 2.617005e+05
## Managed Plastic Waste (tonnes) (recycled, incinerated, sealed landfills)
## 8.129179e-01
# Coefficient of variation: the standard deviation divided by the mean,
# with missing values dropped from both statistics.
cv <- function(x) {
  spread <- sd(x, na.rm = TRUE)
  centre <- mean(x, na.rm = TRUE)
  spread / centre
}
# Coefficient of variation for every numeric column, as a named vector
numeric_df %>% map_dbl(cv)
## GDP per Capita (USD)
## 1.0492355
## Plastic Waste per Capita (kg)
## 0.7568712
## Total Plastic Waste Generation (tonnes)
## 3.3505736
## Mismanaged Plastic Waste (tonnes)
## 4.0802447
## Managed Plastic Waste (tonnes) (recycled, incinerated, sealed landfills)
## 0.2718205
Case 2 — a grouping variable (factor): compute summaries within each group
# Per-country averages of GDP per capita and plastic waste per capita.
# Replace the grouping column and the back-ticked column names with the
# ones present in your own data.
summarise(
  group_by(df, Country),
  avg_gdp = mean(`GDP per Capita (USD)`, na.rm = TRUE),
  avg_plastic = mean(`Plastic Waste per Capita (kg)`, na.rm = TRUE),
  .groups = "drop"
)
## # A tibble: 150 × 3
## Country avg_gdp avg_plastic
## <chr> <dbl> <dbl>
## 1 Albania 9927 0.069
## 2 Algeria 12871 0.144
## 3 Angola 5898 0.062
## 4 Antigua and Barbuda 19213 0.66
## 5 Argentina 18712 0.183
## 6 Aruba 35974 0.252
## 7 Australia 41464 0.112
## 8 Bahamas 29222 0.39
## 9 Bahrain 40571 0.132
## 10 Bangladesh 2443 0.034
## # ℹ 140 more rows
Choose one of the two cases above and apply it to your data.
# Normalize numeric columns using a custom function
#
# Min-max rescaling: maps the smallest non-missing value of x to 0 and the
# largest to 1. Missing values are ignored when finding the range and stay
# missing in the result.
#
# Edge cases:
#   * empty or all-NA input is returned unchanged (avoids the "no non-missing
#     arguments" warnings from min()/max());
#   * constant input (zero range) yields 0 for every non-missing element
#     instead of NaN from a 0 / 0 division.
normalize <- function(x) {
  if (all(is.na(x))) {
    return(x)
  }
  lo <- min(x, na.rm = TRUE)
  span <- max(x, na.rm = TRUE) - lo
  # isTRUE() keeps a NaN span (e.g. all-Inf input) on the ordinary path
  # rather than failing inside if ().
  if (isTRUE(span == 0)) {
    return(x - lo)
  }
  (x - lo) / span
}
# Rescale every column of numeric_df with normalize(). across() is the
# column-wise idiom in dplyr and replaces purrr::map_df(), which is
# superseded as of purrr 1.0.0; column names are preserved.
normalized_data <- numeric_df %>%
  mutate(across(everything(), normalize))
# Preview the first rows of the normalized table
head(normalized_data)
## # A tibble: 6 × 5
## `GDP per Capita (USD)` `Plastic Waste per Capita (kg)` Total Plastic Waste G…¹
## <dbl> <dbl> <dbl>
## 1 0.0744 0.0873 0.00123
## 2 0.0981 0.198 0.0321
## 3 0.0421 0.0769 0.00894
## 4 0.149 0.962 0.0466
## 5 0.145 0.256 0.0466
## 6 0.284 0.358 0.000149
## # ℹ abbreviated name: ¹`Total Plastic Waste Generation (tonnes)`
## # ℹ 2 more variables: `Mismanaged Plastic Waste (tonnes)` <dbl>,
## # `Managed Plastic Waste (tonnes) (recycled, incinerated, sealed landfills)` <dbl>