# Load the GDP vs. plastic-waste workbook and pass it through clean_names();
# the printed preview below shows the resulting snake_case column names that
# the rest of the script refers to.
data <- clean_names(read_excel("GDP_vs_PlasticWaste_Analysis.xlsx"))
# Preview the first rows to confirm column names and types.
head(data)
## # A tibble: 6 × 6
## country gdp_per_capita_usd plastic_waste_per_ca…¹ total_plastic_waste_…²
## <chr> <dbl> <dbl> <dbl>
## 1 Albania 9927 0.069 73364
## 2 Algeria 12871 0.144 1898343
## 3 Angola 5898 0.062 528843
## 4 Antigua and … 19213 0.66 2753550
## 5 Argentina 18712 0.183 2753550
## 6 Aruba 35974 0.252 9352
## # ℹ abbreviated names: ¹plastic_waste_per_capita_kg,
## # ²total_plastic_waste_generation_tonnes
## # ℹ 2 more variables: mismanaged_plastic_waste_tonnes <dbl>,
## # managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills <dbl>
# Reshape wide -> long: stack the three waste columns so that each country
# contributes one row per waste type. The former column name goes into
# `waste_type` and its value into `waste_amount`.
# NOTE(review): in the printed preview the managed_* values (e.g. 0.0481,
# 0.597) are orders of magnitude smaller than the total/mismanaged tonnages;
# confirm that column's units before comparing `waste_amount` across types.
waste_long <- pivot_longer(
  data,
  cols = c(
    total_plastic_waste_generation_tonnes,
    mismanaged_plastic_waste_tonnes,
    managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills
  ),
  names_to = "waste_type",
  values_to = "waste_amount"
)
head(waste_long)
## # A tibble: 6 × 5
## country gdp_per_capita_usd plastic_waste_per_capita_kg waste_type waste_amount
## <chr> <dbl> <dbl> <chr> <dbl>
## 1 Albania 9927 0.069 total_pla… 73364
## 2 Albania 9927 0.069 mismanage… 69833
## 3 Albania 9927 0.069 managed_p… 0.0481
## 4 Algeria 12871 0.144 total_pla… 1898343
## 5 Algeria 12871 0.144 mismanage… 764578
## 6 Algeria 12871 0.144 managed_p… 0.597
# Reverse the reshape: spread `waste_type` back out into one column per type,
# with cells taken from `waste_amount`. The result is printed only, not stored.
pivot_wider(
  waste_long,
  names_from = waste_type,
  values_from = waste_amount
)
## # A tibble: 150 × 6
## country gdp_per_capita_usd plastic_waste_per_ca…¹ total_plastic_waste_…²
## <chr> <dbl> <dbl> <dbl>
## 1 Albania 9927 0.069 73364
## 2 Algeria 12871 0.144 1898343
## 3 Angola 5898 0.062 528843
## 4 Antigua and… 19213 0.66 2753550
## 5 Argentina 18712 0.183 2753550
## 6 Aruba 35974 0.252 9352
## 7 Australia 41464 0.112 900658
## 8 Bahamas 29222 0.39 51364
## 9 Bahrain 40571 0.132 59785
## 10 Bangladesh 2443 0.034 1888170
## # ℹ 140 more rows
## # ℹ abbreviated names: ¹plastic_waste_per_capita_kg,
## # ²total_plastic_waste_generation_tonnes
## # ℹ 2 more variables: mismanaged_plastic_waste_tonnes <dbl>,
## # managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills <dbl>
# Build a synthetic text column that glues two measurements together around a
# "/" (e.g. "9927 USD/0.069 kg") so there is something to split apart.
data_sep <- mutate(
  data,
  combined = paste0(
    gdp_per_capita_usd, " USD/",
    plastic_waste_per_capita_kg, " kg"
  )
)
# Split `combined` at the slash into `gdp` and `waste`. Both pieces come back
# as character and still carry their unit suffix; `combined` itself is dropped.
separate(data_sep, col = combined, into = c("gdp", "waste"), sep = "/")
## # A tibble: 150 × 8
## country gdp_per_capita_usd plastic_waste_per_ca…¹ total_plastic_waste_…²
## <chr> <dbl> <dbl> <dbl>
## 1 Albania 9927 0.069 73364
## 2 Algeria 12871 0.144 1898343
## 3 Angola 5898 0.062 528843
## 4 Antigua and… 19213 0.66 2753550
## 5 Argentina 18712 0.183 2753550
## 6 Aruba 35974 0.252 9352
## 7 Australia 41464 0.112 900658
## 8 Bahamas 29222 0.39 51364
## 9 Bahrain 40571 0.132 59785
## 10 Bangladesh 2443 0.034 1888170
## # ℹ 140 more rows
## # ℹ abbreviated names: ¹plastic_waste_per_capita_kg,
## # ²total_plastic_waste_generation_tonnes
## # ℹ 4 more variables: mismanaged_plastic_waste_tonnes <dbl>,
## # managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills <dbl>,
## # gdp <chr>, waste <chr>
# Split `combined` as before, then glue the two pieces back together into a
# single `combined_rate` column using " | " as the separator.
separate(data_sep, col = combined, into = c("gdp", "waste"), sep = "/") %>%
  unite(col = "combined_rate", gdp, waste, sep = " | ")
## # A tibble: 150 × 7
## country gdp_per_capita_usd plastic_waste_per_ca…¹ total_plastic_waste_…²
## <chr> <dbl> <dbl> <dbl>
## 1 Albania 9927 0.069 73364
## 2 Algeria 12871 0.144 1898343
## 3 Angola 5898 0.062 528843
## 4 Antigua and… 19213 0.66 2753550
## 5 Argentina 18712 0.183 2753550
## 6 Aruba 35974 0.252 9352
## 7 Australia 41464 0.112 900658
## 8 Bahamas 29222 0.39 51364
## 9 Bahrain 40571 0.132 59785
## 10 Bangladesh 2443 0.034 1888170
## # ℹ 140 more rows
## # ℹ abbreviated names: ¹plastic_waste_per_capita_kg,
## # ²total_plastic_waste_generation_tonnes
## # ℹ 3 more variables: mismanaged_plastic_waste_tonnes <dbl>,
## # managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills <dbl>,
## # combined_rate <chr>
# Create a missing-data example from the real data: blank out the country
# name on every 5th row (rows 5, 10, 15, ...).
example_missing <- data %>%
  mutate(
    # if_else() with a typed NA keeps `country` character no matter which rows
    # match; base ifelse() with a bare (logical) NA derives the result type
    # from the branches that happen to be hit.
    country = if_else(row_number() %% 5 == 0, NA_character_, country)
  )
# Fill each gap with the nearest non-missing value above it.
# NOTE: demonstration only - each filled row ends up carrying the previous
# row's country name next to its own measurements (see row 5 in the output).
example_missing %>%
  fill(country, .direction = "down")
## # A tibble: 150 × 6
## country gdp_per_capita_usd plastic_waste_per_ca…¹ total_plastic_waste_…²
## <chr> <dbl> <dbl> <dbl>
## 1 Albania 9927 0.069 73364
## 2 Algeria 12871 0.144 1898343
## 3 Angola 5898 0.062 528843
## 4 Antigua and… 19213 0.66 2753550
## 5 Antigua and… 18712 0.183 2753550
## 6 Aruba 35974 0.252 9352
## 7 Australia 41464 0.112 900658
## 8 Bahamas 29222 0.39 51364
## 9 Bahrain 40571 0.132 59785
## 10 Bahrain 2443 0.034 1888170
## # ℹ 140 more rows
## # ℹ abbreviated names: ¹plastic_waste_per_capita_kg,
## # ²total_plastic_waste_generation_tonnes
## # ℹ 2 more variables: mismanaged_plastic_waste_tonnes <dbl>,
## # managed_plastic_waste_tonnes_recycled_incinerated_sealed_landfills <dbl>