# loading libraries
library(tidyverse)
library(tidyquant)
library(scales)
library(ggrepel)
library(readxl)
library(readr)
library(janitor)
library(here)
library(rnaturalearth)
library(countrycode)
library(wbstats)CASD_1
Loading Libraries
Reading in Data
# read in data
ghg_emissions_in_LUCF <- read_csv("ghg_emissions_in_LUCF.csv")
ghg_emissions_ex_LUCF <- read_csv("ghg_emissions_ex_LUCF.csv")
energy_imports <- read_excel("energy_imports.xlsx", skip = 6)
total_imports <- read_excel("total_imports.xlsx", skip = 6)
total_exports <- read_excel("total_exports.xlsx", skip = 6)
gdp_ppp <- wb_data("NY.GDP.MKTP.PP.CD", start_date = 1995, end_date = 2019)Tidy Climate Watch Data
# copy in useful functions for countrycodes
country_name_regex_to_iso3c <- function(country_name) {
country_name |>
countrycode(origin = "country.name",
destination = "iso3c",
origin_regex = TRUE)
}
iso3c_to_country_name <- function(iso3c) {
iso3c |>
countrycode(origin = "iso3c", destination = "country.name")
}
# tidy Climate Watch data
# Climate Watch 1: ghg_emissions_excluding_LUCF
# longer format and timeframe to 1995-2019, years as integers
ghg_emissions_ex_LUCF_2 <- ghg_emissions_ex_LUCF %>%
select(`Country/Region`, unit, `1995`:`2019`) %>%
pivot_longer(
cols = `1995`:`2019`,
names_to = "year",
values_to = "GHG_excl_LUCF") %>%
mutate(year = as.integer(year))
#drop_na()
# add country names, country codes, and flags
ghg_emissions_ex_LUCF_3 <- ghg_emissions_ex_LUCF_2 %>%
mutate(iso3c = country_name_regex_to_iso3c(`Country/Region`)) %>%
mutate(country_name = iso3c_to_country_name(`iso3c`)) Warning: There was 1 warning in `mutate()`.
ℹ In argument: `iso3c = country_name_regex_to_iso3c(`Country/Region`)`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: Data source, Micronesia
# problem with Micronesia
# problem with adding flags
# ghg_emissions_ex_LUCF_3 %>%
# mutate(flag = countrycode(sourcevar = iso3c,
# origin = iso3c,
# destination = "unicode.symbol"))
ghg_emissions_ex_LUCF_3 <- ghg_emissions_ex_LUCF_3 %>%
filter(unit != "Climate Watch")
# I think these data actually don't have regions unlike the OECD data
# move iso3c and country names to the front
ghg_emissions_ex_LUCF_4 <- ghg_emissions_ex_LUCF_3 %>%
relocate(iso3c, .before = 1) %>%
relocate(country_name, .before = 2)
final_ghg_ex_LUCF <- ghg_emissions_ex_LUCF_4
# Climate Watch 2: ghg emissions including LUCF
# longer format and time to 1995-2019, years as integers
ghg_emissions_in_LUCF_2 <- ghg_emissions_in_LUCF %>%
select(`Country/Region`, unit, `1995`:`2019`) %>%
pivot_longer(
cols = `1995`:`2019`,
names_to = "year",
values_to = "GHG_incl_LUCF") %>%
mutate(year = as.integer(year))
#drop_na()
# add country names, country codes, and flags
ghg_emissions_in_LUCF_3 <- ghg_emissions_in_LUCF_2 %>%
mutate(iso3c = country_name_regex_to_iso3c(`Country/Region`)) %>%
mutate(iso3c = ifelse(`Country/Region` == "Micronesia", "FSM", iso3c)) %>%
mutate(country_name = iso3c_to_country_name(`iso3c`)) Warning: There was 1 warning in `mutate()`.
ℹ In argument: `iso3c = country_name_regex_to_iso3c(`Country/Region`)`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: Data source, Micronesia
# problem with Micronesia (iso3c and country_name don't show up)
# ghg_emissions_ex_LUCF_3 %>% filter (`Country/Region` == "Micronesia")
# problem with adding flags
# ghg_emissions_in_LUCF_3 %>%
# mutate(flag = countrycode(sourcevar = iso3c,
# origin = iso3c,
# destination = "unicode.symbol"))
ghg_emissions_in_LUCF_3 <- ghg_emissions_in_LUCF_3 %>%
filter(unit != "Climate Watch")
# I think these data actually don't have regions unlike the OECD data
# move iso3c and country names to the front
ghg_emissions_in_LUCF_4 <- ghg_emissions_in_LUCF_3 %>%
relocate(iso3c, .before = 2) %>%
relocate(country_name, .before = 1)
final_ghg_in_LUCF <- ghg_emissions_in_LUCF_4Tidy OECD data
Tidy Energy Imports
# tidy OECD data 1: energy imports
energy_imports_2 <- energy_imports |>
select(-`...2`) |>
filter(Time != "Country") %>%
rename(geography = Time)
geography_tbl <- energy_imports_2 |>
select(geography)
geography_processed <- geography_tbl |>
separate(col = geography, into = c("geography_code", "geography_name"),
sep = ":", remove = FALSE) |>
mutate(across(c("geography_code", "geography_name"), str_trim)) |>
mutate(country_name = countrycode(sourcevar = geography_code,
origin = "iso3c",
destination = "country.name")) |>
mutate(country_or_region = if_else(is.na(country_name),
true = "region",
false = "country")) |>
mutate(country_name = if_else(is.na(country_name),
true = geography_name,
false = country_name)) |>
select(geography, country_name, iso3c = geography_code, country_or_region)Warning: There was 1 warning in `mutate()`.
ℹ In argument: `country_name = countrycode(sourcevar = geography_code, origin =
"iso3c", destination = "country.name")`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: APEC, ASEAN, DATA EXTRACTED ON 08 MAR 2023 20, EA19, EASIA, EU13, EU15, EU27_2020, EU28, G20, NONOECD, OECD, ROW, ZASI, ZEUR, ZNAM, ZOTH, ZSCA
energy_imports_processed <- geography_processed |>
left_join(energy_imports_2, by = "geography") |>
select(-geography) |>
pivot_longer(cols = matches("\\d{4}"),
names_to = "year",
names_transform = as.integer)
oecd_metadata_1 <- read_excel("energy_imports.xlsx",range = "a3:c6", col_names = c("metadata_parameter", "blank", "metadata_value")) |>
select(-blank) |>
pivot_wider(names_from = metadata_parameter, values_from = metadata_value)
my_oecd_metadata_1 <- tribble(~my_indicator_name,
"Emissions from Imported Electricity, Gas, and Heating/Cooling")
final_energy_imports <- energy_imports_processed |> bind_cols(oecd_metadata_1) |>
bind_cols(my_oecd_metadata_1) %>%
janitor::clean_names()
final_energy_imports # A tibble: 2,016 × 10
country_name iso3c count…¹ year value indic…² indus…³ partner unit my_in…⁴
<chr> <chr> <chr> <int> <dbl> <chr> <chr> <chr> <chr> <chr>
1 OECD member … OECD region 1995 439. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
2 OECD member … OECD region 1996 415. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
3 OECD member … OECD region 1997 414. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
4 OECD member … OECD region 1998 475. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
5 OECD member … OECD region 1999 551. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
6 OECD member … OECD region 2000 656. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
7 OECD member … OECD region 2001 569. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
8 OECD member … OECD region 2002 573. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
9 OECD member … OECD region 2003 689. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
10 OECD member … OECD region 2004 793. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
# … with 2,006 more rows, and abbreviated variable names ¹country_or_region,
# ²indicator, ³industry, ⁴my_indicator_name
Tidy Total Imports
total_imports_2 <- total_imports |>
select(-`...2`) |>
filter(Time != "Country") %>%
rename(geography = Time)
geography_tbl_2 <- total_imports_2 |>
select(geography)
geography_processed_2 <- geography_tbl_2 |>
separate(col = geography, into = c("geography_code", "geography_name"),
sep = ":", remove = FALSE) |>
mutate(across(c("geography_code", "geography_name"), str_trim)) |>
mutate(country_name = countrycode(sourcevar = geography_code,
origin = "iso3c",
destination = "country.name")) |>
mutate(country_or_region = if_else(is.na(country_name),
true = "region",
false = "country")) |>
mutate(country_name = if_else(is.na(country_name),
true = geography_name,
false = country_name)) |>
select(geography, country_name, iso3c = geography_code, country_or_region)Warning: There was 1 warning in `mutate()`.
ℹ In argument: `country_name = countrycode(sourcevar = geography_code, origin =
"iso3c", destination = "country.name")`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: APEC, ASEAN, DATA EXTRACTED ON 08 MAR 2023 20, EA19, EASIA, EU13, EU15, EU27_2020, EU28, G20, NONOECD, OECD, ROW, ZASI, ZEUR, ZNAM, ZOTH, ZSCA
total_imports_processed <- geography_processed_2 |>
left_join(total_imports_2, by = "geography") |>
select(-geography) |>
pivot_longer(cols = matches("\\d{4}"),
names_to = "year",
names_transform = as.integer)
oecd_metadata_2 <- read_excel("total_imports.xlsx",range = "a3:c6", col_names = c("metadata_parameter", "blank", "metadata_value")) |>
select(-blank) |>
pivot_wider(names_from = metadata_parameter, values_from = metadata_value)
my_oecd_metadata_2 <- tribble(~my_indicator_name,
"Emissions from all Imports")
final_total_imports <- total_imports_processed |> bind_cols(oecd_metadata_2) |>
bind_cols(my_oecd_metadata_2) %>%
janitor::clean_names()
final_total_imports # A tibble: 2,016 × 10
country_name iso3c count…¹ year value indic…² indus…³ partner unit my_in…⁴
<chr> <chr> <chr> <int> <dbl> <chr> <chr> <chr> <chr> <chr>
1 OECD member … OECD region 1995 1454. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
2 OECD member … OECD region 1996 1385. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
3 OECD member … OECD region 1997 1369. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
4 OECD member … OECD region 1998 1554. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
5 OECD member … OECD region 1999 1729. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
6 OECD member … OECD region 2000 1953. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
7 OECD member … OECD region 2001 1818. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
8 OECD member … OECD region 2002 1884. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
9 OECD member … OECD region 2003 2149. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
10 OECD member … OECD region 2004 2482. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
# … with 2,006 more rows, and abbreviated variable names ¹country_or_region,
# ²indicator, ³industry, ⁴my_indicator_name
Tidy Total Export Data
# tidy total export data
total_exports_2 <- total_exports |>
select(-`...2`) |>
filter(Time != "Country") %>%
rename(geography = Time)
geography_tbl_3 <- total_exports_2 |>
select(geography)
geography_processed_3 <- geography_tbl_3 |>
separate(col = geography, into = c("geography_code", "geography_name"),
sep = ":", remove = FALSE) |>
mutate(across(c("geography_code", "geography_name"), str_trim)) |>
mutate(country_name = countrycode(sourcevar = geography_code,
origin = "iso3c",
destination = "country.name")) |>
mutate(country_or_region = if_else(is.na(country_name),
true = "region",
false = "country")) |>
mutate(country_name = if_else(is.na(country_name),
true = geography_name,
false = country_name)) |>
select(geography, country_name, iso3c = geography_code, country_or_region)Warning: There was 1 warning in `mutate()`.
ℹ In argument: `country_name = countrycode(sourcevar = geography_code, origin =
"iso3c", destination = "country.name")`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: APEC, ASEAN, DATA EXTRACTED ON 08 MAR 2023 20, EA19, EASIA, EU13, EU15, EU27_2020, EU28, G20, NONOECD, OECD, ROW, ZASI, ZEUR, ZNAM, ZOTH, ZSCA
total_exports_processed <- geography_processed_3 |>
left_join(total_exports_2, by = "geography") |>
select(-geography) |>
pivot_longer(cols = matches("\\d{4}"),
names_to = "year",
names_transform = as.integer)
oecd_metadata_3 <- read_excel("total_exports.xlsx",range = "a3:c6", col_names = c("metadata_parameter", "blank", "metadata_value")) |>
select(-blank) |>
pivot_wider(names_from = metadata_parameter, values_from = metadata_value)
my_oecd_metadata_3 <- tribble(~my_indicator_name,
"Emissions from all Exports")
final_total_exports <- total_exports_processed |> bind_cols(oecd_metadata_3) |>
bind_cols(my_oecd_metadata_3) %>%
janitor::clean_names()
final_total_exports # A tibble: 2,016 × 10
country_name iso3c count…¹ year value indic…² indus…³ partner unit my_in…⁴
<chr> <chr> <chr> <int> <dbl> <chr> <chr> <chr> <chr> <chr>
1 OECD member … OECD region 1995 466. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
2 OECD member … OECD region 1996 508. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
3 OECD member … OECD region 1997 560. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
4 OECD member … OECD region 1998 528. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
5 OECD member … OECD region 1999 492. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
6 OECD member … OECD region 2000 527. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
7 OECD member … OECD region 2001 546. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
8 OECD member … OECD region 2002 529. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
9 OECD member … OECD region 2003 533. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
10 OECD member … OECD region 2004 590. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
# … with 2,006 more rows, and abbreviated variable names ¹country_or_region,
# ²indicator, ³industry, ⁴my_indicator_name
Tidy Worldbank Data
gdp_ppp_2 <- gdp_ppp %>%
rename(year = date) %>%
rename(GDP_PPP = NY.GDP.MKTP.PP.CD) %>%
select(iso3c, country, year, GDP_PPP)
gdp_ppp_3 <- gdp_ppp_2 %>%
mutate(country_name = iso3c_to_country_name(iso3c)) %>%
relocate(country_name, .before = 1)Warning: There was 1 warning in `mutate()`.
ℹ In argument: `country_name = iso3c_to_country_name(iso3c)`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: CHI, XKX
gdp_ppp_3 %>% filter(iso3c == "CHI" | iso3c == "XKX")# A tibble: 50 × 5
country_name iso3c country year GDP_PPP
<chr> <chr> <chr> <dbl> <dbl>
1 <NA> CHI Channel Islands 1995 NA
2 <NA> CHI Channel Islands 1996 NA
3 <NA> CHI Channel Islands 1997 NA
4 <NA> CHI Channel Islands 1998 NA
5 <NA> CHI Channel Islands 1999 NA
6 <NA> CHI Channel Islands 2000 NA
7 <NA> CHI Channel Islands 2001 NA
8 <NA> CHI Channel Islands 2002 NA
9 <NA> CHI Channel Islands 2003 NA
10 <NA> CHI Channel Islands 2004 NA
# … with 40 more rows
# set country_name values for Kosovo and Channel Islands, year as integer
gdp_ppp_3 <- gdp_ppp_3 %>%
mutate(country_name = ifelse(iso3c == "XKX", "Kosovo", country_name)) %>%
mutate(country_name = ifelse(iso3c == "CHI", "Channel Islands", country_name)) %>%
mutate(year = as.integer(year))
final_gdp_ppp <- gdp_ppp_3Saving Files
# saving files in processed_01 folder of project
write_csv(final_energy_imports, "./processed_01/final_energy_imports.csv")
write_csv(final_total_imports, "./processed_01/final_total_imports.csv")
write_csv(final_total_exports, "./processed_01/final_total_exports.csv")
write_csv(final_gdp_ppp, "./processed_01/final_gdp_ppp.csv")
write_csv(final_ghg_ex_LUCF, "./processed_01/final_ghg_ex_LUCF.csv")
write_csv(final_ghg_in_LUCF, "./processed_01/final_ghg_in_LUCF.csv")