CASD_1

Loading Libraries

# loading libraries
library(tidyverse)
library(tidyquant)
library(scales)
library(ggrepel)
library(readxl)
library(readr)
library(janitor)
library(here)
library(rnaturalearth)
library(countrycode)
library(wbstats)

Reading in Data

# read in data
ghg_emissions_in_LUCF <- read_csv("ghg_emissions_in_LUCF.csv")
ghg_emissions_ex_LUCF <- read_csv("ghg_emissions_ex_LUCF.csv")
energy_imports <- read_excel("energy_imports.xlsx", skip = 6)
total_imports <- read_excel("total_imports.xlsx", skip = 6)
total_exports <- read_excel("total_exports.xlsx", skip = 6)
gdp_ppp <- wb_data("NY.GDP.MKTP.PP.CD", start_date = 1995, end_date = 2019)

Tidy Climate Watch Data

# copy in useful functions for countrycodes
country_name_regex_to_iso3c <- function(country_name) {
  country_name |>
    countrycode(origin = "country.name", 
                                     destination = "iso3c",
                                     origin_regex = TRUE)
}

iso3c_to_country_name <- function(iso3c) {
  iso3c |>
  countrycode(origin = "iso3c", destination = "country.name")
}

# tidy Climate Watch data
# Climate Watch 1: ghg_emissions_excluding_LUCF

# longer format and timeframe to 1995-2019, years as integers
ghg_emissions_ex_LUCF_2 <- ghg_emissions_ex_LUCF %>% 
  select(`Country/Region`, unit, `1995`:`2019`) %>% 
    pivot_longer(
    cols = `1995`:`2019`, 
    names_to = "year",
    values_to = "GHG_excl_LUCF") %>% 
  mutate(year = as.integer(year))
  #drop_na()

# add country names, country codes, and flags
ghg_emissions_ex_LUCF_3 <- ghg_emissions_ex_LUCF_2 %>% 
  mutate(iso3c = country_name_regex_to_iso3c(`Country/Region`)) %>%
  mutate(country_name = iso3c_to_country_name(`iso3c`)) 
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `iso3c = country_name_regex_to_iso3c(`Country/Region`)`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: Data source, Micronesia
# problem with Micronesia
# problem with adding flags
# ghg_emissions_ex_LUCF_3 %>% 
# mutate(flag = countrycode(sourcevar = iso3c, 
                            # origin = iso3c, 
                            # destination = "unicode.symbol"))
  
ghg_emissions_ex_LUCF_3 <- ghg_emissions_ex_LUCF_3 %>% 
  filter(unit != "Climate Watch")

# I think these data actually don't have regions unlike the OECD data

# move iso3c and country names to the front
ghg_emissions_ex_LUCF_4 <- ghg_emissions_ex_LUCF_3 %>% 
  relocate(iso3c, .before = 1) %>% 
  relocate(country_name, .before = 2) 

final_ghg_ex_LUCF <- ghg_emissions_ex_LUCF_4
   
# Climate Watch 2: ghg emissions including LUCF

# longer format and time to 1995-2019, years as integers
ghg_emissions_in_LUCF_2 <- ghg_emissions_in_LUCF %>% 
  select(`Country/Region`, unit, `1995`:`2019`) %>% 
    pivot_longer(
    cols = `1995`:`2019`, 
    names_to = "year",
    values_to = "GHG_incl_LUCF") %>% 
   mutate(year = as.integer(year))
  #drop_na()

# add country names, country codes, and flags
ghg_emissions_in_LUCF_3 <- ghg_emissions_in_LUCF_2 %>% 
  mutate(iso3c = country_name_regex_to_iso3c(`Country/Region`)) %>%
  mutate(iso3c = ifelse(`Country/Region` == "Micronesia", "FSM", iso3c)) %>% 
  mutate(country_name = iso3c_to_country_name(`iso3c`)) 
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `iso3c = country_name_regex_to_iso3c(`Country/Region`)`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: Data source, Micronesia
# problem with Micronesia (iso3c and country_name don't show up)
# ghg_emissions_ex_LUCF_3 %>% filter (`Country/Region` == "Micronesia")

# problem with adding flags
# ghg_emissions_in_LUCF_3 %>% 
# mutate(flag = countrycode(sourcevar = iso3c, 
                            # origin = iso3c, 
                            # destination = "unicode.symbol"))
  
ghg_emissions_in_LUCF_3 <- ghg_emissions_in_LUCF_3 %>% 
  filter(unit != "Climate Watch")

# I think these data actually don't have regions unlike the OECD data

# move iso3c and country names to the front
ghg_emissions_in_LUCF_4 <- ghg_emissions_in_LUCF_3 %>% 
  relocate(iso3c, .before = 2) %>% 
  relocate(country_name, .before = 1) 

final_ghg_in_LUCF <- ghg_emissions_in_LUCF_4

Tidy OECD data

Tidy Energy Imports

# tidy OECD data 1: energy imports

energy_imports_2  <- energy_imports |>
    select(-`...2`) |>
    filter(Time != "Country") %>% 
    rename(geography = Time)

  geography_tbl <- energy_imports_2 |> 
    select(geography)

  geography_processed <- geography_tbl |> 
    separate(col = geography, into = c("geography_code", "geography_name"), 
             sep = ":", remove = FALSE) |> 
    mutate(across(c("geography_code", "geography_name"), str_trim)) |> 
    mutate(country_name = countrycode(sourcevar = geography_code, 
                                      origin = "iso3c", 
                                      destination = "country.name")) |>
    mutate(country_or_region = if_else(is.na(country_name), 
                                       true = "region",
                                       false = "country")) |>
    mutate(country_name = if_else(is.na(country_name),
                                  true = geography_name,
                                  false = country_name)) |> 
    select(geography, country_name, iso3c = geography_code, country_or_region)
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `country_name = countrycode(sourcevar = geography_code, origin =
  "iso3c", destination = "country.name")`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: APEC, ASEAN, DATA EXTRACTED ON 08 MAR 2023 20, EA19, EASIA, EU13, EU15, EU27_2020, EU28, G20, NONOECD, OECD, ROW, ZASI, ZEUR, ZNAM, ZOTH, ZSCA
  energy_imports_processed <- geography_processed |> 
    left_join(energy_imports_2, by = "geography") |> 
    select(-geography) |> 
    pivot_longer(cols = matches("\\d{4}"), 
                 names_to = "year", 
                 names_transform = as.integer)
  
  oecd_metadata_1 <- read_excel("energy_imports.xlsx",range = "a3:c6", col_names = c("metadata_parameter", "blank", "metadata_value")) |> 
    select(-blank) |> 
    pivot_wider(names_from = metadata_parameter, values_from = metadata_value)
  
 my_oecd_metadata_1 <- tribble(~my_indicator_name, 
                             "Emissions from Imported Electricity, Gas, and Heating/Cooling") 
 
  final_energy_imports <- energy_imports_processed |> bind_cols(oecd_metadata_1) |> 
bind_cols(my_oecd_metadata_1) %>% 
    janitor::clean_names()
  
  final_energy_imports 
# A tibble: 2,016 × 10
   country_name  iso3c count…¹  year value indic…² indus…³ partner unit  my_in…⁴
   <chr>         <chr> <chr>   <int> <dbl> <chr>   <chr>   <chr>   <chr> <chr>  
 1 OECD member … OECD  region   1995  439. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
 2 OECD member … OECD  region   1996  415. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
 3 OECD member … OECD  region   1997  414. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
 4 OECD member … OECD  region   1998  475. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
 5 OECD member … OECD  region   1999  551. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
 6 OECD member … OECD  region   2000  656. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
 7 OECD member … OECD  region   2001  569. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
 8 OECD member … OECD  region   2002  573. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
 9 OECD member … OECD  region   2003  689. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
10 OECD member … OECD  region   2004  793. DFD_FC… D35: E… WLD: W… Tonn… Emissi…
# … with 2,006 more rows, and abbreviated variable names ¹​country_or_region,
#   ²​indicator, ³​industry, ⁴​my_indicator_name

Tidy Total Imports

total_imports_2  <- total_imports |>
    select(-`...2`) |>
    filter(Time != "Country") %>% 
    rename(geography = Time)

  geography_tbl_2 <- total_imports_2 |> 
    select(geography)

  geography_processed_2 <- geography_tbl_2 |> 
    separate(col = geography, into = c("geography_code", "geography_name"), 
             sep = ":", remove = FALSE) |> 
    mutate(across(c("geography_code", "geography_name"), str_trim)) |> 
    mutate(country_name = countrycode(sourcevar = geography_code, 
                                      origin = "iso3c", 
                                      destination = "country.name")) |>
    mutate(country_or_region = if_else(is.na(country_name), 
                                       true = "region",
                                       false = "country")) |>
    mutate(country_name = if_else(is.na(country_name),
                                  true = geography_name,
                                  false = country_name)) |> 
    select(geography, country_name, iso3c = geography_code, country_or_region)
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `country_name = countrycode(sourcevar = geography_code, origin =
  "iso3c", destination = "country.name")`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: APEC, ASEAN, DATA EXTRACTED ON 08 MAR 2023 20, EA19, EASIA, EU13, EU15, EU27_2020, EU28, G20, NONOECD, OECD, ROW, ZASI, ZEUR, ZNAM, ZOTH, ZSCA
total_imports_processed <- geography_processed_2 |> 
    left_join(total_imports_2, by = "geography") |> 
    select(-geography) |> 
    pivot_longer(cols = matches("\\d{4}"), 
                 names_to = "year", 
                 names_transform = as.integer)
  
  oecd_metadata_2 <- read_excel("total_imports.xlsx",range = "a3:c6", col_names = c("metadata_parameter", "blank", "metadata_value")) |> 
    select(-blank) |> 
    pivot_wider(names_from = metadata_parameter, values_from = metadata_value)
  
my_oecd_metadata_2 <- tribble(~my_indicator_name, 
                             "Emissions from all Imports") 
 
  final_total_imports <- total_imports_processed |> bind_cols(oecd_metadata_2) |> 
bind_cols(my_oecd_metadata_2) %>% 
    janitor::clean_names()
  
  
  final_total_imports 
# A tibble: 2,016 × 10
   country_name  iso3c count…¹  year value indic…² indus…³ partner unit  my_in…⁴
   <chr>         <chr> <chr>   <int> <dbl> <chr>   <chr>   <chr>   <chr> <chr>  
 1 OECD member … OECD  region   1995 1454. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
 2 OECD member … OECD  region   1996 1385. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
 3 OECD member … OECD  region   1997 1369. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
 4 OECD member … OECD  region   1998 1554. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
 5 OECD member … OECD  region   1999 1729. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
 6 OECD member … OECD  region   2000 1953. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
 7 OECD member … OECD  region   2001 1818. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
 8 OECD member … OECD  region   2002 1884. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
 9 OECD member … OECD  region   2003 2149. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
10 OECD member … OECD  region   2004 2482. DFD_FC… DTOTAL… WLD: W… Tonn… Emissi…
# … with 2,006 more rows, and abbreviated variable names ¹​country_or_region,
#   ²​indicator, ³​industry, ⁴​my_indicator_name

Tidy Total Export Data

# tidy total export data

total_exports_2  <- total_exports |>
    select(-`...2`) |>
    filter(Time != "Country") %>% 
    rename(geography = Time)

  geography_tbl_3 <- total_exports_2 |> 
    select(geography)

  geography_processed_3 <- geography_tbl_3 |> 
    separate(col = geography, into = c("geography_code", "geography_name"), 
             sep = ":", remove = FALSE) |> 
    mutate(across(c("geography_code", "geography_name"), str_trim)) |> 
    mutate(country_name = countrycode(sourcevar = geography_code, 
                                      origin = "iso3c", 
                                      destination = "country.name")) |>
    mutate(country_or_region = if_else(is.na(country_name), 
                                       true = "region",
                                       false = "country")) |>
    mutate(country_name = if_else(is.na(country_name),
                                  true = geography_name,
                                  false = country_name)) |> 
    select(geography, country_name, iso3c = geography_code, country_or_region)
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `country_name = countrycode(sourcevar = geography_code, origin =
  "iso3c", destination = "country.name")`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: APEC, ASEAN, DATA EXTRACTED ON 08 MAR 2023 20, EA19, EASIA, EU13, EU15, EU27_2020, EU28, G20, NONOECD, OECD, ROW, ZASI, ZEUR, ZNAM, ZOTH, ZSCA
total_exports_processed <- geography_processed_3 |> 
    left_join(total_exports_2, by = "geography") |> 
    select(-geography) |> 
    pivot_longer(cols = matches("\\d{4}"), 
                 names_to = "year", 
                 names_transform = as.integer)
  
  oecd_metadata_3 <- read_excel("total_exports.xlsx",range = "a3:c6", col_names = c("metadata_parameter", "blank", "metadata_value")) |> 
    select(-blank) |> 
    pivot_wider(names_from = metadata_parameter, values_from = metadata_value)
  
my_oecd_metadata_3 <- tribble(~my_indicator_name, 
                             "Emissions from all Exports") 
 
  final_total_exports <- total_exports_processed |> bind_cols(oecd_metadata_3) |> 
bind_cols(my_oecd_metadata_3) %>% 
    janitor::clean_names()
  
  final_total_exports 
# A tibble: 2,016 × 10
   country_name  iso3c count…¹  year value indic…² indus…³ partner unit  my_in…⁴
   <chr>         <chr> <chr>   <int> <dbl> <chr>   <chr>   <chr>   <chr> <chr>  
 1 OECD member … OECD  region   1995  466. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
 2 OECD member … OECD  region   1996  508. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
 3 OECD member … OECD  region   1997  560. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
 4 OECD member … OECD  region   1998  528. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
 5 OECD member … OECD  region   1999  492. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
 6 OECD member … OECD  region   2000  527. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
 7 OECD member … OECD  region   2001  546. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
 8 OECD member … OECD  region   2002  529. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
 9 OECD member … OECD  region   2003  533. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
10 OECD member … OECD  region   2004  590. FFD_DC… DTOTAL… WLD: W… Tonn… Emissi…
# … with 2,006 more rows, and abbreviated variable names ¹​country_or_region,
#   ²​indicator, ³​industry, ⁴​my_indicator_name

Tidy Worldbank Data

gdp_ppp_2 <- gdp_ppp %>% 
  rename(year = date) %>% 
  rename(GDP_PPP = NY.GDP.MKTP.PP.CD) %>% 
  select(iso3c, country, year, GDP_PPP)

gdp_ppp_3 <- gdp_ppp_2 %>% 
  mutate(country_name = iso3c_to_country_name(iso3c)) %>% 
  relocate(country_name, .before = 1)
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `country_name = iso3c_to_country_name(iso3c)`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: CHI, XKX
gdp_ppp_3 %>% filter(iso3c == "CHI" | iso3c == "XKX")
# A tibble: 50 × 5
   country_name iso3c country          year GDP_PPP
   <chr>        <chr> <chr>           <dbl>   <dbl>
 1 <NA>         CHI   Channel Islands  1995      NA
 2 <NA>         CHI   Channel Islands  1996      NA
 3 <NA>         CHI   Channel Islands  1997      NA
 4 <NA>         CHI   Channel Islands  1998      NA
 5 <NA>         CHI   Channel Islands  1999      NA
 6 <NA>         CHI   Channel Islands  2000      NA
 7 <NA>         CHI   Channel Islands  2001      NA
 8 <NA>         CHI   Channel Islands  2002      NA
 9 <NA>         CHI   Channel Islands  2003      NA
10 <NA>         CHI   Channel Islands  2004      NA
# … with 40 more rows
# set country_name values for Kosovo and Channel Islands, year as integer
gdp_ppp_3 <- gdp_ppp_3 %>% 
  mutate(country_name = ifelse(iso3c == "XKX", "Kosovo", country_name)) %>% 
  mutate(country_name = ifelse(iso3c == "CHI", "Channel Islands", country_name)) %>% 
   mutate(year = as.integer(year))

final_gdp_ppp <- gdp_ppp_3

Saving Files

# saving files in processed_01 folder of project
write_csv(final_energy_imports, "./processed_01/final_energy_imports.csv")
write_csv(final_total_imports, "./processed_01/final_total_imports.csv")
write_csv(final_total_exports, "./processed_01/final_total_exports.csv")
write_csv(final_gdp_ppp, "./processed_01/final_gdp_ppp.csv")
write_csv(final_ghg_ex_LUCF, "./processed_01/final_ghg_ex_LUCF.csv")
write_csv(final_ghg_in_LUCF, "./processed_01/final_ghg_in_LUCF.csv")