Data Cleanup

Author

Jingning Feng

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.1     ✔ purrr   1.0.1
✔ tibble  3.1.8     ✔ dplyr   1.1.0
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(janitor)

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(readxl)
library(countrycode)
country_name_regex_to_iso3c <- function(country_name) {
  country_name |>
    countrycode(origin = "country.name", 
                destination = "iso3c",
                origin_regex = TRUE)
}

iso3c_to_country_name <- function(iso3c) {
  iso3c |>
  countrycode(origin = "iso3c", destination = "country.name")
}
raw_scope1 <- read_excel("RAW_DATA/Emissions of Exports of Goods and Services.xlsm", skip = 6) 
clean_scope1 <- raw_scope1 |> 
  mutate(iso3c = country_name_regex_to_iso3c(Country)) |> 
  mutate(country_name = iso3c_to_country_name(iso3c))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `iso3c = country_name_regex_to_iso3c(Country)`.
Caused by warning in `countrycode_convert()`:
! Some values were not matched unambiguously: APEC: Asia-Pacific Economic Cooperation, ASEAN: Association of South East Asian Nations, Data extracted on 09 Mar 2023 15:28 UTC (GMT) from OECD.Stat, EA19: Euro area (19 countries), EASIA: Eastern Asia, EU13: EU28 excluding EU15, EU15: European Union (15 countries), EU27_2020: European Union (27 countries), EU28: European Union (28 countries), G20: Group of Twenty, NONOECD: Non-OECD economies and aggregates, OECD: OECD member countries, ROW: Rest of the World, TUR: Türkiye, WLD: World, ZASI: East and Southeastern Asia, ZEUR: Europe, ZNAM: North America, ZOTH: Other regions, ZSCA: South and Central America
#scope2 <- read_excel("RAW_DATA/Grid-supplied electricity, steam and cooling imports.xlsm", skip = 6)
#scope3 <- read_excel("RAW_DATA/Non-energy imports.xlsm", skip = 6)
raw_scope1
# A tibble: 85 × 25
   Country `1995` `1996` `1997` `1998` `1999` `2000` `2001` `2002` `2003` `2004`
   <chr>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 WLD: W… 2.14e4 2.18e4 2.22e4 2.24e4 2.25e4 2.32e4 2.36e4 2.39e4 2.50e4 2.61e4
 2 OECD: … 1.30e4 1.33e4 1.35e4 1.37e4 1.40e4 1.46e4 1.44e4 1.44e4 1.49e4 1.54e4
 3 AUS: A… 2.78e2 2.84e2 2.98e2 3.22e2 3.20e2 3.24e2 3.22e2 3.40e2 3.50e2 3.81e2
 4 AUT: A… 8.51e1 8.94e1 8.36e1 8.57e1 8.37e1 8.39e1 8.57e1 8.44e1 9.33e1 9.78e1
 5 BEL: B… 1.30e2 1.32e2 1.27e2 1.31e2 1.27e2 1.27e2 1.25e2 1.21e2 1.29e2 1.36e2
 6 CAN: C… 4.09e2 4.17e2 4.36e2 4.39e2 4.46e2 4.57e2 4.50e2 4.71e2 5.08e2 5.17e2
 7 CHL: C… 3.99e1 4.62e1 5.31e1 5.66e1 5.59e1 5.39e1 5.10e1 5.37e1 5.44e1 5.82e1
 8 COL: C… 6.55e1 6.56e1 7.22e1 7.23e1 5.97e1 5.99e1 6.11e1 6.09e1 5.91e1 6.19e1
 9 CRI: C… 8.05e0 7.77e0 8.17e0 9.12e0 9.01e0 8.71e0 8.84e0 9.31e0 9.31e0 9.33e0
10 CZE: C… 1.05e2 1.09e2 1.06e2 9.98e1 9.61e1 1.02e2 1.03e2 1.02e2 1.07e2 1.04e2
# … with 75 more rows, and 14 more variables: `2005` <dbl>, `2006` <dbl>,
#   `2007` <dbl>, `2008` <dbl>, `2009` <dbl>, `2010` <dbl>, `2011` <dbl>,
#   `2012` <dbl>, `2013` <dbl>, `2014` <dbl>, `2015` <dbl>, `2016` <dbl>,
#   `2017` <dbl>, `2018` <dbl>
#scope2
#scope3