Homework 6

Author

Alyssa Anderson

Tidying OECD Emissions Data

# Installing packages and loading libraries 
options(repos = c(CRAN = "https://cran.rstudio.com/"))
install.packages("readr")
Installing package into 'C:/Users/alyss/AppData/Local/R/win-library/4.2'
(as 'lib' is unspecified)
package 'readr' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'readr'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
C:\Users\alyss\AppData\Local\R\win-library\4.2\00LOCK\readr\libs\x64\readr.dll
to C:\Users\alyss\AppData\Local\R\win-library\4.2\readr\libs\x64\readr.dll:
Permission denied
Warning: restored 'readr'

The downloaded binary packages are in
    C:\Users\alyss\AppData\Local\Temp\RtmpwXZ7N3\downloaded_packages
install.packages("here")
Installing package into 'C:/Users/alyss/AppData/Local/R/win-library/4.2'
(as 'lib' is unspecified)
package 'here' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\alyss\AppData\Local\Temp\RtmpwXZ7N3\downloaded_packages
install.packages("openxlsx")
Installing package into 'C:/Users/alyss/AppData/Local/R/win-library/4.2'
(as 'lib' is unspecified)
package 'openxlsx' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'openxlsx'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
C:\Users\alyss\AppData\Local\R\win-library\4.2\00LOCK\openxlsx\libs\x64\openxlsx.dll
to
C:\Users\alyss\AppData\Local\R\win-library\4.2\openxlsx\libs\x64\openxlsx.dll:
Permission denied
Warning: restored 'openxlsx'

The downloaded binary packages are in
    C:\Users\alyss\AppData\Local\Temp\RtmpwXZ7N3\downloaded_packages
library(here)
here() starts at C:/Users/alyss/OneDrive - Johns Hopkins/Desktop/MIEF Courses/Sustainable Finance
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.1     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.1     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(tidyverse)
library(writexl)
library(esquisse)
library(janitor)

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(countrycode)
library(openxlsx)

# Reading in the Data 
  # OECD Domestic CO2 Emissions 
OECD_Data_xls <- read_excel("C:/Users/alyss/OneDrive - Johns Hopkins/Desktop/R Coding/data_raw/OECD_Domestic_Emissions.xls.xlsx")
New names:
• `` -> `...1`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
 # OECD CO2 Emissions from Imports 
OECD_Data_Import_xls <- read_excel("C:/Users/alyss/OneDrive - Johns Hopkins/Desktop/R Coding/data_raw/OECD_Import_Data.xlsx")
New names:
• `` -> `...1`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
# Cleaning OECD Domestic Data
  # Drop the second column, and the first 5 rows with NA values
OECD_data_names <- OECD_Data_xls[-(1:7),-2]

# Set column names as country and year
colnames(OECD_data_names) <- c("Country", as.character(1995:2018))

# Create country code column
OECD_data_names <- OECD_data_names %>%
  separate(Country, into = c("CountryCode", "CountryName"), sep = ": ")
Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [85].
# Snake case names
OECD_data_names <- OECD_data_names %>% 
  clean_names(case = "snake")

# Remove the leading "x" from each year column name
current_names <- colnames(OECD_data_names)
new_year_names <- sub("^x", "", current_names)
colnames(OECD_data_names) <- new_year_names

# Dropping the last row of NA values
OECD_data_names<- head(OECD_data_names, -1)

# Converting country codes to iso3 standard 
OECD_data_names$iso3c <- countrycode(OECD_data_names$country_name, "country.name", "iso3c")
Warning: Some values were not matched unambiguously: Asia-Pacific Economic Cooperation, Association of South East Asian Nations, East and Southeastern Asia, Eastern Asia, EU28 excluding EU15, Euro area (19 countries), Europe, European Union (15 countries), European Union (27 countries), European Union (28 countries), Group of Twenty, Non-OECD economies and aggregates, North America, OECD member countries, Other regions, Rest of the World, South and Central America, World
OECD_data_names_2 <- OECD_data_names[ , -ncol(OECD_data_names)]
colnames(OECD_data_names_2)[1] <- "iso3c"

# Pivoting the data to long format
OECD_long <- OECD_data_names_2 %>%
  pivot_longer(
    cols = -c(country_name, iso3c),  # Select all columns except for country
    names_to = "Year",  # The name of the new column for years
    values_to = "CO2_Emissions"  # The name of the new column for CO2 emissions values
  )

# Setting the years as numeric
OECD_long$Year <- as.numeric(as.character(OECD_long$Year))

# Re-ording columns to start with Country Name
OECD_long_1 <- OECD_long[, c("country_name", "iso3c", "Year", "CO2_Emissions")]

# Cleaning the Import Emissions data
  # Copying the steps above to clean the second data set
    # Dropping NAs
OECD_Import_Data <- OECD_Data_Import_xls[-(1:7),-2]

    # Naming columns 
colnames(OECD_Import_Data) <- c("country_name", as.character(1995:2018))

    # Creating country codes 
OECD_Import_Data <- OECD_Import_Data %>%
  separate(country_name, into = c("iso3c", "country_name"), sep = ": ")
Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [84].
OECD_Import_Data<- head(OECD_Import_Data, -1)

    # Pivot long
OECD_Import_long <- OECD_Import_Data %>%
  pivot_longer(
    cols = -c(country_name, iso3c),
    names_to = "Year",
    values_to = "Imported_CO2_Emissions" 
  )

    # Making year numeric and renaming it
OECD_Import_long$Year <- as.numeric(as.character(OECD_Import_long$Year))
class(OECD_Import_long$Year)
[1] "numeric"
OECD_Import_long <- OECD_Import_long %>%
  rename(year = Year)

print(head(OECD_Import_long)) 
# A tibble: 6 × 4
  iso3c country_name           year Imported_CO2_Emissions
  <chr> <chr>                 <dbl> <chr>                 
1 OECD  OECD member countries  1995 72.808000000000007    
2 OECD  OECD member countries  1996 80.944999999999993    
3 OECD  OECD member countries  1997 87.03                 
4 OECD  OECD member countries  1998 89.052000000000007    
5 OECD  OECD member countries  1999 93.254999999999995    
6 OECD  OECD member countries  2000 111.105               
print(head(OECD_long_1))
# A tibble: 6 × 4
  country_name iso3c  Year CO2_Emissions     
  <chr>        <chr> <dbl> <chr>             
1 World        WLD    1995 21367.151000000002
2 World        WLD    1996 21815.234         
3 World        WLD    1997 22214.491999999998
4 World        WLD    1998 22391.011999999999
5 World        WLD    1999 22526.525000000001
6 World        WLD    2000 23239.757000000001
# Merging the two datasets 
merged_OECD_Emissions_4 <- merge(OECD_Import_long, OECD_long_1, by = c("country_name"))

# Renaming columns 
merged_OECD_Emissions_4 <- merged_OECD_Emissions_4 %>%
  rename(domestic_CO2_emissions = CO2_Emissions)
merged_OECD_Emissions_4 <- merged_OECD_Emissions_4 %>%
  rename(imported_CO2_emissions = Imported_CO2_Emissions)
merged_OECD_Emissions_4 <- merged_OECD_Emissions_4 %>%
  rename(iso3c = iso3c.x)

# Re-ording columns 
OECD_merged_1 <- merged_OECD_Emissions_4[, c("country_name", "iso3c", "year", "domestic_CO2_emissions", "imported_CO2_emissions")]

print(head(OECD_merged_1))
  country_name iso3c year domestic_CO2_emissions imported_CO2_emissions
1    Argentina   ARG 1995     123.64100000000001  1.7000000000000001E-2
2    Argentina   ARG 1995     135.53299999999999  1.7000000000000001E-2
3    Argentina   ARG 1995     140.66200000000001  1.7000000000000001E-2
4    Argentina   ARG 1995                152.238  1.7000000000000001E-2
5    Argentina   ARG 1995     151.16999999999999  1.7000000000000001E-2
6    Argentina   ARG 1995     148.33500000000001  1.7000000000000001E-2
# Write out data to clean data file
write.xlsx(OECD_merged_1, "C:/Users/alyss/OneDrive - Johns Hopkins/Desktop/R Coding/data_clean")