find you data_HW

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readxl) 
library(here)

here() starts at /Users/xingyuning/Desktop/susfin

library(countrycode)

path_to_sheet1 <-"/Users/xingyuning/Desktop/susfin/find your data_raw data/EGDAR_CO2.xlsx"

EDGAR_CO2_data <- read_excel(path_to_sheet1, range = "D10:BF233")

EDGAR_CO2_data <- EDGAR_CO2_data |> 
  mutate(
    country_name = countrycode(sourcevar = Name, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = Name, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(sourcevar = Name, origin =
  "country.name", destination = "country.name")`.
Caused by warning:
! Some values were not matched unambiguously: Int. Aviation, Int. Shipping, Virgin Islands_USA
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

library(dplyr)
year_cols <- grep("^Y_[0-9]{4}$", names(EDGAR_CO2_data), value = TRUE)
print(year_cols)

 [1] "Y_1970" "Y_1971" "Y_1972" "Y_1973" "Y_1974" "Y_1975" "Y_1976" "Y_1977"
 [9] "Y_1978" "Y_1979" "Y_1980" "Y_1981" "Y_1982" "Y_1983" "Y_1984" "Y_1985"
[17] "Y_1986" "Y_1987" "Y_1988" "Y_1989" "Y_1990" "Y_1991" "Y_1992" "Y_1993"
[25] "Y_1994" "Y_1995" "Y_1996" "Y_1997" "Y_1998" "Y_1999" "Y_2000" "Y_2001"
[33] "Y_2002" "Y_2003" "Y_2004" "Y_2005" "Y_2006" "Y_2007" "Y_2008" "Y_2009"
[41] "Y_2010" "Y_2011" "Y_2012" "Y_2013" "Y_2014" "Y_2015" "Y_2016" "Y_2017"
[49] "Y_2018" "Y_2019" "Y_2020" "Y_2021" "Y_2022"

EDGAR_CO2_data_long <- EDGAR_CO2_data |> 
  pivot_longer(
    cols = all_of(year_cols), 
    names_to = "year",
    values_to = "CO2_value"
  ) |> 
  mutate(
    year = as.numeric(year), 
    CO2_value = as.numeric(CO2_value) 
  )

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `year = as.numeric(year)`.
Caused by warning:
! NAs introduced by coercion

library(tidyverse) 
library(readxl) 
library(here) 
library(countrycode)

path_to_sheet2 <-"/Users/xingyuning/Desktop/susfin/find your data_raw data/OECD_stat_export.xlsx"

OECD_exports_data <- read_excel(path_to_sheet2, range = "A8:Z92")

New names:
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`

OECD_exports_data <- OECD_exports_data |> 
  mutate(
    country_name = countrycode(sourcevar = Country, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = Country, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(sourcevar = Country, origin =
  "country.name", destination = "country.name")`.
Caused by warning:
! Some values were not matched unambiguously: APEC: Asia-Pacific Economic Cooperation, ASEAN: Association of South East Asian Nations, EA19: Euro area (19 countries), EASIA: Eastern Asia, EU13: EU28 excluding EU15, EU15: European Union (15 countries), EU27_2020: European Union (27 countries), EU28: European Union (28 countries), G20: Group of Twenty, NONOECD: Non-OECD economies and aggregates, OECD: OECD member countries, ROW: Rest of the World, WLD: World, ZASI: East and Southeastern Asia, ZEUR: Europe, ZNAM: North America, ZOTH: Other regions, ZSCA: South and Central America
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

print(year_cols)

year_cols <- grep("^[0-9]{4}$", names(OECD_exports_data), value = TRUE)
print(year_cols)
OECD_exports_data_long <-OECD_exports_data %>%
  pivot_longer(
    cols = all_of(year_cols), 
    names_to = "year",
    values_to = "value"
  ) |> 
 mutate(
    year = as.integer(year), 
    OECD_exports_value = as.numeric(value) 
  )

library(tidyverse) 
library(readxl) 
library(here) 
library(countrycode)

path_to_sheet3 <-"/Users/xingyuning/Desktop/susfin/find your data_raw data/OECD_non_energy_import.xls"

OECD_non_energy_import_data <- read_excel(path_to_sheet3, range = "A8:Z92")

New names:
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`

OECD_non_energy_import_data <- OECD_non_energy_import_data %>%
  mutate(
    country_name = countrycode(sourcevar = Country, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = Country, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(sourcevar = Country, origin =
  "country.name", destination = "country.name")`.
Caused by warning:
! Some values were not matched unambiguously: APEC: Asia-Pacific Economic Cooperation, ASEAN: Association of South East Asian Nations, EA19: Euro area (19 countries), EASIA: Eastern Asia, EU13: EU28 excluding EU15, EU15: European Union (15 countries), EU27_2020: European Union (27 countries), EU28: European Union (28 countries), G20: Group of Twenty, NONOECD: Non-OECD economies and aggregates, OECD: OECD member countries, ROW: Rest of the World, WLD: World, ZASI: East and Southeastern Asia, ZEUR: Europe, ZNAM: North America, ZOTH: Other regions, ZSCA: South and Central America
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

year_cols <- grep("^[0-9]{4}$", names(OECD_non_energy_import_data), value = TRUE)
print(year_cols)

OECD_non_energy_import_data_long <-OECD_non_energy_import_data |> 
  pivot_longer(
    cols = all_of(year_cols), 
    names_to = "year",
    values_to = "value"
  ) %>%
 mutate(
    year = as.integer(year), # Convert year to integer
    OECD_non_energy_import = as.numeric(value) # Convert value to numeric if not already
  )

library(tidyverse) 
library(readxl) 
library(here) 
library(countrycode)

path_to_sheet4 <-"/Users/xingyuning/Desktop/susfin/find your data_raw data/OECD_D35Industry.xls"

OECD_D35_import_data <- read_excel(path_to_sheet4, range = "A8:Z92")

New names:
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`

OECD_D35_import_data <- OECD_D35_import_data %>%
  mutate(
    country_name = countrycode(sourcevar = Country, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = Country, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(sourcevar = Country, origin =
  "country.name", destination = "country.name")`.
Caused by warning:
! Some values were not matched unambiguously: APEC: Asia-Pacific Economic Cooperation, ASEAN: Association of South East Asian Nations, EA19: Euro area (19 countries), EASIA: Eastern Asia, EU13: EU28 excluding EU15, EU15: European Union (15 countries), EU27_2020: European Union (27 countries), EU28: European Union (28 countries), G20: Group of Twenty, NONOECD: Non-OECD economies and aggregates, OECD: OECD member countries, ROW: Rest of the World, WLD: World, ZASI: East and Southeastern Asia, ZEUR: Europe, ZNAM: North America, ZOTH: Other regions, ZSCA: South and Central America
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

year_cols <- grep("^[0-9]{4}$", names(OECD_D35_import_data), value = TRUE)

OECD_D35_import_data_long <-OECD_D35_import_data %>%
  pivot_longer(
    cols = all_of(year_cols),
    names_to = "year",
    values_to = "value"
  ) %>%
 mutate(
    year = as.integer(year), 
    OECD_D35_import_data = as.numeric(value) 
  )

library(tidyverse) 
library(readxl) 
library(here) 
library(countrycode)

path_to_sheet5 <-"/Users/xingyuning/Desktop/susfin/find your data_raw data/WB_GDP_adjustedbyPPP.xls"

# Attempt to read the file again
GDP_adjusted_PPP <- read_excel(path_to_sheet5, range = "A4:BO270")

GDP_adjusted_PPP <- GDP_adjusted_PPP %>%
  mutate(
    country_name = countrycode(sourcevar = CountryName, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = CountryName, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(...)`.
Caused by warning:
! Some values were not matched unambiguously: Africa Eastern and Southern, Africa Western and Central, Arab World, Caribbean small states, Central Europe and the Baltics, Early-demographic dividend, East Asia & Pacific, East Asia & Pacific (excluding high income), East Asia & Pacific (IDA & IBRD countries), Euro area, Europe & Central Asia, Europe & Central Asia (excluding high income), Europe & Central Asia (IDA & IBRD countries), European Union, Fragile and conflict affected situations, Heavily indebted poor countries (HIPC), High income, IBRD only, IDA & IBRD total, IDA blend, IDA only, IDA total, Late-demographic dividend, Latin America & Caribbean, Latin America & Caribbean (excluding high income), Latin America & the Caribbean (IDA & IBRD countries), Least developed countries: UN classification, Low & middle income, Low income, Lower middle income, Middle East & North Africa, Middle East & North Africa (excluding high income), Middle East & North Africa (IDA & IBRD countries), Middle income, North America, Not classified, OECD members, Other small states, Pacific island small states, Post-demographic dividend, Pre-demographic dividend, Small states, South Asia, South Asia (IDA & IBRD), Sub-Saharan Africa, Sub-Saharan Africa (excluding high income), Sub-Saharan Africa (IDA & IBRD countries), Upper middle income, World
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

year_cols <- grep("^[0-9]{4}$", names(GDP_adjusted_PPP), value = TRUE)

GDP_adjusted_PPP_long <-GDP_adjusted_PPP %>%
  pivot_longer(
    cols = all_of(year_cols), # Use all_of() to pivot only the year columns identified above
    names_to = "year",
    values_to = "value"
  ) %>%
 mutate(
    year = as.integer(year), 
    GDP_adjusted_PPP = as.numeric(value) 
  )

library(tidyverse) 
library(readxl) 
library(here) 
library(countrycode)

path_to_sheet6 <-"/Users/xingyuning/Desktop/susfin/find your data_raw data/WB_GPD_currentdollar.xls"

GDP_current_dollar <- read_excel(path_to_sheet6, range = "A4:BO270")

GDP_current_dollar <- GDP_current_dollar %>%
  mutate(
    country_name = countrycode(sourcevar = CountryName, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = CountryName, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(...)`.
Caused by warning:
! Some values were not matched unambiguously: Africa Eastern and Southern, Africa Western and Central, Arab World, Caribbean small states, Central Europe and the Baltics, Early-demographic dividend, East Asia & Pacific, East Asia & Pacific (excluding high income), East Asia & Pacific (IDA & IBRD countries), Euro area, Europe & Central Asia, Europe & Central Asia (excluding high income), Europe & Central Asia (IDA & IBRD countries), European Union, Fragile and conflict affected situations, Heavily indebted poor countries (HIPC), High income, IBRD only, IDA & IBRD total, IDA blend, IDA only, IDA total, Late-demographic dividend, Latin America & Caribbean, Latin America & Caribbean (excluding high income), Latin America & the Caribbean (IDA & IBRD countries), Least developed countries: UN classification, Low & middle income, Low income, Lower middle income, Middle East & North Africa, Middle East & North Africa (excluding high income), Middle East & North Africa (IDA & IBRD countries), Middle income, North America, Not classified, OECD members, Other small states, Pacific island small states, Post-demographic dividend, Pre-demographic dividend, Small states, South Asia, South Asia (IDA & IBRD), Sub-Saharan Africa, Sub-Saharan Africa (excluding high income), Sub-Saharan Africa (IDA & IBRD countries), Upper middle income, World
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

year_cols <- grep("^[0-9]{4}$", names(GDP_current_dollar), value = TRUE)

GDP_current_dollar_long <-GDP_current_dollar %>%
  pivot_longer(
    cols = all_of(year_cols), # Use all_of() to pivot only the year columns identified above
    names_to = "year",
    values_to = "value"
  ) %>%
 mutate(
    year = as.integer(year), # Convert year to integer
    GDP_current_dollar = as.numeric(value) # Convert value to numeric if not already
  )

library(tidyverse) 
library(readxl) 
library(here) 
library(countrycode)

path_to_sheet7 <-"/Users/xingyuning/Desktop/susfin/find your data_raw data/WB_Population.xls"

# Attempt to read the file again
population <- read_excel(path_to_sheet7, range = "A4:B0270")

population <- population %>%
  mutate(
    country_name = countrycode(sourcevar = CountryName, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = CountryName, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(...)`.
Caused by warning:
! Some values were not matched unambiguously: Africa Eastern and Southern, Africa Western and Central, Arab World, Caribbean small states, Central Europe and the Baltics, Early-demographic dividend, East Asia & Pacific, East Asia & Pacific (excluding high income), East Asia & Pacific (IDA & IBRD countries), Euro area, Europe & Central Asia, Europe & Central Asia (excluding high income), Europe & Central Asia (IDA & IBRD countries), European Union, Fragile and conflict affected situations, Heavily indebted poor countries (HIPC), High income, IBRD only, IDA & IBRD total, IDA blend, IDA only, IDA total, Late-demographic dividend, Latin America & Caribbean, Latin America & Caribbean (excluding high income), Latin America & the Caribbean (IDA & IBRD countries), Least developed countries: UN classification, Low & middle income, Low income, Lower middle income, Middle East & North Africa, Middle East & North Africa (excluding high income), Middle East & North Africa (IDA & IBRD countries), Middle income, North America, Not classified, OECD members, Other small states, Pacific island small states, Post-demographic dividend, Pre-demographic dividend, Small states, South Asia, South Asia (IDA & IBRD), Sub-Saharan Africa, Sub-Saharan Africa (excluding high income), Sub-Saharan Africa (IDA & IBRD countries), Upper middle income, World
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

year_cols <- grep("^[0-9]{4}$", names(population), value = TRUE)

population_long <-population %>%
  pivot_longer(
    cols = all_of(year_cols), 
    names_to = "year",
    values_to = "value"
  ) %>%
 mutate(
    year = as.integer(year), 
    population = as.numeric(value) 
  )

library(dplyr)
library(readr)


EDGAR_CO2_selected <- EDGAR_CO2_data_long %>%
  select(Name, year, CO2_value) 

OECD_exports_selected <- OECD_exports_data_long %>%
  select(Country, year, OECD_exports_value)

merged_data <- full_join(EDGAR_GHG_selected, OECD_exports_selected, by = c("Name", "year"))

OECD_non_energy_import_selected <- OECD_non_energy_import_data_long %>%
  select(Name, year, OECD_non_energy_import)


OECD_D35_import_selected <- OECD_D35_import_data_long %>%
  select(Name, year, OECD_D35_import_data)

merged_data <- full_join(merged_data, OECD_D35_import_selected, by = c("Name", "year"))

GDP_adjusted_PPP_selected <- GDP_adjusted_PPP_long %>%
  select(Name, year, GDP_adjusted_PPP)

merged_data <- full_join(merged_data, GDP_adjusted_PPP_selected, by = c("Name", "year"))

GDP_current_dollar_selected <- GDP_current_dollar_long %>%
  select(Name, year, GDP_current_dollar)

merged_data <- full_join(merged_data, GDP_current_dollar_selected, by = c("Name", "year"))

population_selected <- population_long %>%
  select(Name, year, population)

merged_data <- full_join(merged_data, population_selected, by = c("Name", "year"))

PCAF_data_final <- merged_data

final_csv_path <- "PCAF-data-final.csv" 

write_csv(PCAF_data_final, final_csv_path)