Susfin-PCAF-data tyding

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readxl) 
library(here)

here() starts at C:/Users/puxin/OneDrive/Documents/PCAF-data-raw

library(countrycode)

path_to_sheet1 <-"C:/Users/puxin/OneDrive/Documents/PCAF-data-raw/EDGAR_GHG.xlsx"

EDGAR_GHG_data <- read_excel(path_to_sheet1, range = "D10:BF233")

EDGAR_GHG_data <- EDGAR_GHG_data %>%
  mutate(
    country_name = countrycode(sourcevar = Name, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = Name, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(sourcevar = Name, origin =
  "country.name", destination = "country.name")`.
Caused by warning:
! Some values were not matched unambiguously: Int. Aviation, Int. Shipping, Virgin Islands_USA
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

# Dynamically select year columns if they follow a specific naming pattern (e.g., are numeric)
year_cols <- grep("^[0-9]{4}$", names(EDGAR_GHG_data), value = TRUE)

EDGAR_GHG_data_long <- EDGAR_GHG_data %>%
  pivot_longer(
    cols = all_of(year_cols), # Use all_of() to pivot only the year columns identified above
    names_to = "year",
    values_to = "value"
  ) %>%
 mutate(
    year = as.integer(year), # Convert year to integer
    EDGAR_GHG_value = as.numeric(value) # Convert value to numeric if not already
  )

library(tidyverse) 
library(readxl) 
library(here) 
library(countrycode)

path_to_sheet2 <-"C:/Users/puxin/OneDrive/Documents/PCAF-data-raw/OECD_stat_exports.xlsx"

# Attempt to read the file again
OECD_exports_data <- read_excel(path_to_sheet2, range = "A6:Z90")


OECD_exports_data <- OECD_exports_data %>%
  mutate(
    country_name = countrycode(sourcevar = Name, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = Name, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(sourcevar = Name, origin =
  "country.name", destination = "country.name")`.
Caused by warning:
! Some values were not matched unambiguously: Asia-Pacific Economic Cooperation, Association of South East Asian Nations, East and Southeastern Asia, Eastern Asia, EU28 excluding EU15, Euro area (19 countries), Europe, European Union (15 countries), European Union (27 countries), European Union (28 countries), Group of Twenty, Non-OECD economies and aggregates, North America, OECD member countries, Other regions, Rest of the World, South and Central America, World
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

# Dynamically select year columns if they follow a specific naming pattern (e.g., are numeric)
year_cols <- grep("^[0-9]{4}$", names(OECD_exports_data), value = TRUE)

OECD_exports_data_long <-OECD_exports_data %>%
  pivot_longer(
    cols = all_of(year_cols), # Use all_of() to pivot only the year columns identified above
    names_to = "year",
    values_to = "value"
  ) %>%
 mutate(
    year = as.integer(year), # Convert year to integer
    OECD_exports_value = as.numeric(value) # Convert value to numeric if not already
  )

library(tidyverse) 
library(readxl) 
library(here) 
library(countrycode)

path_to_sheet3 <-"C:/Users/puxin/OneDrive/Documents/PCAF-data-raw/OECD_non-energy-imports.xlsx"

# Attempt to read the file again
OECD_nonenergy_import_data <- read_excel(path_to_sheet3, range = "A1:Y79")

OECD_nonenergy_import_data <- OECD_nonenergy_import_data %>%
  mutate(
    country_name = countrycode(sourcevar = Name, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = Name, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(sourcevar = Name, origin =
  "country.name", destination = "country.name")`.
Caused by warning:
! Some values were not matched unambiguously: Asia-Pacific Economic Cooperation, Association of South East Asian Nations, Eastern Asia, EU28 excluding EU15, Euro area (19 countries), European Union (15 countries), European Union (27 countries), European Union (28 countries), Group of Twenty, Non-OECD economies and aggregates, OECD member countries, Rest of the World
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

year_cols <- grep("^[0-9]{4}$", names(OECD_nonenergy_import_data), value = TRUE)

OECD_nonenergy_import_data_long <-OECD_nonenergy_import_data %>%
  pivot_longer(
    cols = all_of(year_cols), # Use all_of() to pivot only the year columns identified above
    names_to = "year",
    values_to = "value"
  ) %>%
 mutate(
    year = as.integer(year), # Convert year to integer
    OECD_non_energy_import = as.numeric(value) # Convert value to numeric if not already
  )

library(tidyverse) 
library(readxl) 
library(here) 
library(countrycode)

path_to_sheet4 <-"C:/Users/puxin/OneDrive/Documents/PCAF-data-raw/OCED_D35-imports.xlsx"

# Attempt to read the file again
OECD_D35_import_data <- read_excel(path_to_sheet4, range = "A1:Z79")

OECD_D35_import_data <- OECD_D35_import_data %>%
  mutate(
    country_name = countrycode(sourcevar = Name, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = Name, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(sourcevar = Name, origin =
  "country.name", destination = "country.name")`.
Caused by warning:
! Some values were not matched unambiguously: Asia-Pacific Economic Cooperation, Association of South East Asian Nations, Eastern Asia, EU28 excluding EU15, Euro area (19 countries), European Union (15 countries), European Union (27 countries), European Union (28 countries), Group of Twenty, Non-OECD economies and aggregates, OECD member countries, Rest of the World
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

year_cols <- grep("^[0-9]{4}$", names(OECD_D35_import_data), value = TRUE)

OECD_D35_import_data_long <-OECD_D35_import_data %>%
  pivot_longer(
    cols = all_of(year_cols), # Use all_of() to pivot only the year columns identified above
    names_to = "year",
    values_to = "value"
  ) %>%
 mutate(
    year = as.integer(year), # Convert year to integer
    OECD_D35_import_data = as.numeric(value) # Convert value to numeric if not already
  )

library(tidyverse) 
library(readxl) 
library(here) 
library(countrycode)

path_to_sheet5 <-"C:/Users/puxin/OneDrive/Documents/PCAF-data-raw/GDP_adjusted_by_PPP.xlsx"

# Attempt to read the file again
GDP_adjusted_PPP <- read_excel(path_to_sheet5, range = "A1:AJ267")

GDP_adjusted_PPP <- GDP_adjusted_PPP %>%
  mutate(
    country_name = countrycode(sourcevar = Name, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = Name, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(sourcevar = Name, origin =
  "country.name", destination = "country.name")`.
Caused by warning:
! Some values were not matched unambiguously: Africa Eastern and Southern, Africa Western and Central, Arab World, Caribbean small states, Central Europe and the Baltics, Early-demographic dividend, East Asia & Pacific, East Asia & Pacific (excluding high income), East Asia & Pacific (IDA & IBRD countries), Euro area, Europe & Central Asia, Europe & Central Asia (excluding high income), Europe & Central Asia (IDA & IBRD countries), European Union, Fragile and conflict affected situations, Heavily indebted poor countries (HIPC), High income, IBRD only, IDA & IBRD total, IDA blend, IDA only, IDA total, Late-demographic dividend, Latin America & Caribbean, Latin America & Caribbean (excluding high income), Latin America & the Caribbean (IDA & IBRD countries), Least developed countries: UN classification, Low & middle income, Low income, Lower middle income, Middle East & North Africa, Middle East & North Africa (excluding high income), Middle East & North Africa (IDA & IBRD countries), Middle income, North America, Not classified, OECD members, Other small states, Pacific island small states, Post-demographic dividend, Pre-demographic dividend, Small states, South Asia, South Asia (IDA & IBRD), Sub-Saharan Africa, Sub-Saharan Africa (excluding high income), Sub-Saharan Africa (IDA & IBRD countries), Upper middle income, World
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

year_cols <- grep("^[0-9]{4}$", names(GDP_adjusted_PPP), value = TRUE)

GDP_adjusted_PPP_long <-GDP_adjusted_PPP %>%
  pivot_longer(
    cols = all_of(year_cols), # Use all_of() to pivot only the year columns identified above
    names_to = "year",
    values_to = "value"
  ) %>%
 mutate(
    year = as.integer(year), # Convert year to integer
    GDP_adjusted_PPP = as.numeric(value) # Convert value to numeric if not already
  )

library(tidyverse) 
library(readxl) 
library(here) 
library(countrycode)

path_to_sheet6 <-"C:/Users/puxin/OneDrive/Documents/PCAF-data-raw/GDP_current_dollar.xlsx"

# Attempt to read the file again
GDP_current_dollar <- read_excel(path_to_sheet6, range = "A1:AJ267")

GDP_current_dollar <- GDP_current_dollar %>%
  mutate(
    country_name = countrycode(sourcevar = Name, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = Name, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(sourcevar = Name, origin =
  "country.name", destination = "country.name")`.
Caused by warning:
! Some values were not matched unambiguously: Africa Eastern and Southern, Africa Western and Central, Arab World, Caribbean small states, Central Europe and the Baltics, Early-demographic dividend, East Asia & Pacific, East Asia & Pacific (excluding high income), East Asia & Pacific (IDA & IBRD countries), Euro area, Europe & Central Asia, Europe & Central Asia (excluding high income), Europe & Central Asia (IDA & IBRD countries), European Union, Fragile and conflict affected situations, Heavily indebted poor countries (HIPC), High income, IBRD only, IDA & IBRD total, IDA blend, IDA only, IDA total, Late-demographic dividend, Latin America & Caribbean, Latin America & Caribbean (excluding high income), Latin America & the Caribbean (IDA & IBRD countries), Least developed countries: UN classification, Low & middle income, Low income, Lower middle income, Middle East & North Africa, Middle East & North Africa (excluding high income), Middle East & North Africa (IDA & IBRD countries), Middle income, North America, Not classified, OECD members, Other small states, Pacific island small states, Post-demographic dividend, Pre-demographic dividend, Small states, South Asia, South Asia (IDA & IBRD), Sub-Saharan Africa, Sub-Saharan Africa (excluding high income), Sub-Saharan Africa (IDA & IBRD countries), Upper middle income, World
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

year_cols <- grep("^[0-9]{4}$", names(GDP_current_dollar), value = TRUE)

GDP_current_dollar_long <-GDP_current_dollar %>%
  pivot_longer(
    cols = all_of(year_cols), # Use all_of() to pivot only the year columns identified above
    names_to = "year",
    values_to = "value"
  ) %>%
 mutate(
    year = as.integer(year), # Convert year to integer
    GDP_current_dollar = as.numeric(value) # Convert value to numeric if not already
  )

library(tidyverse) 
library(readxl) 
library(here) 
library(countrycode)

path_to_sheet7 <-"C:/Users/puxin/OneDrive/Documents/PCAF-data-raw/population.xlsx"

# Attempt to read the file again
population <- read_excel(path_to_sheet7, range = "A1:BM267")

population <- population %>%
  mutate(
    country_name = countrycode(sourcevar = Name, origin = "country.name", destination = "country.name"),
    iso3c = countrycode(sourcevar = Name, origin = "country.name", destination = "iso3c")
  )

Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `country_name = countrycode(sourcevar = Name, origin =
  "country.name", destination = "country.name")`.
Caused by warning:
! Some values were not matched unambiguously: Africa Eastern and Southern, Africa Western and Central, Arab World, Caribbean small states, Central Europe and the Baltics, Early-demographic dividend, East Asia & Pacific, East Asia & Pacific (excluding high income), East Asia & Pacific (IDA & IBRD countries), Euro area, Europe & Central Asia, Europe & Central Asia (excluding high income), Europe & Central Asia (IDA & IBRD countries), European Union, Fragile and conflict affected situations, Heavily indebted poor countries (HIPC), High income, IBRD only, IDA & IBRD total, IDA blend, IDA only, IDA total, Late-demographic dividend, Latin America & Caribbean, Latin America & Caribbean (excluding high income), Latin America & the Caribbean (IDA & IBRD countries), Least developed countries: UN classification, Low & middle income, Low income, Lower middle income, Middle East & North Africa, Middle East & North Africa (excluding high income), Middle East & North Africa (IDA & IBRD countries), Middle income, North America, Not classified, OECD members, Other small states, Pacific island small states, Post-demographic dividend, Pre-demographic dividend, Small states, South Asia, South Asia (IDA & IBRD), Sub-Saharan Africa, Sub-Saharan Africa (excluding high income), Sub-Saharan Africa (IDA & IBRD countries), Upper middle income, World
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

year_cols <- grep("^[0-9]{4}$", names(population), value = TRUE)

population_long <-population %>%
  pivot_longer(
    cols = all_of(year_cols), # Use all_of() to pivot only the year columns identified above
    names_to = "year",
    values_to = "value"
  ) %>%
 mutate(
    year = as.integer(year), # Convert year to integer
    population = as.numeric(value) # Convert value to numeric if not already
  )

library(dplyr)
library(readr)

# Selecting specific columns from the first dataset
EDGAR_GHG_selected <- EDGAR_GHG_data_long %>%
  select(Name, year, EDGAR_GHG_value)

# Selecting specific columns from the second dataset
OECD_exports_selected <- OECD_exports_data_long %>%
  select(Name, year, OECD_exports_value)

# Merging the selected columns from the two datasets
merged_data <- full_join(EDGAR_GHG_selected, OECD_exports_selected, by = c("Name", "year"))

OECD_nonenergy_import_selected <- OECD_nonenergy_import_data_long %>%
  select(Name, year, OECD_non_energy_import)

# Merging it into the existing merged_data
merged_data <- full_join(merged_data, OECD_nonenergy_import_selected, by = c("Name", "year"))

OECD_D35_import_selected <- OECD_D35_import_data_long %>%
  select(Name, year, OECD_D35_import_data)

merged_data <- full_join(merged_data, OECD_D35_import_selected, by = c("Name", "year"))
# Continue with GDP adjusted for PPP
GDP_adjusted_PPP_selected <- GDP_adjusted_PPP_long %>%
  select(Name, year, GDP_adjusted_PPP)

merged_data <- full_join(merged_data, GDP_adjusted_PPP_selected, by = c("Name", "year"))

# Continue with GDP in current dollars
GDP_current_dollar_selected <- GDP_current_dollar_long %>%
  select(Name, year, GDP_current_dollar)

merged_data <- full_join(merged_data, GDP_current_dollar_selected, by = c("Name", "year"))

# Continue with population data
population_selected <- population_long %>%
  select(Name, year, population)

merged_data <- full_join(merged_data, population_selected, by = c("Name", "year"))
# Assuming merged_data is your final dataset
PCAF_data_final <- merged_data

# Specify the file path and name for the final CSV
final_csv_path <- "PCAF-data-final.csv" # Adjust the path as needed

# Save the final dataset to a CSV file
write_csv(PCAF_data_final, final_csv_path)