I used data from EPS database. To answer my research question, I want to use investment in pollution control to represent the index green finance (gf). I will use energy consumption and production compared to GDP to represent the index of green development (gd). Also I will use the profitability performance and technloogical investment ratio of the most polluting industries in China as the index of the performance of enterprises.

  1. start with the energy data
library (here)
## here() starts at /Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper
library(readxl)
library(countrycode)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
getwd()
## [1] "/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper"
path_to_sheet <- here("00_data_raw","China_Energy_Yearly_(Nationwide).xls")
path_to_sheet
## [1] "/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper/00_data_raw/China_Energy_Yearly_(Nationwide).xls"
read_energy_sheet <- partial(
  .f = read_excel,
  path = path_to_sheet,
  sheet = "1",
  col_names = FALSE
)
sheet_header <- read_energy_sheet(range = "A1:U1")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
sheet_header
sheet_header_processed <- sheet_header |> 
  # transpose the data
  t() |>
  # turn it back into a tibble
  as_tibble() |>
  # make them meaningful
  rename(year = V1)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
  # fill scenario down

sheet_header_processed
energy_info <- read_energy_sheet(range = "A2:U11")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
energy_info_col_names <- names(energy_info)

sheet_headers_and_col_names <- sheet_header_processed |> 
  add_column(energy_info_col_names = energy_info_col_names)

energy_info_long <- energy_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "energy_info_col_names") 
combined_data <- energy_info_long |> 
  left_join(sheet_headers_and_col_names, by = join_by(energy_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
    # convert the year column from character to numeric
    mutate(year = as.integer(year)) |> 
  select(indicator, year, value)

combined_data
energy_info_range <- "A2:U11"


energy_info <- read_energy_sheet(range = energy_info_range)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
energy_info_col_names <- names(energy_info)

energy_info_long <- energy_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "energy_info_col_names") 

combined_data <- energy_info_long |> 
  left_join(sheet_headers_and_col_names, by = join_by(energy_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
  # case_when is supercharged if else
  mutate(
    # convert the year column from character to numeric
    year = as.integer(year)
  ) |> 
  select(indicator, year, value)

combined_data
read_energy_table <-
  function(energy_info_range) {
    
    energy_info <- read_energy_sheet(range = energy_info_range)
    
    energy_info_col_names <- names(energy_info)
    
    energy_info_long <- energy_info |>
      rename(indicator = `...1`) |>
      pivot_longer(cols = -indicator,
                   names_to = "energy_info_col_names") 
    
    combined_data <- energy_info_long |>
      left_join(sheet_headers_and_col_names, by = join_by(energy_info_col_names)) |>
      # filter out what were empty columns (where years are NA)
      filter(!is.na(year)) |>
      # case_when is supercharged if else
      mutate(
        # convert the year column from character to numeric
        year = as.integer(year)
      ) |>
      select(indicator, year, value)
    
    combined_data
  }
china_energy <-read_energy_table(energy_info_range = "A2:U11")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
write_csv(china_energy,here("03_data_processed", "China_energy.csv"))

cleaned_data <- here("03_data_processed", "China_energy.csv") |> 
  read_csv()
## Rows: 200 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): indicator
## dbl (2): year, value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cleaned_data
  1. next deal with gf index
path_to_sheet2 <- here("00_data_raw","China_Environment_Yearly_(Nationwide).xls")
path_to_sheet2
## [1] "/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper/00_data_raw/China_Environment_Yearly_(Nationwide).xls"
read_env_sheet <- partial(
  .f = read_excel,
  path = path_to_sheet2,
  sheet = "1",
  col_names = FALSE
)
sheet_header2 <- read_env_sheet(range = "A1:U1")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
sheet_header2
sheet_header_processed2 <- sheet_header2 |> 
  # transpose the data
  t() |>
  # turn it back into a tibble
  as_tibble() |>
  # make them meaningful
  rename(year = V1)
  # fill scenario down

sheet_header_processed2
env_info <- read_env_sheet(range = "A2:U5")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
env_info_col_names <- names(env_info)

sheet_headers_and_col_names2 <- sheet_header_processed2 |> 
  add_column(env_info_col_names = env_info_col_names)

env_info_long <- env_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "env_info_col_names") 
combined_data2 <- env_info_long |> 
  left_join(sheet_headers_and_col_names2, by = join_by(env_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
    # convert the year column from character to numeric
    mutate(year = as.integer(year)) |> 
  select(indicator, year, value)

combined_data2
read_env_table <-
  function(env_info_range) {
    
    env_info <- read_env_sheet(range = env_info_range)
    
    env_info_col_names <- names(env_info)
    
    env_info_long <- env_info |>
      rename(indicator = `...1`) |>
      pivot_longer(cols = -indicator,
                   names_to = "env_info_col_names") 
    
    combined_data2 <- env_info_long |>
      left_join(sheet_headers_and_col_names2, by = join_by(env_info_col_names)) |>
      # filter out what were empty columns (where years are NA)
      filter(!is.na(year)) |>
      # case_when is supercharged if else
      mutate(
        # convert the year column from character to numeric
        year = as.integer(year)
      ) |>
      select(indicator, year, value)
    
    combined_data2
  }
china_env <-read_env_table(env_info_range = "A2:U5")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
write_csv(china_env,here("03_data_processed", "China_environment.csv"))

cleaned_data2 <- here("03_data_processed", "China_environment.csv") |> 
  read_csv()
## Rows: 80 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): indicator
## dbl (2): year, value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cleaned_data2
  1. finally deal with enterprises performance
path_to_sheet3 <- here("00_data_raw","Industrial_Economy-China_Enterprise_Performance_Evaluation_Standard_Value.xls")
path_to_sheet3
## [1] "/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper/00_data_raw/Industrial_Economy-China_Enterprise_Performance_Evaluation_Standard_Value.xls"
read_pfmc_sheet <- partial(
  .f = read_excel,
  path = path_to_sheet3,
  sheet = "1",
  col_names = FALSE
)
sheet_header3 <- read_pfmc_sheet(range = "B1:P1")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
sheet_header3
sheet_header_processed3 <- sheet_header3 |> 
  # transpose the data
  t() |>
  # turn it back into a tibble
  as_tibble() |>
  # make them meaningful
  rename(year = V1)
  # fill scenario down

sheet_header_processed3
pfmc_name <- read_pfmc_sheet(range = "A2") |> 
  pull()
## New names:
## • `` -> `...1`
pfmc_name
## [1] "Rate of Return on Net Assets (%)"
pfmc_info <- read_pfmc_sheet(range = "B3:P8")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
pfmc_info_col_names <- names(pfmc_info)

sheet_headers_and_col_names3 <- sheet_header_processed3 |> 
  add_column(pfmc_info_col_names = pfmc_info_col_names)

pfmc_info_long <- pfmc_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "pfmc_info_col_names") |> 
  add_column(pfmc_name)
combined_data3 <- pfmc_info_long |> 
  left_join(sheet_headers_and_col_names3, by = join_by(pfmc_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
    # convert the year column from character to numeric
    mutate(year = as.integer(year)) |> 
  select(pfmc_name, indicator, year, value)

combined_data3
pfmc_name_range <- "A2"
pfmc_info_range <- "B3:P8"


pfmc_name <- read_pfmc_sheet(range = pfmc_name_range) |> 
  pull()
## New names:
## • `` -> `...1`
pfmc_info <- read_pfmc_sheet(range = pfmc_info_range)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
pfmc_info_col_names <- names(pfmc_info)

pfmc_info_long <- pfmc_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "pfmc_info_col_names") |> 
  add_column(pfmc_name)

combined_data3 <- pfmc_info_long |> 
  left_join(sheet_headers_and_col_names3, by = join_by(pfmc_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
  # case_when is supercharged if else
  mutate(
    # convert the year column from character to numeric
    year = as.integer(year)
  ) |> 
  select(pfmc_name, indicator, year, value)

combined_data3
read_pfmc_table <-
  function(pfmc_name_range, pfmc_info_range) {
    pfmc_name <-
      read_pfmc_sheet(range = pfmc_name_range) |>
      pull()
    
    pfmc_info <- read_pfmc_sheet(range = pfmc_info_range)
    
    pfmc_info_col_names <- names(pfmc_info)
    
    pfmc_info_long <- pfmc_info |>
      rename(indicator = `...1`) |>
      pivot_longer(cols = -indicator,
                   names_to = "pfmc_info_col_names") |>
      add_column(pfmc_name)
    
    combined_data3 <- pfmc_info_long |>
      left_join(sheet_headers_and_col_names3, by = join_by(pfmc_info_col_names)) |>
      # filter out what were empty columns (where years are NA)
      filter(!is.na(year)) |>
      # case_when is supercharged if else
      mutate(
        # convert the year column from character to numeric
        year = as.integer(year)
      ) |>
      select(pfmc_name, indicator,  year, value)
    
    combined_data3
  }
return_assets <- read_pfmc_table(
  pfmc_name_range = "A2",
  pfmc_info_range = "B3:P8"
)
## New names:
## New names:
## • `` -> `...1`
margin <- read_pfmc_table(
  pfmc_name_range = "A9",
  pfmc_info_range = "B9:P14"
)
## New names:
## New names:
## • `` -> `...1`
cash <- read_pfmc_table(
  pfmc_name_range = "A15",
  pfmc_info_range = "B15:P20"
)
## New names:
## New names:
## • `` -> `...1`
profits <- read_pfmc_table(
  pfmc_name_range = "A21",
  pfmc_info_range = "B21:P26"
)
## New names:
## New names:
## • `` -> `...1`
return_capital <- read_pfmc_table(
  pfmc_name_range = "A27",
  pfmc_info_range = "B27:P32"
)
## New names:
## New names:
## • `` -> `...1`
profits_growth <- read_pfmc_table(
  pfmc_name_range = "A33",
  pfmc_info_range = "B33:P38"
)
## New names:
## New names:
## • `` -> `...1`
total_assets <- read_pfmc_table(
  pfmc_name_range = "A39",
  pfmc_info_range = "B39:P44"
)
## New names:
## New names:
## • `` -> `...1`
tech_investment <- read_pfmc_table(
  pfmc_name_range = "A45",
  pfmc_info_range = "B45:P50"
)
## New names:
## New names:
## • `` -> `...1`
final_pfmc_table <- return_assets |> 
  bind_rows(margin) |> 
  bind_rows(cash) |>
  bind_rows(profits) |>
  bind_rows(return_capital)|>
  bind_rows(profits_growth)|>
  bind_rows(total_assets)|>
  bind_rows(tech_investment)

final_pfmc_table
write_csv(final_pfmc_table,here("03_data_processed", "China_performance.csv"))

cleaned_data3 <- here("03_data_processed", "China_performance.csv") |> 
  read_csv()
## Rows: 672 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): pfmc_name, indicator
## dbl (2): year, value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cleaned_data3