R Notebook

I used data from EPS database. To answer my research question, I want to use investment in pollution control to represent the index green finance (gf). I will use energy consumption and production compared to GDP to represent the index of green development (gd). Also I will use the profitability performance and technloogical investment ratio of the most polluting industries in China as the index of the performance of enterprises.

start with the energy data

library (here)

## here() starts at /Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper

library(readxl)
library(countrycode)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

getwd()

## [1] "/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper"

path_to_sheet <- here("00_data_raw","China_Energy_Yearly_(Nationwide).xls")
path_to_sheet

## [1] "/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper/00_data_raw/China_Energy_Yearly_(Nationwide).xls"

read_energy_sheet <- partial(
  .f = read_excel,
  path = path_to_sheet,
  sheet = "1",
  col_names = FALSE
)
sheet_header <- read_energy_sheet(range = "A1:U1")

## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`

sheet_header

sheet_header_processed <- sheet_header |> 
  # transpose the data
  t() |>
  # turn it back into a tibble
  as_tibble() |>
  # make them meaningful
  rename(year = V1)

## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

  # fill scenario down

sheet_header_processed

energy_info <- read_energy_sheet(range = "A2:U11")

## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`

energy_info_col_names <- names(energy_info)

sheet_headers_and_col_names <- sheet_header_processed |> 
  add_column(energy_info_col_names = energy_info_col_names)

energy_info_long <- energy_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "energy_info_col_names")

combined_data <- energy_info_long |> 
  left_join(sheet_headers_and_col_names, by = join_by(energy_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
    # convert the year column from character to numeric
    mutate(year = as.integer(year)) |> 
  select(indicator, year, value)

combined_data

energy_info_range <- "A2:U11"


energy_info <- read_energy_sheet(range = energy_info_range)

## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`

energy_info_col_names <- names(energy_info)

energy_info_long <- energy_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "energy_info_col_names") 

combined_data <- energy_info_long |> 
  left_join(sheet_headers_and_col_names, by = join_by(energy_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
  # case_when is supercharged if else
  mutate(
    # convert the year column from character to numeric
    year = as.integer(year)
  ) |> 
  select(indicator, year, value)

combined_data

read_energy_table <-
  function(energy_info_range) {
    
    energy_info <- read_energy_sheet(range = energy_info_range)
    
    energy_info_col_names <- names(energy_info)
    
    energy_info_long <- energy_info |>
      rename(indicator = `...1`) |>
      pivot_longer(cols = -indicator,
                   names_to = "energy_info_col_names") 
    
    combined_data <- energy_info_long |>
      left_join(sheet_headers_and_col_names, by = join_by(energy_info_col_names)) |>
      # filter out what were empty columns (where years are NA)
      filter(!is.na(year)) |>
      # case_when is supercharged if else
      mutate(
        # convert the year column from character to numeric
        year = as.integer(year)
      ) |>
      select(indicator, year, value)
    
    combined_data
  }

china_energy <-read_energy_table(energy_info_range = "A2:U11")

## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`

write_csv(china_energy,here("03_data_processed", "China_energy.csv"))

cleaned_data <- here("03_data_processed", "China_energy.csv") |> 
  read_csv()

## Rows: 200 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): indicator
## dbl (2): year, value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

cleaned_data

next deal with gf index

path_to_sheet2 <- here("00_data_raw","China_Environment_Yearly_(Nationwide).xls")
path_to_sheet2

## [1] "/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper/00_data_raw/China_Environment_Yearly_(Nationwide).xls"

read_env_sheet <- partial(
  .f = read_excel,
  path = path_to_sheet2,
  sheet = "1",
  col_names = FALSE
)
sheet_header2 <- read_env_sheet(range = "A1:U1")

## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`

sheet_header2

sheet_header_processed2 <- sheet_header2 |> 
  # transpose the data
  t() |>
  # turn it back into a tibble
  as_tibble() |>
  # make them meaningful
  rename(year = V1)
  # fill scenario down

sheet_header_processed2

env_info <- read_env_sheet(range = "A2:U5")

## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`

env_info_col_names <- names(env_info)

sheet_headers_and_col_names2 <- sheet_header_processed2 |> 
  add_column(env_info_col_names = env_info_col_names)

env_info_long <- env_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "env_info_col_names")

combined_data2 <- env_info_long |> 
  left_join(sheet_headers_and_col_names2, by = join_by(env_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
    # convert the year column from character to numeric
    mutate(year = as.integer(year)) |> 
  select(indicator, year, value)

combined_data2

read_env_table <-
  function(env_info_range) {
    
    env_info <- read_env_sheet(range = env_info_range)
    
    env_info_col_names <- names(env_info)
    
    env_info_long <- env_info |>
      rename(indicator = `...1`) |>
      pivot_longer(cols = -indicator,
                   names_to = "env_info_col_names") 
    
    combined_data2 <- env_info_long |>
      left_join(sheet_headers_and_col_names2, by = join_by(env_info_col_names)) |>
      # filter out what were empty columns (where years are NA)
      filter(!is.na(year)) |>
      # case_when is supercharged if else
      mutate(
        # convert the year column from character to numeric
        year = as.integer(year)
      ) |>
      select(indicator, year, value)
    
    combined_data2
  }

china_env <-read_env_table(env_info_range = "A2:U5")

## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`

write_csv(china_env,here("03_data_processed", "China_environment.csv"))

cleaned_data2 <- here("03_data_processed", "China_environment.csv") |> 
  read_csv()

## Rows: 80 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): indicator
## dbl (2): year, value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

cleaned_data2

finally deal with enterprises performance

path_to_sheet3 <- here("00_data_raw","Industrial_Economy-China_Enterprise_Performance_Evaluation_Standard_Value.xls")
path_to_sheet3

## [1] "/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper/00_data_raw/Industrial_Economy-China_Enterprise_Performance_Evaluation_Standard_Value.xls"

read_pfmc_sheet <- partial(
  .f = read_excel,
  path = path_to_sheet3,
  sheet = "1",
  col_names = FALSE
)
sheet_header3 <- read_pfmc_sheet(range = "B1:P1")

## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`

sheet_header3

sheet_header_processed3 <- sheet_header3 |> 
  # transpose the data
  t() |>
  # turn it back into a tibble
  as_tibble() |>
  # make them meaningful
  rename(year = V1)
  # fill scenario down

sheet_header_processed3

pfmc_name <- read_pfmc_sheet(range = "A2") |> 
  pull()

## New names:
## • `` -> `...1`

pfmc_name

## [1] "Rate of Return on Net Assets (%)"

pfmc_info <- read_pfmc_sheet(range = "B3:P8")

## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`

pfmc_info_col_names <- names(pfmc_info)

sheet_headers_and_col_names3 <- sheet_header_processed3 |> 
  add_column(pfmc_info_col_names = pfmc_info_col_names)

pfmc_info_long <- pfmc_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "pfmc_info_col_names") |> 
  add_column(pfmc_name)

combined_data3 <- pfmc_info_long |> 
  left_join(sheet_headers_and_col_names3, by = join_by(pfmc_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
    # convert the year column from character to numeric
    mutate(year = as.integer(year)) |> 
  select(pfmc_name, indicator, year, value)

combined_data3

pfmc_name_range <- "A2"
pfmc_info_range <- "B3:P8"


pfmc_name <- read_pfmc_sheet(range = pfmc_name_range) |> 
  pull()

## New names:
## • `` -> `...1`

pfmc_info <- read_pfmc_sheet(range = pfmc_info_range)

## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`

pfmc_info_col_names <- names(pfmc_info)

pfmc_info_long <- pfmc_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "pfmc_info_col_names") |> 
  add_column(pfmc_name)

combined_data3 <- pfmc_info_long |> 
  left_join(sheet_headers_and_col_names3, by = join_by(pfmc_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
  # case_when is supercharged if else
  mutate(
    # convert the year column from character to numeric
    year = as.integer(year)
  ) |> 
  select(pfmc_name, indicator, year, value)

combined_data3

read_pfmc_table <-
  function(pfmc_name_range, pfmc_info_range) {
    pfmc_name <-
      read_pfmc_sheet(range = pfmc_name_range) |>
      pull()
    
    pfmc_info <- read_pfmc_sheet(range = pfmc_info_range)
    
    pfmc_info_col_names <- names(pfmc_info)
    
    pfmc_info_long <- pfmc_info |>
      rename(indicator = `...1`) |>
      pivot_longer(cols = -indicator,
                   names_to = "pfmc_info_col_names") |>
      add_column(pfmc_name)
    
    combined_data3 <- pfmc_info_long |>
      left_join(sheet_headers_and_col_names3, by = join_by(pfmc_info_col_names)) |>
      # filter out what were empty columns (where years are NA)
      filter(!is.na(year)) |>
      # case_when is supercharged if else
      mutate(
        # convert the year column from character to numeric
        year = as.integer(year)
      ) |>
      select(pfmc_name, indicator,  year, value)
    
    combined_data3
  }

return_assets <- read_pfmc_table(
  pfmc_name_range = "A2",
  pfmc_info_range = "B3:P8"
)

## New names:
## New names:
## • `` -> `...1`

margin <- read_pfmc_table(
  pfmc_name_range = "A9",
  pfmc_info_range = "B9:P14"
)

## New names:
## New names:
## • `` -> `...1`

cash <- read_pfmc_table(
  pfmc_name_range = "A15",
  pfmc_info_range = "B15:P20"
)

## New names:
## New names:
## • `` -> `...1`

profits <- read_pfmc_table(
  pfmc_name_range = "A21",
  pfmc_info_range = "B21:P26"
)

## New names:
## New names:
## • `` -> `...1`

return_capital <- read_pfmc_table(
  pfmc_name_range = "A27",
  pfmc_info_range = "B27:P32"
)

## New names:
## New names:
## • `` -> `...1`

profits_growth <- read_pfmc_table(
  pfmc_name_range = "A33",
  pfmc_info_range = "B33:P38"
)

## New names:
## New names:
## • `` -> `...1`

total_assets <- read_pfmc_table(
  pfmc_name_range = "A39",
  pfmc_info_range = "B39:P44"
)

## New names:
## New names:
## • `` -> `...1`

tech_investment <- read_pfmc_table(
  pfmc_name_range = "A45",
  pfmc_info_range = "B45:P50"
)

## New names:
## New names:
## • `` -> `...1`

final_pfmc_table <- return_assets |> 
  bind_rows(margin) |> 
  bind_rows(cash) |>
  bind_rows(profits) |>
  bind_rows(return_capital)|>
  bind_rows(profits_growth)|>
  bind_rows(total_assets)|>
  bind_rows(tech_investment)

final_pfmc_table

write_csv(final_pfmc_table,here("03_data_processed", "China_performance.csv"))

cleaned_data3 <- here("03_data_processed", "China_performance.csv") |> 
  read_csv()

## Rows: 672 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): pfmc_name, indicator
## dbl (2): year, value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

cleaned_data3