I used data from EPS database. To answer my research question, I want to use investment in pollution control to represent the index green finance (gf). I will use energy consumption and production compared to GDP to represent the index of green development (gd). Also I will use the profitability performance and technloogical investment ratio of the most polluting industries in China as the index of the performance of enterprises.
library (here)
## here() starts at /Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper
library(readxl)
library(countrycode)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
getwd()
## [1] "/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper"
path_to_sheet <- here("00_data_raw","China_Energy_Yearly_(Nationwide).xls")
path_to_sheet
## [1] "/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper/00_data_raw/China_Energy_Yearly_(Nationwide).xls"
read_energy_sheet <- partial(
.f = read_excel,
path = path_to_sheet,
sheet = "1",
col_names = FALSE
)
sheet_header <- read_energy_sheet(range = "A1:U1")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
sheet_header
sheet_header_processed <- sheet_header |>
# transpose the data
t() |>
# turn it back into a tibble
as_tibble() |>
# make them meaningful
rename(year = V1)
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# fill scenario down
sheet_header_processed
energy_info <- read_energy_sheet(range = "A2:U11")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
energy_info_col_names <- names(energy_info)
sheet_headers_and_col_names <- sheet_header_processed |>
add_column(energy_info_col_names = energy_info_col_names)
energy_info_long <- energy_info |>
rename(indicator = `...1`) |>
pivot_longer(cols = -indicator,
names_to = "energy_info_col_names")
combined_data <- energy_info_long |>
left_join(sheet_headers_and_col_names, by = join_by(energy_info_col_names)) |>
# filter out what were empty columns (where years are NA)
filter(!is.na(year)) |>
# convert the year column from character to numeric
mutate(year = as.integer(year)) |>
select(indicator, year, value)
combined_data
energy_info_range <- "A2:U11"
energy_info <- read_energy_sheet(range = energy_info_range)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
energy_info_col_names <- names(energy_info)
energy_info_long <- energy_info |>
rename(indicator = `...1`) |>
pivot_longer(cols = -indicator,
names_to = "energy_info_col_names")
combined_data <- energy_info_long |>
left_join(sheet_headers_and_col_names, by = join_by(energy_info_col_names)) |>
# filter out what were empty columns (where years are NA)
filter(!is.na(year)) |>
# case_when is supercharged if else
mutate(
# convert the year column from character to numeric
year = as.integer(year)
) |>
select(indicator, year, value)
combined_data
read_energy_table <-
function(energy_info_range) {
energy_info <- read_energy_sheet(range = energy_info_range)
energy_info_col_names <- names(energy_info)
energy_info_long <- energy_info |>
rename(indicator = `...1`) |>
pivot_longer(cols = -indicator,
names_to = "energy_info_col_names")
combined_data <- energy_info_long |>
left_join(sheet_headers_and_col_names, by = join_by(energy_info_col_names)) |>
# filter out what were empty columns (where years are NA)
filter(!is.na(year)) |>
# case_when is supercharged if else
mutate(
# convert the year column from character to numeric
year = as.integer(year)
) |>
select(indicator, year, value)
combined_data
}
china_energy <-read_energy_table(energy_info_range = "A2:U11")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
write_csv(china_energy,here("03_data_processed", "China_energy.csv"))
cleaned_data <- here("03_data_processed", "China_energy.csv") |>
read_csv()
## Rows: 200 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): indicator
## dbl (2): year, value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cleaned_data
path_to_sheet2 <- here("00_data_raw","China_Environment_Yearly_(Nationwide).xls")
path_to_sheet2
## [1] "/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper/00_data_raw/China_Environment_Yearly_(Nationwide).xls"
read_env_sheet <- partial(
.f = read_excel,
path = path_to_sheet2,
sheet = "1",
col_names = FALSE
)
sheet_header2 <- read_env_sheet(range = "A1:U1")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
sheet_header2
sheet_header_processed2 <- sheet_header2 |>
# transpose the data
t() |>
# turn it back into a tibble
as_tibble() |>
# make them meaningful
rename(year = V1)
# fill scenario down
sheet_header_processed2
env_info <- read_env_sheet(range = "A2:U5")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
env_info_col_names <- names(env_info)
sheet_headers_and_col_names2 <- sheet_header_processed2 |>
add_column(env_info_col_names = env_info_col_names)
env_info_long <- env_info |>
rename(indicator = `...1`) |>
pivot_longer(cols = -indicator,
names_to = "env_info_col_names")
combined_data2 <- env_info_long |>
left_join(sheet_headers_and_col_names2, by = join_by(env_info_col_names)) |>
# filter out what were empty columns (where years are NA)
filter(!is.na(year)) |>
# convert the year column from character to numeric
mutate(year = as.integer(year)) |>
select(indicator, year, value)
combined_data2
read_env_table <-
function(env_info_range) {
env_info <- read_env_sheet(range = env_info_range)
env_info_col_names <- names(env_info)
env_info_long <- env_info |>
rename(indicator = `...1`) |>
pivot_longer(cols = -indicator,
names_to = "env_info_col_names")
combined_data2 <- env_info_long |>
left_join(sheet_headers_and_col_names2, by = join_by(env_info_col_names)) |>
# filter out what were empty columns (where years are NA)
filter(!is.na(year)) |>
# case_when is supercharged if else
mutate(
# convert the year column from character to numeric
year = as.integer(year)
) |>
select(indicator, year, value)
combined_data2
}
china_env <-read_env_table(env_info_range = "A2:U5")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
write_csv(china_env,here("03_data_processed", "China_environment.csv"))
cleaned_data2 <- here("03_data_processed", "China_environment.csv") |>
read_csv()
## Rows: 80 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): indicator
## dbl (2): year, value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cleaned_data2
path_to_sheet3 <- here("00_data_raw","Industrial_Economy-China_Enterprise_Performance_Evaluation_Standard_Value.xls")
path_to_sheet3
## [1] "/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper/00_data_raw/Industrial_Economy-China_Enterprise_Performance_Evaluation_Standard_Value.xls"
read_pfmc_sheet <- partial(
.f = read_excel,
path = path_to_sheet3,
sheet = "1",
col_names = FALSE
)
sheet_header3 <- read_pfmc_sheet(range = "B1:P1")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
sheet_header3
sheet_header_processed3 <- sheet_header3 |>
# transpose the data
t() |>
# turn it back into a tibble
as_tibble() |>
# make them meaningful
rename(year = V1)
# fill scenario down
sheet_header_processed3
pfmc_name <- read_pfmc_sheet(range = "A2") |>
pull()
## New names:
## • `` -> `...1`
pfmc_name
## [1] "Rate of Return on Net Assets (%)"
pfmc_info <- read_pfmc_sheet(range = "B3:P8")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
pfmc_info_col_names <- names(pfmc_info)
sheet_headers_and_col_names3 <- sheet_header_processed3 |>
add_column(pfmc_info_col_names = pfmc_info_col_names)
pfmc_info_long <- pfmc_info |>
rename(indicator = `...1`) |>
pivot_longer(cols = -indicator,
names_to = "pfmc_info_col_names") |>
add_column(pfmc_name)
combined_data3 <- pfmc_info_long |>
left_join(sheet_headers_and_col_names3, by = join_by(pfmc_info_col_names)) |>
# filter out what were empty columns (where years are NA)
filter(!is.na(year)) |>
# convert the year column from character to numeric
mutate(year = as.integer(year)) |>
select(pfmc_name, indicator, year, value)
combined_data3
pfmc_name_range <- "A2"
pfmc_info_range <- "B3:P8"
pfmc_name <- read_pfmc_sheet(range = pfmc_name_range) |>
pull()
## New names:
## • `` -> `...1`
pfmc_info <- read_pfmc_sheet(range = pfmc_info_range)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
pfmc_info_col_names <- names(pfmc_info)
pfmc_info_long <- pfmc_info |>
rename(indicator = `...1`) |>
pivot_longer(cols = -indicator,
names_to = "pfmc_info_col_names") |>
add_column(pfmc_name)
combined_data3 <- pfmc_info_long |>
left_join(sheet_headers_and_col_names3, by = join_by(pfmc_info_col_names)) |>
# filter out what were empty columns (where years are NA)
filter(!is.na(year)) |>
# case_when is supercharged if else
mutate(
# convert the year column from character to numeric
year = as.integer(year)
) |>
select(pfmc_name, indicator, year, value)
combined_data3
read_pfmc_table <-
function(pfmc_name_range, pfmc_info_range) {
pfmc_name <-
read_pfmc_sheet(range = pfmc_name_range) |>
pull()
pfmc_info <- read_pfmc_sheet(range = pfmc_info_range)
pfmc_info_col_names <- names(pfmc_info)
pfmc_info_long <- pfmc_info |>
rename(indicator = `...1`) |>
pivot_longer(cols = -indicator,
names_to = "pfmc_info_col_names") |>
add_column(pfmc_name)
combined_data3 <- pfmc_info_long |>
left_join(sheet_headers_and_col_names3, by = join_by(pfmc_info_col_names)) |>
# filter out what were empty columns (where years are NA)
filter(!is.na(year)) |>
# case_when is supercharged if else
mutate(
# convert the year column from character to numeric
year = as.integer(year)
) |>
select(pfmc_name, indicator, year, value)
combined_data3
}
return_assets <- read_pfmc_table(
pfmc_name_range = "A2",
pfmc_info_range = "B3:P8"
)
## New names:
## New names:
## • `` -> `...1`
margin <- read_pfmc_table(
pfmc_name_range = "A9",
pfmc_info_range = "B9:P14"
)
## New names:
## New names:
## • `` -> `...1`
cash <- read_pfmc_table(
pfmc_name_range = "A15",
pfmc_info_range = "B15:P20"
)
## New names:
## New names:
## • `` -> `...1`
profits <- read_pfmc_table(
pfmc_name_range = "A21",
pfmc_info_range = "B21:P26"
)
## New names:
## New names:
## • `` -> `...1`
return_capital <- read_pfmc_table(
pfmc_name_range = "A27",
pfmc_info_range = "B27:P32"
)
## New names:
## New names:
## • `` -> `...1`
profits_growth <- read_pfmc_table(
pfmc_name_range = "A33",
pfmc_info_range = "B33:P38"
)
## New names:
## New names:
## • `` -> `...1`
total_assets <- read_pfmc_table(
pfmc_name_range = "A39",
pfmc_info_range = "B39:P44"
)
## New names:
## New names:
## • `` -> `...1`
tech_investment <- read_pfmc_table(
pfmc_name_range = "A45",
pfmc_info_range = "B45:P50"
)
## New names:
## New names:
## • `` -> `...1`
final_pfmc_table <- return_assets |>
bind_rows(margin) |>
bind_rows(cash) |>
bind_rows(profits) |>
bind_rows(return_capital)|>
bind_rows(profits_growth)|>
bind_rows(total_assets)|>
bind_rows(tech_investment)
final_pfmc_table
write_csv(final_pfmc_table,here("03_data_processed", "China_performance.csv"))
cleaned_data3 <- here("03_data_processed", "China_performance.csv") |>
read_csv()
## Rows: 672 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): pfmc_name, indicator
## dbl (2): year, value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cleaned_data3