library(here)here() starts at /Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper
library(ggplot2)
library(readr)
library(sf)Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(ggplot2)
The question I want to answer is how green finance has affected green development in China. Green development here covers the performance of polluting industries and energy consumption.
library(here)here() starts at /Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper
library(ggplot2)
library(readr)
library(sf)Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(ggplot2)# Load necessary libraries
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ stringr 1.5.1
✔ forcats 1.0.0 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the datasets ----
# Each processed CSV is read from 03_data_processed/, resolved against the
# project root reported by here(). The console output of every read_csv()
# call is kept below it as `#>` comments.

# Energy indicators in long form: indicator / year / value
energy_data <- read_csv(here("03_data_processed", "China_energy.csv"))
#> Rows: 200 Columns: 3
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (1): indicator
#> dbl (2): year, value
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Environmental indicators in long form: indicator / year / value
environment_data <- read_csv(here("03_data_processed", "China_environment.csv"))
#> Rows: 80 Columns: 3
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (1): indicator
#> dbl (2): year, value
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Performance metrics in long form: pfmc_name / indicator / year / value
performance_data <- read_csv(here("03_data_processed", "China_performance.csv"))
#> Rows: 672 Columns: 4
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (2): pfmc_name, indicator
#> dbl (2): year, value
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the structure of the datasets
str(energy_data)spc_tbl_ [200 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ indicator: chr [1:200] "Growth Rate of GDP (%)" "Growth Rate of GDP (%)" "Growth Rate of GDP (%)" "Growth Rate of GDP (%)" ...
$ year : num [1:200] 2002 2003 2004 2005 2006 ...
$ value : num [1:200] 9.1 10 10.1 11.4 12.7 14.2 9.7 9.4 10.6 9.6 ...
- attr(*, "spec")=
.. cols(
.. indicator = col_character(),
.. year = col_double(),
.. value = col_double()
.. )
- attr(*, "problems")=<externalptr>
str(environment_data)spc_tbl_ [80 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ indicator: chr [1:80] "Total Investment in Environmental Pollution Control (100 million yuan)" "Total Investment in Environmental Pollution Control (100 million yuan)" "Total Investment in Environmental Pollution Control (100 million yuan)" "Total Investment in Environmental Pollution Control (100 million yuan)" ...
$ year : num [1:80] 2002 2003 2004 2005 2006 ...
$ value : num [1:80] 1456 1750 2058 2565 2780 ...
- attr(*, "spec")=
.. cols(
.. indicator = col_character(),
.. year = col_double(),
.. value = col_double()
.. )
- attr(*, "problems")=<externalptr>
str(performance_data)spc_tbl_ [672 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ pfmc_name: chr [1:672] "Rate of Return on Net Assets (%)" "Rate of Return on Net Assets (%)" "Rate of Return on Net Assets (%)" "Rate of Return on Net Assets (%)" ...
$ indicator: chr [1:672] "Petroleum and Petrochemical Industry" "Petroleum and Petrochemical Industry" "Petroleum and Petrochemical Industry" "Petroleum and Petrochemical Industry" ...
$ year : num [1:672] 2008 2009 2010 2011 2012 ...
$ value : num [1:672] 11.9 6.5 1.2 6.77 6.84 4.24 5.08 2.38 -5.2 -5.3 ...
- attr(*, "spec")=
.. cols(
.. pfmc_name = col_character(),
.. indicator = col_character(),
.. year = col_double(),
.. value = col_double()
.. )
- attr(*, "problems")=<externalptr>
# Interpolate missing values for a smooth line plot (if appropriate) ----

# Fill the NA entries of `y` by linear interpolation over `x`.
#
# x: numeric positions (here: years).
# y: numeric values, possibly containing NA.
# Returns a vector the same length as `y`; observed values are returned
# unchanged and only NA entries are replaced.
#
# approx() does not extrapolate with its default `rule = 1`, so gaps before the
# first or after the last observation stay NA.
interpolate_gaps <- function(x, y) {
  usable <- !is.na(x) & !is.na(y)
  # approx() stops with an error unless it has at least two distinct x
  # positions to interpolate between; leave such series untouched instead.
  if (length(unique(x[usable])) < 2L) {
    return(y)
  }
  # `ties = mean` is approx()'s default. Passing it explicitly gives the same
  # numbers but states that repeated x values are averaged on purpose, which
  # is what silences the "collapsing to unique 'x' values" warning.
  filled <- approx(x, y, xout = x, ties = mean)$y
  gaps <- is.na(y)
  y[gaps] <- filled[gaps]
  y
}

# The earlier render raised that warning for
# "Total Energy Consumption (tce/10,000 yuan)", i.e. that indicator contains
# repeated years.
# NOTE(review): presumably two different series share this label - confirm and
# disambiguate them in the processed CSV rather than relying on the averaging.
#
# `.by` groups for this mutate() only, so energy_data is not left grouped.
energy_data <- energy_data |>
  mutate(value = interpolate_gaps(year, value), .by = indicator)
# Line chart of the energy series after interpolation: one coloured line per
# indicator, year on the x axis
energy_data |>
  ggplot(aes(year, value, colour = indicator)) +
  geom_line() +
  labs(
    x = "Year",
    y = "Value",
    title = "Trends in Energy Production and Consumption"
  )
#> Warning: Removed 4 rows containing missing values or values outside the scale range
#> (`geom_line()`).
# Impute missing values with median ----
# Every NA in `value` is replaced by the median of the observed values of the
# same indicator; observed values are kept as they are. If an indicator has no
# observed value at all, its median is NA and the gaps stay NA.
#
# `.by` groups for this mutate() only, so the result is a plain (ungrouped)
# tibble and later verbs do not silently run per indicator.
environment_data_imputed <- environment_data |>
  mutate(
    value = coalesce(value, median(value, na.rm = TRUE)),
    .by = indicator
  )
# Stacked area chart built from the median-imputed series: one filled band per
# indicator, stacked on top of each other along the year axis
ggplot(
  environment_data_imputed,
  aes(x = year, y = value, fill = indicator)
) +
  geom_area(position = "stack") +
  theme_minimal() +
  labs(
    fill = "Indicator",
    y = "Investment (100 million yuan)",
    x = "Year",
    title = "Investment in Environmental Pollution Control Over Time"
  )
# Analyze performance metrics in a specific industry, e.g., Power Generation Industry
# performance_data stores the industry name in `indicator` and the metric name
# in `pfmc_name`; keep one industry and draw one coloured line per metric.
performance_data |>
  filter(indicator == "Power Generation Industry") |>
  ggplot(aes(year, value, colour = pfmc_name)) +
  geom_line() +
  labs(
    x = "Year",
    y = "Performance Value",
    title = "Performance Metrics in Power Generation Industry"
  )
Trends in Energy Production and Consumption:
Increasing Energy Production and Consumption per Capita: This could indicate that the energy sector is expanding, potentially with green finance contributing to the development of more energy resources, possibly including renewable energy.
Stable Growth Rates: Fluctuations in the growth rate of energy consumption and production might reflect changes in investment focus or economic conditions. Consistent or increasing investment in green energy could smooth out extreme fluctuations if it leads to a stable supply of renewable energy.
Flat Total Efficiency: The stagnation in total efficiency suggests that, despite potential investments in green technology, significant gains in energy efficiency may not yet be realized, or measurement methods may not capture efficiency improvements from green finance initiatives.
Flat Total Energy Consumption: If this metric refers to a total figure rather than per capita, the flat line could indicate that increased efficiency or shifts to renewable sources are balancing out increases in per capita consumption.
Investment in Environmental Pollution Control Over Time:
Increasing Investment: The growth in investment in environmental pollution control suggests that there is a focus on sustainability, potentially influenced by green finance. This could reflect investments in cleaner production technologies, pollution control measures, or environmental restoration projects.
Sharp Decline in the Last Year: A sharp decrease may indicate a change in policy, a reduction in available green finance, or external economic factors impacting investment.
Performance Metrics in Power Generation Industry:
Variable Profitability Metrics: The volatility in profitability and return on investment metrics could reflect the challenges the power generation industry faces in transitioning to green technology. Initial investments in green development may not yield immediate financial returns but can be expected to improve over time.
Flat Technological Investment Ratio: If green finance is directed toward technology, a flat investment ratio might suggest that such investments are not keeping pace with the growth of the industry or that other areas are being prioritized.
The increase in energy consumption and production per capita, along with the increasing investment in pollution control, can be seen as signs of development. However, the lack of significant efficiency gains may point to a need for further investment or more effective deployment of green finance. It’s also important to note that while green finance may support growth in clean energy and pollution control, it can take time for investments to translate into observable efficiency improvements and performance gains in industries like power generation.
# Read the province-level boundaries of China (31 MULTIPOLYGON features, see
# the output below) as an sf object.
# The GeoJSON sits directly in the project root, so the path is built with
# here() like every other input of this analysis instead of an absolute path
# that only exists on one machine. The file name is kept exactly as on disk.
china_map <- st_read(here("china-adminsitrative-regions_1174.geojson"))
#> Reading layer `china-adminsitrative-regions_1174' from data source
#> `/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper/china-adminsitrative-regions_1174.geojson'
#> using driver `GeoJSON'
#> Simple feature collection with 31 features and 10 fields
#> Geometry type: MULTIPOLYGON
#> Dimension: XY
#> Bounding box: xmin: 73.65154 ymin: 18.29375 xmax: 134.7716 ymax: 53.56086
#> Geodetic CRS: WGS 84

# List the attribute columns; NAME_1 (the English province name) is the key
# used later to merge the statistics onto the polygons.
names(china_map)
#> [1] "GID_0" "NAME_0" "GID_1" "NAME_1" "VARNAME_1" "NL_NAME_1"
#> [7] "TYPE_1" "ENGTYPE_1" "CC_1" "HASC_1" "geometry"
library (here)
library(readxl)
library(countrycode)
library(tidyverse)
# Province-level green-finance indicator in long form
# (gf_name / indicator / year / value), read from 03_data_processed/
regional_economy <- read_csv(here("03_data_processed", "gf_province.csv"))
#> Rows: 32 Columns: 4
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (2): gf_name, indicator
#> dbl (2): year, value
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach the green-finance indicator to the province polygons by matching the
# map's NAME_1 against the `indicator` column of regional_economy (which holds
# the province name in this table).
# merge() defaults to an inner join, which silently removes any polygon whose
# name has no match. all.x = TRUE keeps every province on the map with an NA
# value instead, which the choropleth's `na.value` is meant to handle.
# Rows of regional_economy without a polygon (32 rows for 31 features) are
# still dropped, exactly as before.
china_economy_map <- merge(
  china_map,
  regional_economy,
  by.x = "NAME_1",
  by.y = "indicator",
  all.x = TRUE
)
china_economy_mapSimple feature collection with 31 features and 13 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 73.65154 ymin: 18.29375 xmax: 134.7716 ymax: 53.56086
Geodetic CRS: WGS 84
First 10 features:
NAME_1 GID_0 NAME_0 GID_1 VARNAME_1
1 Anhui CHN China CHN.1_1 Ānhuī
2 Beijing CHN China CHN.2_1 Běijīng
3 Chongqing CHN China CHN.3_1 Chóngqìng
4 Fujian CHN China CHN.4_1 Fújiàn
5 Gansu CHN China CHN.5_1 Gānsù
6 Guangdong CHN China CHN.6_1 Guǎngdōng
7 Guangxi CHN China CHN.7_1 Guǎngxī Zhuàngzú
8 Guizhou CHN China CHN.8_1 Gùizhōu
9 Hainan CHN China CHN.9_1 Hǎinán
10 Hebei CHN China CHN.10_1 Héběi
NL_NAME_1 TYPE_1 ENGTYPE_1 CC_1 HASC_1
1 安徽|安徽 Shěng Province CN.AH
2 北京|北京 Zhíxiáshì Municipality CN.BJ
3 重慶|重庆 Zhíxiáshì Municipality CN.CQ
4 福建 Shěng Province CN.FJ
5 甘肅|甘肃 Shěng Province CN.GS
6 廣東|广东 Shěng Province CN.GD
7 廣西壯族自治區|广西壮族自治区 Zìzhìqu Autonomous Region CN.GX
8 貴州|贵州 Shěng Province CN.GZ
9 海南 Shěng Province CN.HA
10 河北 Shěng Province CN.HB
gf_name
1 Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
2 Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
3 Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
4 Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
5 Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
6 Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
7 Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
8 Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
9 Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
10 Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
year value geometry
1 2020 1.431222e-03 MULTIPOLYGON (((116.1296 29...
2 2020 4.418101e-05 MULTIPOLYGON (((117.3797 40...
3 2020 1.139818e-03 MULTIPOLYGON (((109.2702 28...
4 2020 2.303850e-03 MULTIPOLYGON (((118.1751 24...
5 2020 2.596212e-03 MULTIPOLYGON (((101.7742 33...
6 2020 9.086188e-04 MULTIPOLYGON (((109.7514 21...
7 2020 3.710713e-04 MULTIPOLYGON (((105.5414 23...
8 2020 3.562020e-03 MULTIPOLYGON (((104.53 24.7...
9 2020 3.849759e-04 MULTIPOLYGON (((109.7282 18...
10 2020 1.888206e-03 MULTIPOLYGON (((116.8943 39...
# Make sure the fill variable is numeric before it is mapped to a continuous
# colour scale. read_csv() already parsed `value` as double, so this coercion
# is a defensive step that leaves the numbers unchanged.
china_economy_map[["value"]] <- as.numeric(china_economy_map[["value"]])
china_economy_map$value [1] 1.431222e-03 4.418101e-05 1.139818e-03 2.303850e-03 2.596212e-03
[6] 9.086188e-04 3.710713e-04 3.562020e-03 3.849759e-04 1.888206e-03
[11] 7.321737e-04 3.777027e-04 2.986690e-03 5.274495e-04 5.363213e-04
[16] 1.088493e-03 3.048578e-04 7.697780e-04 2.093838e-03 2.778201e-03
[21] 1.027270e-04 3.097520e-04 2.599168e-03 3.383131e-03 8.507812e-04
[26] 3.757616e-04 1.633176e-03 1.387680e-03 8.325143e-04 1.293789e-03
[31] 2.752178e-03
library(ggplot2)
library(sf)
# Check the range of values
range(china_economy_map$value, na.rm = TRUE)[1] 4.418101e-05 3.562020e-03
# Choropleth of the green-finance indicator by province.
# The colour scale is pinned to a manual range from 0 up to the largest
# observed value, so the shading is anchored at zero instead of at the smallest
# observation.
ggplot(data = china_economy_map) +
  geom_sf(aes(fill = value), color = "white") +
  scale_fill_gradient(
    # `low` / `high` are the colours at the two ends of the data range
    low = "white", high = "dark green",
    # provinces without a value are not painted at all
    na.value = "transparent",
    limits = c(0, max(china_economy_map$value, na.rm = TRUE))
  ) +
  labs(title = "Regional Data Explorer") +
  theme_minimal()

# Province-level performance data in long form
# (pfmc_name / indicator / year / value). This assignment used to be glued onto
# the end of the comment line above it, so it was never executed as code.
regional_profits <- here("03_data_processed", "China_pfmc_province.csv") |>
  read_csv()
#> Rows: 32 Columns: 4
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (2): pfmc_name, indicator
#> dbl (2): year, value
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach the province-level performance values to the polygons by matching the
# map's NAME_1 against the `indicator` column of regional_profits.
# all.x = TRUE keeps provinces without a matching row on the map (as NA)
# instead of letting merge()'s default inner join drop their polygons.
china_profits_map <- merge(
  china_map,
  regional_profits,
  by.x = "NAME_1",
  by.y = "indicator",
  all.x = TRUE
)

# Choropleth of the performance value by province; the scale runs from 0 to the
# largest observed value and provinces without data are left unpainted.
# (The merge and this plot were previously fused on one line, which is not
# valid R.)
ggplot(data = china_profits_map) +
  geom_sf(aes(fill = value), color = "white") +
  scale_fill_gradient(
    low = "white", high = "dark red",
    na.value = "transparent",
    limits = c(0, max(china_profits_map$value, na.rm = TRUE))
  ) +
  labs(title = "Regional Data Explorer2") +
  theme_minimal()
From the figures we can see that, in general, the southeast coastal provinces are willing to spend money on controlling pollution, and they are also doing well in industrial profitability.