final_Linyi

Exploratory Data Analysis

The question I want to answer is how green finance has affected green development in China. Here, green development covers the performance of polluting industries and energy consumption.

library(here)

here() starts at /Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper

library(ggplot2)
library(readr)
library(sf)

Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE

library(ggplot2)

# Load necessary libraries
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ stringr   1.5.1
✔ forcats   1.0.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Read the processed national energy indicators (columns: indicator, year,
# value) from the project's processed-data folder
energy_data <- read_csv(here("03_data_processed", "China_energy.csv"))

Rows: 200 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): indicator
dbl (2): year, value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Read the processed environmental indicators (columns: indicator, year, value)
environment_data <- read_csv(
  here("03_data_processed", "China_environment.csv")
)

Rows: 80 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): indicator
dbl (2): year, value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Read the processed industry performance metrics (columns: pfmc_name,
# indicator, year, value)
performance_data <- read_csv(
  here("03_data_processed", "China_performance.csv")
)

Rows: 672 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): pfmc_name, indicator
dbl (2): year, value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Show column types and the first few values of the energy table
energy_data |>
  str()

spc_tbl_ [200 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ indicator: chr [1:200] "Growth Rate of GDP (%)" "Growth Rate of GDP (%)" "Growth Rate of GDP (%)" "Growth Rate of GDP (%)" ...
 $ year     : num [1:200] 2002 2003 2004 2005 2006 ...
 $ value    : num [1:200] 9.1 10 10.1 11.4 12.7 14.2 9.7 9.4 10.6 9.6 ...
 - attr(*, "spec")=
  .. cols(
  ..   indicator = col_character(),
  ..   year = col_double(),
  ..   value = col_double()
  .. )
 - attr(*, "problems")=<externalptr>

# Show column types and the first few values of the environment table
environment_data |>
  str()

spc_tbl_ [80 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ indicator: chr [1:80] "Total Investment in Environmental Pollution Control (100 million yuan)" "Total Investment in Environmental Pollution Control (100 million yuan)" "Total Investment in Environmental Pollution Control (100 million yuan)" "Total Investment in Environmental Pollution Control (100 million yuan)" ...
 $ year     : num [1:80] 2002 2003 2004 2005 2006 ...
 $ value    : num [1:80] 1456 1750 2058 2565 2780 ...
 - attr(*, "spec")=
  .. cols(
  ..   indicator = col_character(),
  ..   year = col_double(),
  ..   value = col_double()
  .. )
 - attr(*, "problems")=<externalptr>

# Show column types and the first few values of the performance table
performance_data |>
  str()

spc_tbl_ [672 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ pfmc_name: chr [1:672] "Rate of Return on Net Assets (%)" "Rate of Return on Net Assets (%)" "Rate of Return on Net Assets (%)" "Rate of Return on Net Assets (%)" ...
 $ indicator: chr [1:672] "Petroleum and Petrochemical Industry" "Petroleum and Petrochemical Industry" "Petroleum and Petrochemical Industry" "Petroleum and Petrochemical Industry" ...
 $ year     : num [1:672] 2008 2009 2010 2011 2012 ...
 $ value    : num [1:672] 11.9 6.5 1.2 6.77 6.84 4.24 5.08 2.38 -5.2 -5.3 ...
 - attr(*, "spec")=
  .. cols(
  ..   pfmc_name = col_character(),
  ..   indicator = col_character(),
  ..   year = col_double(),
  ..   value = col_double()
  .. )
 - attr(*, "problems")=<externalptr>

# Fill gaps in one indicator's series by linear interpolation over `year`.
#
# year:  numeric vector of observation years.
# value: numeric vector of observations, same length as `year`, may hold NA.
# Returns `value` with NA entries replaced by the interpolated value wherever
# one can be computed; observed values are never altered.
interpolate_gaps <- function(year, value) {
  known <- !is.na(year) & !is.na(value)
  # Nothing to fill, or too few known points for approx() (it needs two
  # distinct x values and would otherwise stop with an error).
  if (!anyNA(value) || length(unique(year[known])) < 2L) {
    return(value)
  }
  # `ties = mean` is approx()'s default handling of repeated years; spelling
  # it out keeps the result and silences the "collapsing to unique 'x'
  # values" warning.
  # NOTE(review): repeated years inside a single indicator suggest two
  # different series share one label -- verify the upstream data.
  filled <- approx(year[known], value[known], xout = year, ties = mean)$y
  # approx() uses rule = 1 by default, so gaps before the first or after the
  # last known year stay NA (no extrapolation); geom_line() drops those rows.
  coalesce(value, filled)
}

# Interpolate missing values per indicator for a smooth line plot, then drop
# the grouping so later steps operate on the whole table.
energy_data <- energy_data %>%
  group_by(indicator) %>%
  mutate(value = interpolate_gaps(year, value)) %>%
  ungroup()

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `value = ifelse(is.na(value), approx(year, value, year)$y,
  value)`.
ℹ In group 7: `indicator = "Total Energy Consumption (tce/10,000 yuan)"`.
Caused by warning in `regularize.values()`:
! collapsing to unique 'x' values

# Line chart of the energy indicators over time, one coloured line per
# indicator, drawn from the gap-filled table
energy_data |>
  ggplot(mapping = aes(x = year, y = value, colour = indicator)) +
  geom_line() +
  labs(
    title = "Trends in Energy Production and Consumption",
    x = "Year",
    y = "Value"
  )

Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_line()`).

# Replace each indicator's missing values with that indicator's own median
# (computed from its non-missing years). Observed values are left untouched.
# The grouping is removed afterwards so the result is a plain tibble.
environment_data_imputed <- environment_data %>%
  group_by(indicator) %>%
  mutate(value = coalesce(value, median(value, na.rm = TRUE))) %>%
  ungroup()

# Stacked area chart of the median-imputed environment table: one filled band
# per indicator, stacked on top of each other across years
ggplot(
  data = environment_data_imputed,
  mapping = aes(x = year, y = value, fill = indicator)
) +
  geom_area(position = "stack") +
  theme_minimal() +
  labs(
    title = "Investment in Environmental Pollution Control Over Time",
    x = "Year",
    y = "Investment (100 million yuan)",
    fill = "Indicator"
  )

# Performance metrics for a single industry (here: Power Generation Industry),
# one coloured line per metric name
ggplot(
  data = filter(performance_data, indicator == "Power Generation Industry"),
  mapping = aes(x = year, y = value, colour = pfmc_name)
) +
  geom_line() +
  labs(
    title = "Performance Metrics in Power Generation Industry",
    x = "Year",
    y = "Performance Value"
  )

Trends in Energy Production and Consumption:

Increasing Energy Production and Consumption per Capita: This could indicate that the energy sector is expanding, potentially with green finance contributing to the development of more energy resources, possibly including renewable energy.

Stable Growth Rates: Fluctuations in the growth rate of energy consumption and production might reflect changes in investment focus or economic conditions. Consistent or increasing investment in green energy could smooth out extreme fluctuations if it leads to a stable supply of renewable energy.

Flat Total Efficiency: The stagnation in total efficiency suggests that, despite potential investments in green technology, significant gains in energy efficiency may not yet be realized, or measurement methods may not capture efficiency improvements from green finance initiatives.

Flat Total Energy Consumption: If this metric refers to a total figure rather than per capita, the flat line could indicate that increased efficiency or shifts to renewable sources are balancing out increases in per capita consumption.
Investment in Environmental Pollution Control Over Time:

Increasing Investment: The growth in investment in environmental pollution control suggests that there is a focus on sustainability, potentially influenced by green finance. This could reflect investments in cleaner production technologies, pollution control measures, or environmental restoration projects.

Sharp Decline in the Last Year: A sharp decrease may indicate a change in policy, a reduction in available green finance, or external economic factors impacting investment.
Performance Metrics in Power Generation Industry:

Variable Profitability Metrics: The volatility in profitability and return on investment metrics could reflect the challenges the power generation industry faces in transitioning to green technology. Initial investments in green development may not yield immediate financial returns but can be expected to improve over time.

Flat Technological Investment Ratio: If green finance is directed toward technology, a flat investment ratio might suggest that such investments are not keeping pace with the growth of the industry or that other areas are being prioritized.

The increase in energy consumption and production per capita, along with the increasing investment in pollution control, can be seen as signs of development. However, the lack of significant efficiency gains may point to a need for further investment or more effective deployment of green finance. It’s also important to note that while green finance may support growth in clean energy and pollution control, it can take time for investments to translate into observable efficiency improvements and performance gains in industries like power generation.

# Read the administrative-region boundaries of China as an sf object.
# The path is built with here() so it resolves relative to the project root
# instead of one user's home directory; the file sits directly in that root.
# The misspelling "adminsitrative" is the file's actual name -- do not correct
# it here without renaming the file.
china_map <- st_read(here("china-adminsitrative-regions_1174.geojson"))

Reading layer `china-adminsitrative-regions_1174' from data source 
  `/Users/zhenglinyi/Desktop/24 spring/sustainable finance/final paper/china-adminsitrative-regions_1174.geojson' 
  using driver `GeoJSON'
Simple feature collection with 31 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 73.65154 ymin: 18.29375 xmax: 134.7716 ymax: 53.56086
Geodetic CRS:  WGS 84

# List the attribute columns available for joining (NAME_1 is used below)
china_map |>
  names()

 [1] "GID_0"     "NAME_0"    "GID_1"     "NAME_1"    "VARNAME_1" "NL_NAME_1"
 [7] "TYPE_1"    "ENGTYPE_1" "CC_1"      "HASC_1"    "geometry"

library (here)
library(readxl)
library(countrycode)
library(tidyverse)
# Read the province-level green-finance table (columns: gf_name, indicator,
# year, value)
regional_economy <- read_csv(here("03_data_processed", "gf_province.csv"))

Rows: 32 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): gf_name, indicator
dbl (2): year, value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Attach the green-finance values to the map by matching the map's NAME_1
# column to the table's `indicator` column.
# all.x = TRUE keeps every map region even when the table has no row for it;
# such regions get NA values (drawn with `na.value` in the plot) instead of
# silently disappearing from the map.
# NOTE(review): the table has 32 rows but the map has 31 regions, so one table
# row has no matching region -- confirm that row is expected to be dropped.
china_economy_map <- merge(
  china_map,
  regional_economy,
  by.x = "NAME_1",
  by.y = "indicator",
  all.x = TRUE
)
china_economy_map

Simple feature collection with 31 features and 13 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 73.65154 ymin: 18.29375 xmax: 134.7716 ymax: 53.56086
Geodetic CRS:  WGS 84
First 10 features:
      NAME_1 GID_0 NAME_0    GID_1        VARNAME_1
1      Anhui   CHN  China  CHN.1_1            Ānhuī
2    Beijing   CHN  China  CHN.2_1          Běijīng
3  Chongqing   CHN  China  CHN.3_1        Chóngqìng
4     Fujian   CHN  China  CHN.4_1           Fújiàn
5      Gansu   CHN  China  CHN.5_1            Gānsù
6  Guangdong   CHN  China  CHN.6_1        Guǎngdōng
7    Guangxi   CHN  China  CHN.7_1 Guǎngxī Zhuàngzú
8    Guizhou   CHN  China  CHN.8_1          Gùizhōu
9     Hainan   CHN  China  CHN.9_1           Hǎinán
10     Hebei   CHN  China CHN.10_1            Héběi
                       NL_NAME_1    TYPE_1         ENGTYPE_1 CC_1 HASC_1
1                      安徽|安徽     Shěng          Province       CN.AH
2                      北京|北京 Zhíxiáshì      Municipality       CN.BJ
3                      重慶|重庆 Zhíxiáshì      Municipality       CN.CQ
4                           福建     Shěng          Province       CN.FJ
5                      甘肅|甘肃     Shěng          Province       CN.GS
6                      廣東|广东     Shěng          Province       CN.GD
7  廣西壯族自治區|广西壮族自治区   Zìzhìqu Autonomous Region       CN.GX
8                      貴州|贵州     Shěng          Province       CN.GZ
9                           海南     Shěng          Province       CN.HA
10                          河北     Shěng          Province       CN.HB
                                                                            gf_name
1  Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
2  Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
3  Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
4  Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
5  Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
6  Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
7  Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
8  Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
9  Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
10 Investment Completed in Industrial Pollution Prevention/Local Fiscal Expenditure
   year        value                       geometry
1  2020 1.431222e-03 MULTIPOLYGON (((116.1296 29...
2  2020 4.418101e-05 MULTIPOLYGON (((117.3797 40...
3  2020 1.139818e-03 MULTIPOLYGON (((109.2702 28...
4  2020 2.303850e-03 MULTIPOLYGON (((118.1751 24...
5  2020 2.596212e-03 MULTIPOLYGON (((101.7742 33...
6  2020 9.086188e-04 MULTIPOLYGON (((109.7514 21...
7  2020 3.710713e-04 MULTIPOLYGON (((105.5414 23...
8  2020 3.562020e-03 MULTIPOLYGON (((104.53 24.7...
9  2020 3.849759e-04 MULTIPOLYGON (((109.7282 18...
10 2020 1.888206e-03 MULTIPOLYGON (((116.8943 39...

# Coerce the joined `value` column to numeric, then print it to confirm the
# values look as expected
china_economy_map[["value"]] <- as.numeric(china_economy_map[["value"]])
china_economy_map[["value"]]

 [1] 1.431222e-03 4.418101e-05 1.139818e-03 2.303850e-03 2.596212e-03
 [6] 9.086188e-04 3.710713e-04 3.562020e-03 3.849759e-04 1.888206e-03
[11] 7.321737e-04 3.777027e-04 2.986690e-03 5.274495e-04 5.363213e-04
[16] 1.088493e-03 3.048578e-04 7.697780e-04 2.093838e-03 2.778201e-03
[21] 1.027270e-04 3.097520e-04 2.599168e-03 3.383131e-03 8.507812e-04
[26] 3.757616e-04 1.633176e-03 1.387680e-03 8.325143e-04 1.293789e-03
[31] 2.752178e-03

library(ggplot2)
library(sf)

# Smallest and largest non-missing value, used to choose the fill scale limits
range(china_economy_map[["value"]], na.rm = TRUE)

[1] 4.418101e-05 3.562020e-03

# Choropleth of the green-finance value per region.
# The fill runs from white (`low`) at 0 to dark green (`high`) at the largest
# observed value; regions whose value is NA are drawn transparent (`na.value`).
# The lower limit of 0 assumes the values are non-negative.
ggplot(china_economy_map, aes(fill = value)) +
  geom_sf(colour = "white") +
  scale_fill_gradient(
    limits = c(0, max(china_economy_map$value, na.rm = TRUE)),
    low = "white",
    high = "dark green",
    na.value = "transparent"
  ) +
  theme_minimal() +
  labs(title = "Regional Data Explorer")

# Read the province-level industry performance table (columns: pfmc_name,
# indicator, year, value)
regional_profits <- read_csv(
  here("03_data_processed", "China_pfmc_province.csv")
)

Rows: 32 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): pfmc_name, indicator
dbl (2): year, value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Attach the performance values to the map by matching the map's NAME_1 column
# to the table's `indicator` column.
# all.x = TRUE keeps every map region even when the table has no row for it;
# such regions get NA values (drawn with `na.value` in the plot) instead of
# silently disappearing from the map.
china_profits_map <- merge(
  china_map,
  regional_profits,
  by.x = "NAME_1",
  by.y = "indicator",
  all.x = TRUE
)

# Choropleth of the performance value per region: white at 0 up to dark red at
# the largest observed value; regions with NA values are drawn transparent
ggplot(china_profits_map, aes(fill = value)) +
  geom_sf(colour = "white") +
  scale_fill_gradient(
    limits = c(0, max(china_profits_map$value, na.rm = TRUE)),
    low = "white",
    high = "dark red",
    na.value = "transparent"
  ) +
  theme_minimal() +
  labs(title = "Regional Data Explorer2")

From the figures we can see that, in general, the southeastern coastal provinces are willing to spend money on controlling pollution, and they are also doing well in industrial profitability.