# Load packages
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(lubridate)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(fixest)
library(sampleSelection)
## Loading required package: maxLik
## Loading required package: miscTools
##
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
##
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
library(modelsummary)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
financial <- read_excel("GISC Financial.xlsx")|>
filter(`(costat) Active/Inactive Status Marker`== "A", `(fic) Current ISO Country Code - Incorporation` == "USA") |>
select(subindustry_code = `(gsubind) GIC Sub-Industries`,
date = `(datadate) Data Date`,
gvkey = `(gvkey) Global Company Key`,
name = `(conm) Company Name`,
total_asset = `(at) Assets - Total`,
current_debt = `(dlc) Debt in Current Liabilities - Total`,
long_term_debt = `(dltt) Long-Term Debt - Total`,
net_income = `(ni) Net Income (Loss)`,
capex = `(capx) Capital Expenditures`) |>
mutate(total_debt = current_debt + long_term_debt,
ROA = 100*net_income/(total_asset - total_debt),
year = year(date),
gvkey = as.character(gvkey)) |>
select(-current_debt, -long_term_debt, -net_income, -total_debt, -date)
ghg_intensity = read_csv("GHG intensity.csv") |>
mutate(year = year(periodenddate), gvkey = as.character(gvkey)) |>
select(gvkey, year, ghg_intensity = di_319405)
## Rows: 1926 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): gvkey, ticker, companyname, country
## dbl (4): companyid, institutionid, fiscalyear, di_319405
## date (1): periodenddate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
esg_score <- read_csv("ESG Global Score.csv") |>
filter(
csascoretypename == "Raw",
aspectname %in% c(
"S&P Global ESG Score",
"Environmental Dimension",
"Social Dimension",
"Economic Governance Dimension"
)
) |>
transmute(
scoredate,
year = lubridate::year(scoredate),
gvkey = as.character(gvkey),
score_type = aspectname,
scorevalue
) |>
arrange(gvkey, year, score_type, scoredate) |>
group_by(gvkey, year, score_type) |>
slice_tail(n = 1) |>
ungroup() |>
select(-scoredate) |>
pivot_wider(
names_from = score_type,
values_from = scorevalue
) |>
rename(
ESG_composite_score = `S&P Global ESG Score`,
E_score = `Environmental Dimension`,
S_score = `Social Dimension`,
G_score = `Economic Governance Dimension`
)
## Rows: 5624 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): csascoretypename, scoretype, aspectname, ticker, companyname, country
## dbl (3): institutionid, scorevalue, gvkey
## date (1): scoredate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
gas_production <- read_csv("Oil and gas production.csv") |>
select(year = datadate, gvkey, natural_gas_production = ogpngq, oil_production = ogpoilq, country = fic) |>
filter(country == "USA", !is.na(natural_gas_production), !is.na(oil_production)) |>
mutate(year = year(year))|>
group_by(gvkey, year) |>
mutate(annual_total_gas_production = sum(natural_gas_production,na.rm = TRUE),
annual_total_oil_production = sum(oil_production, na.rm = TRUE),
gvkey = as.character(gvkey)) |>
slice(1) |>
mutate(
gas_boe = annual_total_gas_production / 6000,
total_boe = annual_total_oil_production + gas_boe,
gas_percent = 100*gas_boe / total_boe
) |>
select(year, gvkey, gas_percent)
## Rows: 18531 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): datafmt, indfmt, consol, gvkey, conm, tic, fic, conml
## dbl (3): gsubind, ogpngq, ogpoilq
## date (1): datadate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
brent_oil <- read_csv("DCOILBRENTEU.csv") |>
mutate(year = year(observation_date)) |>
rename("oil_price" = DCOILBRENTEU) |>
filter(!is.na(oil_price)) |>
group_by (year) |>
summarize(oil_price = mean(oil_price))
## Rows: 4221 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1): DCOILBRENTEU
## date (1): observation_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
joint_data <- financial |>
left_join(esg_score, by = join_by(gvkey, year)) |>
left_join(ghg_intensity, by = join_by(gvkey, year)) |>
left_join(gas_production, by = join_by(gvkey, year)) |>
left_join(brent_oil, by = join_by(year))
#Regression with Heckman Selection Model using lagged ESG scores
# Create lagged ESG variables and selection indicators
df <- joint_data |>
arrange(gvkey, year) |>
group_by(gvkey) |>
mutate(
ESG_lag = lag(ESG_composite_score),
E_lag = lag(E_score),
S_lag = lag(S_score),
G_lag = lag(G_score),
ESG_obs = if_else(!is.na(ESG_lag), 1, 0),
E_obs = if_else(!is.na(E_lag), 1, 0),
S_obs = if_else(!is.na(S_lag), 1, 0),
G_obs = if_else(!is.na(G_lag), 1, 0)
) |>
ungroup() |>
mutate(
log_capex = log(capex + 1),
subindustry_code = as.factor(subindustry_code),
year = as.integer(year)
) |>
filter(!is.na(log_capex), !is.na(year), !is.na(subindustry_code))
# 3) Function to create IMR from first-stage probit
add_imr <- function(data, sel_var, imr_name) {
f <- as.formula(
paste0(sel_var, " ~ log_capex + factor(year) + subindustry_code")
)
sel_mod <- glm(f, data = data, family = binomial(link = "probit"))
xb <- predict(sel_mod, type = "link")
phi <- dnorm(xb)
Phi <- pnorm(xb)
data[[imr_name]] <- ifelse(data[[sel_var]] == 1, phi / pmax(Phi, 1e-8), NA_real_)
data
}
# 4) Add IMRs for each score
df <- add_imr(df, "ESG_obs", "imr_ESG")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df <- add_imr(df, "E_obs", "imr_E")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df <- add_imr(df, "S_obs", "imr_S")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df <- add_imr(df, "G_obs", "imr_G")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# 5) Function to run 4 Heckman-style regressions for any lagged ESG variable
run_heckman_set <- function(data, x_var, sel_var, imr_var, label) {
reg_df <- data |>
filter(
.data[[sel_var]] == 1,
!is.na(ROA),
!is.na(.data[[x_var]]),
!is.na(ghg_intensity),
!is.na(gas_percent),
!is.na(oil_price),
!is.na(.data[[imr_var]])
)
f1 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + ", imr_var)
)
f2 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + oil_price + ", imr_var)
)
f3 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + oil_price + ", imr_var, " | gvkey")
)
f4 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + oil_price + ", imr_var, " | gvkey + year")
)
list(
setNames(list(feols(f1, data = reg_df, vcov = ~ gvkey)), paste0(label, " (1)"))[[1]],
setNames(list(feols(f2, data = reg_df, vcov = ~ gvkey)), paste0(label, " (2)"))[[1]],
setNames(list(feols(f3, data = reg_df, vcov = ~ gvkey)), paste0(label, " Firm FE (3)"))[[1]],
setNames(list(feols(f4, data = reg_df, vcov = ~ gvkey)), paste0(label, " Firm+Year FE (4)"))[[1]]
)
}
# 6) Run model sets
mods_ESG <- run_heckman_set(df, "ESG_lag", "ESG_obs", "imr_ESG", "Composite ESG")
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
mods_E <- run_heckman_set(df, "E_lag", "E_obs", "imr_E", "E score")
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
mods_S <- run_heckman_set(df, "S_lag", "S_obs", "imr_S", "S score")
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
mods_G <- run_heckman_set(df, "G_lag", "G_obs", "imr_G", "G score")
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
output_table <- modelsummary(
c(mods_ESG, mods_E, mods_S, mods_G),
stars = TRUE,
statistic = "({std.error})",
gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj|Within|Pseudo"
)
output_table
| |
(1) |
(2) |
(3) |
(4) |
(5) |
(6) |
(7) |
(8) |
(9) |
(10) |
(11) |
(12) |
(13) |
(14) |
(15) |
(16) |
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 |
| (Intercept) |
26.404 |
-53.129 |
|
|
26.408 |
-54.323 |
|
|
37.234 |
-49.976 |
|
|
14.800 |
-66.713 |
|
|
|
(27.120) |
(38.045) |
|
|
(24.504) |
(37.689) |
|
|
(23.378) |
(36.652) |
|
|
(25.238) |
(48.224) |
|
|
| ESG_lag |
0.328 |
-0.009 |
0.355 |
-0.849 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.388) |
(0.343) |
(0.704) |
(1.672) |
|
|
|
|
|
|
|
|
|
|
|
|
| ghg_intensity |
0.002 |
0.006 |
0.004 |
0.024 |
0.001 |
0.005 |
0.003 |
0.018 |
0.002 |
0.007 |
0.004 |
0.022 |
0.000 |
0.004 |
0.004 |
0.030 |
|
(0.004) |
(0.006) |
(0.007) |
(0.018) |
(0.005) |
(0.006) |
(0.007) |
(0.014) |
(0.004) |
(0.005) |
(0.008) |
(0.013) |
(0.005) |
(0.007) |
(0.007) |
(0.022) |
| gas_percent |
0.177 |
0.075 |
1.005 |
2.555 |
0.182 |
0.079 |
1.238 |
1.380 |
0.163 |
0.049 |
0.929 |
2.111 |
0.193 |
0.103 |
1.054 |
3.983 |
|
(0.200) |
(0.228) |
(1.767) |
(2.538) |
(0.199) |
(0.233) |
(1.705) |
(1.442) |
(0.191) |
(0.191) |
(1.668) |
(1.909) |
(0.196) |
(0.249) |
(1.994) |
(3.619) |
| imr_ESG |
-26.163 |
-17.004 |
16.106 |
-28.918 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(15.904) |
(13.363) |
(28.475) |
(114.589) |
|
|
|
|
|
|
|
|
|
|
|
|
| oil_price |
|
0.963+ |
1.011 |
|
|
0.957+ |
0.999 |
|
|
1.018+ |
1.016 |
|
|
0.930+ |
1.019 |
|
|
|
(0.486) |
(0.603) |
|
|
(0.487) |
(0.562) |
|
|
(0.512) |
(0.598) |
|
|
(0.470) |
(0.603) |
|
| E_lag |
|
|
|
|
0.308 |
0.032 |
0.575 |
0.404 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.298) |
(0.325) |
(0.577) |
(0.902) |
|
|
|
|
|
|
|
|
| imr_E |
|
|
|
|
-24.523 |
-16.362 |
26.176 |
1.476 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(15.835) |
(13.089) |
(28.389) |
(102.198) |
|
|
|
|
|
|
|
|
| S_lag |
|
|
|
|
|
|
|
|
0.094 |
-0.277 |
0.216 |
-0.370 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.308) |
(0.324) |
(0.411) |
(0.651) |
|
|
|
|
| imr_S |
|
|
|
|
|
|
|
|
-29.497+ |
-19.051 |
13.423 |
-18.165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(15.921) |
(13.098) |
(26.146) |
(95.413) |
|
|
|
|
| G_lag |
|
|
|
|
|
|
|
|
|
|
|
|
0.531 |
0.342 |
0.286 |
-1.880 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.349) |
(0.305) |
(0.725) |
(2.530) |
| imr_G |
|
|
|
|
|
|
|
|
|
|
|
|
-25.070 |
-13.855 |
14.202 |
-64.479 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(14.778) |
(13.389) |
(29.431) |
(139.339) |
| Num.Obs. |
44 |
44 |
43 |
37 |
44 |
44 |
43 |
37 |
44 |
44 |
43 |
37 |
44 |
44 |
43 |
37 |
| R2 |
0.103 |
0.291 |
0.490 |
0.650 |
0.106 |
0.291 |
0.500 |
0.646 |
0.096 |
0.299 |
0.488 |
0.646 |
0.122 |
0.302 |
0.489 |
0.669 |
| Std.Errors |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
| FE: gvkey |
|
|
X |
X |
|
|
X |
X |
|
|
X |
X |
|
|
X |
X |
| FE: year |
|
|
|
X |
|
|
|
X |
|
|
|
X |
|
|
|
X |
#Regression with Heckman Selection Model using ESG scores
# Create selection indicators
df2 <- joint_data |>
arrange(gvkey, year) |>
group_by(gvkey) |>
mutate(
ESG_obs = if_else(!is.na(ESG_composite_score), 1, 0),
E_obs = if_else(!is.na(E_score), 1, 0),
S_obs = if_else(!is.na(S_score), 1, 0),
G_obs = if_else(!is.na(G_score), 1, 0)
) |>
ungroup() |>
mutate(
log_capex = log(capex + 1),
subindustry_code = as.factor(subindustry_code),
year = as.integer(year)
) |>
filter(!is.na(log_capex), !is.na(year), !is.na(subindustry_code))
# 3) Function to create IMR from first-stage probit
add_imr <- function(data, sel_var, imr_name) {
f <- as.formula(
paste0(sel_var, " ~ log_capex + factor(year) + subindustry_code")
)
sel_mod <- glm(f, data = data, family = binomial(link = "probit"))
xb <- predict(sel_mod, type = "link")
phi <- dnorm(xb)
Phi <- pnorm(xb)
data[[imr_name]] <- ifelse(data[[sel_var]] == 1, phi / pmax(Phi, 1e-8), NA_real_)
data
}
# 4) Add IMRs for each score
df2 <- add_imr(df2, "ESG_obs", "imr_ESG")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df2 <- add_imr(df2, "E_obs", "imr_E")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df2 <- add_imr(df2, "S_obs", "imr_S")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df2 <- add_imr(df2, "G_obs", "imr_G")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# 5) Function to run 4 Heckman-style regressions for ESG variables
run_heckman_set <- function(data, x_var, sel_var, imr_var, label) {
reg_df <- data |>
filter(
.data[[sel_var]] == 1,
!is.na(ROA),
!is.na(.data[[x_var]]),
!is.na(ghg_intensity),
!is.na(gas_percent),
!is.na(oil_price),
!is.na(.data[[imr_var]])
)
f1 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + ", imr_var)
)
f2 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + oil_price + ", imr_var)
)
f3 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + oil_price + ", imr_var, " | gvkey")
)
f4 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + oil_price + ", imr_var, " | gvkey + year")
)
list(
setNames(list(feols(f1, data = reg_df, vcov = ~ gvkey)), paste0(label, " (1)"))[[1]],
setNames(list(feols(f2, data = reg_df, vcov = ~ gvkey)), paste0(label, " (2)"))[[1]],
setNames(list(feols(f3, data = reg_df, vcov = ~ gvkey)), paste0(label, " Firm FE (3)"))[[1]],
setNames(list(feols(f4, data = reg_df, vcov = ~ gvkey)), paste0(label, " Firm+Year FE (4)"))[[1]]
)
}
# 6) Run model sets
mods2_ESG <- run_heckman_set(df2, "ESG_composite_score", "ESG_obs", "imr_ESG", "Composite ESG")
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
mods2_E <- run_heckman_set(df2, "E_score", "E_obs", "imr_E", "E score")
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
mods2_S <- run_heckman_set(df2, "S_score", "S_obs", "imr_S", "S score")
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
mods2_G <- run_heckman_set(df2, "G_score", "G_obs", "imr_G", "G score")
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
output_table2 <- modelsummary(
c(mods2_ESG, mods2_E, mods2_S, mods2_G),
stars = TRUE,
statistic = "({std.error})",
gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj|Within|Pseudo"
)
output_table2
| |
(1) |
(2) |
(3) |
(4) |
(5) |
(6) |
(7) |
(8) |
(9) |
(10) |
(11) |
(12) |
(13) |
(14) |
(15) |
(16) |
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 |
| (Intercept) |
20.154 |
-50.242 |
|
|
30.804 |
-43.438 |
|
|
33.484 |
-45.826 |
|
|
2.420 |
-62.296 |
|
|
|
(39.083) |
(37.854) |
|
|
(35.889) |
(32.150) |
|
|
(36.377) |
(33.549) |
|
|
(36.412) |
(47.363) |
|
|
| ESG_composite_score |
0.551 |
0.120 |
-0.261 |
-2.432+ |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.591) |
(0.608) |
(0.331) |
(1.128) |
|
|
|
|
|
|
|
|
|
|
|
|
| ghg_intensity |
0.005 |
0.005 |
0.002 |
0.009 |
0.006 |
0.006 |
0.002 |
0.009 |
0.006 |
0.006 |
0.002 |
0.010 |
0.003 |
0.003 |
0.000 |
0.008 |
|
(0.006) |
(0.007) |
(0.005) |
(0.009) |
(0.006) |
(0.007) |
(0.005) |
(0.008) |
(0.006) |
(0.007) |
(0.005) |
(0.008) |
(0.005) |
(0.007) |
(0.005) |
(0.009) |
| gas_percent |
0.169 |
0.098 |
1.425 |
2.497 |
0.161 |
0.086 |
1.312 |
3.317 |
0.168 |
0.089 |
1.355 |
2.703 |
0.167 |
0.108 |
1.793 |
2.301 |
|
(0.273) |
(0.244) |
(1.917) |
(1.826) |
(0.246) |
(0.228) |
(1.985) |
(2.403) |
(0.266) |
(0.226) |
(1.747) |
(1.937) |
(0.284) |
(0.268) |
(2.166) |
(1.819) |
| imr_ESG |
-31.266 |
-20.678 |
25.879 |
60.834 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(27.117) |
(25.277) |
(25.134) |
(48.518) |
|
|
|
|
|
|
|
|
|
|
|
|
| oil_price |
|
0.885* |
1.231* |
|
|
0.933* |
1.264* |
|
|
0.920* |
1.272* |
|
|
0.827* |
1.126* |
|
|
|
(0.361) |
(0.485) |
|
|
(0.373) |
(0.525) |
|
|
(0.366) |
(0.514) |
|
|
(0.344) |
(0.438) |
|
| E_score |
|
|
|
|
0.318 |
-0.108 |
-0.416 |
-2.809+ |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.429) |
(0.430) |
(0.540) |
(1.362) |
|
|
|
|
|
|
|
|
| imr_E |
|
|
|
|
-33.575 |
-24.662 |
19.595 |
29.942 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(27.420) |
(25.171) |
(27.713) |
(51.762) |
|
|
|
|
|
|
|
|
| S_score |
|
|
|
|
|
|
|
|
0.340 |
-0.049 |
-0.370 |
-1.889 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.434) |
(0.428) |
(0.378) |
(1.059) |
|
|
|
|
| imr_S |
|
|
|
|
|
|
|
|
-35.687 |
-23.040 |
24.745 |
55.450 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(28.403) |
(26.015) |
(21.612) |
(49.208) |
|
|
|
|
| G_score |
|
|
|
|
|
|
|
|
|
|
|
|
0.818 |
0.434 |
0.298 |
-0.891 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.608) |
(0.658) |
(0.537) |
(1.253) |
| imr_G |
|
|
|
|
|
|
|
|
|
|
|
|
-28.507 |
-17.188 |
34.172 |
98.196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(25.426) |
(25.391) |
(29.736) |
(73.538) |
| Num.Obs. |
55 |
55 |
54 |
48 |
55 |
55 |
54 |
48 |
55 |
55 |
54 |
48 |
55 |
55 |
54 |
48 |
| R2 |
0.094 |
0.185 |
0.504 |
0.598 |
0.086 |
0.186 |
0.507 |
0.622 |
0.088 |
0.185 |
0.506 |
0.599 |
0.113 |
0.194 |
0.505 |
0.569 |
| Std.Errors |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
| FE: gvkey |
|
|
X |
X |
|
|
X |
X |
|
|
X |
X |
|
|
X |
X |
| FE: year |
|
|
|
X |
|
|
|
X |
|
|
|
X |
|
|
|
X |
# Run regression without Heckman selection
reg_df3 <- df2 |>
filter(!is.na(ESG_composite_score),
!is.na(ghg_intensity),
!is.na(gas_percent),
!is.na(oil_price))
run_esg_set_fe <- function(data, x_var, label) {
reg_df3 <- data |>
filter(
!is.na(ROA),
!is.na(.data[[x_var]]),
!is.na(ghg_intensity),
!is.na(gas_percent)
)
mods3 <- list(
feols(
as.formula(paste0("ROA ~ ", x_var, " | gvkey + year")),
data = reg_df3,
vcov = ~gvkey
),
feols(
as.formula(paste0("ROA ~ ", x_var, " + ghg_intensity | gvkey + year")),
data = reg_df3,
vcov = ~gvkey
),
feols(
as.formula(paste0("ROA ~ ", x_var, " + gas_percent | gvkey + year")),
data = reg_df3,
vcov = ~gvkey
),
feols(
as.formula(paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent | gvkey + year")),
data = reg_df3,
vcov = ~gvkey
)
)
names(mods3) <- c(
paste0(" (1)"),
paste0(" (2)"),
paste0(" (3)"),
paste0( " (4)")
)
mods3
}
# Run model sets
mods3_ESG <- run_esg_set_fe(reg_df3, "ESG_composite_score", "Composite ESG")
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
mods3_E <- run_esg_set_fe(reg_df3, "E_score", "E Score")
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
mods3_S <- run_esg_set_fe(reg_df3, "S_score", "S Score")
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
mods3_G <- run_esg_set_fe(reg_df3, "G_score", "G Score")
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
output_table3 <- modelsummary(
c(mods3_ESG, mods3_E, mods3_S, mods3_G),
stars = TRUE,
statistic = "({std.error})",
gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj|Within|Pseudo"
)
output_table3
| |
(1) |
(2) |
(3) |
(4) |
(1) |
(2) |
(3) |
(4) |
(1) |
(2) |
(3) |
(4) |
(1) |
(2) |
(3) |
(4) |
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 |
| ESG_composite_score |
-2.739 |
-2.747 |
-2.745+ |
-2.754+ |
|
|
|
|
|
|
|
|
|
|
|
|
|
(1.667) |
(1.592) |
(1.517) |
(1.428) |
|
|
|
|
|
|
|
|
|
|
|
|
| ghg_intensity |
|
0.011 |
|
0.012 |
|
0.009 |
|
0.010 |
|
0.011 |
|
0.012 |
|
0.011 |
|
0.012+ |
|
|
(0.007) |
|
(0.007) |
|
(0.006) |
|
(0.006) |
|
(0.007) |
|
(0.007) |
|
(0.006) |
|
(0.006) |
| gas_percent |
|
|
2.413 |
2.619 |
|
|
3.266 |
3.425 |
|
|
2.627 |
2.840 |
|
|
2.282 |
2.481 |
|
|
|
(2.221) |
(2.138) |
|
|
(2.604) |
(2.519) |
|
|
(2.264) |
(2.187) |
|
|
(2.424) |
(2.341) |
| E_score |
|
|
|
|
-2.826+ |
-2.751+ |
-3.054* |
-2.983+ |
|
|
|
|
|
|
|
|
|
|
|
|
|
(1.333) |
(1.367) |
(1.360) |
(1.407) |
|
|
|
|
|
|
|
|
| S_score |
|
|
|
|
|
|
|
|
-2.052 |
-2.069 |
-2.110 |
-2.133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(1.413) |
(1.363) |
(1.341) |
(1.268) |
|
|
|
|
| G_score |
|
|
|
|
|
|
|
|
|
|
|
|
-1.022 |
-1.110 |
-0.914 |
-0.999 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(1.697) |
(1.618) |
(1.559) |
(1.477) |
| Num.Obs. |
48 |
48 |
48 |
48 |
48 |
48 |
48 |
48 |
48 |
48 |
48 |
48 |
48 |
48 |
48 |
48 |
| R2 |
0.562 |
0.577 |
0.574 |
0.591 |
0.586 |
0.596 |
0.608 |
0.620 |
0.562 |
0.577 |
0.576 |
0.594 |
0.523 |
0.538 |
0.533 |
0.551 |
| Std.Errors |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
| FE: gvkey |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
| FE: year |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
X |
#trend aligns with that of the bigger sample using Heckman selection model where there is a significant negative relationship between ROA and E score only
#Check missing data - number of companies
n_distinct(esg_score$gvkey)
## [1] 84
n_distinct(financial$gvkey)
## [1] 210
n_distinct(gas_production$gvkey)
## [1] 223
n_distinct(ghg_intensity$gvkey)
## [1] 306
n_distinct(joint_data$gvkey)
## [1] 210
n_distinct(df2$gvkey)
## [1] 199
n_distinct(reg_df3$gvkey)
## [1] 14
#Check whether Heckman selection is significant - ie. whether selection bias matters
summary(mods2_E[[4]])
## OLS estimation, Dep. Var.: ROA
## Observations: 48
## Fixed-effects: gvkey: 12, year: 6
## Standard-errors: Clustered (gvkey)
## Estimate Std. Error t value Pr(>|t|)
## E_score -2.808875 1.362296 -2.061869 0.06366 .
## ghg_intensity 0.009045 0.007940 1.139230 0.27881
## gas_percent 3.317273 2.402967 1.380490 0.19484
## imr_E 29.942302 51.762426 0.578456 0.57461
## ... 1 variable was removed because of collinearity (oil_price)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 32.4 Adj. R2: 0.341566
## Within R2: 0.217103
summary(mods2_ESG[[4]])
## OLS estimation, Dep. Var.: ROA
## Observations: 48
## Fixed-effects: gvkey: 12, year: 6
## Standard-errors: Clustered (gvkey)
## Estimate Std. Error t value Pr(>|t|)
## ESG_composite_score -2.431556 1.128482 -2.15471 0.054201 .
## ghg_intensity 0.009351 0.008562 1.09217 0.298100
## gas_percent 2.496526 1.825810 1.36735 0.198804
## imr_ESG 60.834131 48.517689 1.25385 0.235887
## ... 1 variable was removed because of collinearity (oil_price)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 33.4 Adj. R2: 0.299658
## Within R2: 0.167274
#not significant
# Multicollinearity checks
cor(df2[, c("ghg_intensity", "gas_percent", "oil_price")], use = "complete.obs")
## ghg_intensity gas_percent oil_price
## ghg_intensity 1.0000000 -0.15165434 -0.14686080
## gas_percent -0.1516543 1.00000000 0.06177314
## oil_price -0.1468608 0.06177314 1.00000000
# Check VIF of models without Heckman selection
vif(mods3_ESG[[4]])
## Warning in vif.default(mods3_ESG[[4]]): No intercept: vifs may not be sensible.
## GVIF Df GVIF^(1/(2*Df))
## ESG_composite_score 1.90096 0 Inf
## ghg_intensity 1.90096 0 Inf
## gas_percent 1.90096 0 Inf
vif(mods3_E[[4]])
## Warning in vif.default(mods3_E[[4]]): No intercept: vifs may not be sensible.
## GVIF Df GVIF^(1/(2*Df))
## E_score 3.720113 0 Inf
## ghg_intensity 3.720113 0 Inf
## gas_percent 3.720113 0 Inf
vif(mods3_S[[4]])
## Warning in vif.default(mods3_S[[4]]): No intercept: vifs may not be sensible.
## GVIF Df GVIF^(1/(2*Df))
## S_score 2.421516 0 Inf
## ghg_intensity 2.421516 0 Inf
## gas_percent 2.421516 0 Inf
vif(mods3_G[[4]])
## Warning in vif.default(mods3_G[[4]]): No intercept: vifs may not be sensible.
## GVIF Df GVIF^(1/(2*Df))
## G_score 2.050702 0 Inf
## ghg_intensity 2.050702 0 Inf
## gas_percent 2.050702 0 Inf
#GVIG values are all within 1-5 range -> acceptable range -> no indication of multicollinearity