# Load packages
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(lubridate)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(fixest)
library(sampleSelection)
## Loading required package: maxLik
## Loading required package: miscTools
##
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
##
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
library(modelsummary)
financial <- read_excel("GISC Financial.xlsx")|>
filter(`(costat) Active/Inactive Status Marker`== "A", `(fic) Current ISO Country Code - Incorporation` == "USA") |>
select(subindustry_code = `(gsubind) GIC Sub-Industries`,
date = `(datadate) Data Date`,
gvkey = `(gvkey) Global Company Key`,
name = `(conm) Company Name`,
total_asset = `(at) Assets - Total`,
current_debt = `(dlc) Debt in Current Liabilities - Total`,
long_term_debt = `(dltt) Long-Term Debt - Total`,
net_income = `(ni) Net Income (Loss)`,
capex = `(capx) Capital Expenditures`) |>
mutate(total_debt = current_debt + long_term_debt,
ROA = 100*net_income/(total_asset - total_debt),
year = year(date),
gvkey = as.character(gvkey)) |>
select(-total_asset, -current_debt, -long_term_debt, -net_income, -total_debt, -date)
ghg_intensity = read_csv("GHG intensity.csv") |>
mutate(year = year(periodenddate), gvkey = as.character(gvkey)) |>
select(gvkey, year, ghg_intensity = di_319405)
## Rows: 1926 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): gvkey, ticker, companyname, country
## dbl (4): companyid, institutionid, fiscalyear, di_319405
## date (1): periodenddate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
esg_score <- read_csv("ESG Global Score.csv") |>
filter(
csascoretypename == "Raw",
aspectname %in% c(
"S&P Global ESG Score",
"Environmental Dimension",
"Social Dimension",
"Economic Governance Dimension"
)
) |>
transmute(
scoredate,
year = lubridate::year(scoredate),
gvkey = as.character(gvkey),
score_type = aspectname,
scorevalue
) |>
arrange(gvkey, year, score_type, scoredate) |>
group_by(gvkey, year, score_type) |>
slice_tail(n = 1) |>
ungroup() |>
select(-scoredate) |>
pivot_wider(
names_from = score_type,
values_from = scorevalue
) |>
rename(
ESG_composite_score = `S&P Global ESG Score`,
E_score = `Environmental Dimension`,
S_score = `Social Dimension`,
G_score = `Economic Governance Dimension`
)
## Rows: 5624 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): csascoretypename, scoretype, aspectname, ticker, companyname, country
## dbl (3): institutionid, scorevalue, gvkey
## date (1): scoredate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
gas_production <- read_csv("Oil and gas production.csv") |>
select(year = datadate, gvkey, natural_gas_production = ogpngq, oil_production = ogpoilq, country = fic) |>
filter(country == "USA", !is.na(natural_gas_production), !is.na(oil_production)) |>
mutate(year = year(year))|>
group_by(gvkey, year) |>
mutate(annual_total_gas_production = sum(natural_gas_production,na.rm = TRUE),
annual_total_oil_production = sum(oil_production, na.rm = TRUE),
gvkey = as.character(gvkey)) |>
slice(1) |>
mutate(
gas_boe = annual_total_gas_production / 6000,
total_boe = annual_total_oil_production + gas_boe,
gas_percent = 100*gas_boe / total_boe
) |>
select(year, gvkey, gas_percent)
## Rows: 18531 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): datafmt, indfmt, consol, gvkey, conm, tic, fic, conml
## dbl (3): gsubind, ogpngq, ogpoilq
## date (1): datadate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
brent_oil <- read_csv("DCOILBRENTEU.csv") |>
mutate(year = year(observation_date)) |>
rename("oil_price" = DCOILBRENTEU) |>
filter(!is.na(oil_price)) |>
group_by (year) |>
summarize(oil_price = mean(oil_price))
## Rows: 4221 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1): DCOILBRENTEU
## date (1): observation_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
joint_data <- financial |>
left_join(esg_score, by = join_by(gvkey, year)) |>
left_join(ghg_intensity, by = join_by(gvkey, year)) |>
left_join(gas_production, by = join_by(gvkey, year)) |>
left_join(brent_oil, by = join_by(year))
n_distinct(esg_score$gvkey)
## [1] 84
n_distinct(financial$gvkey)
## [1] 210
n_distinct(gas_production$gvkey)
## [1] 223
n_distinct(ghg_intensity$gvkey)
## [1] 306
#Regression with Heckman Selection Model using lagged ESG scores
# Create lagged ESG variables and selection indicators
df <- joint_data |>
arrange(gvkey, year) |>
group_by(gvkey) |>
mutate(
ESG_lag = lag(ESG_composite_score),
E_lag = lag(E_score),
S_lag = lag(S_score),
G_lag = lag(G_score),
ESG_obs = if_else(!is.na(ESG_lag), 1, 0),
E_obs = if_else(!is.na(E_lag), 1, 0),
S_obs = if_else(!is.na(S_lag), 1, 0),
G_obs = if_else(!is.na(G_lag), 1, 0)
) |>
ungroup() |>
mutate(
log_capex = log(capex + 1),
subindustry_code = as.factor(subindustry_code),
year = as.integer(year)
) |>
filter(!is.na(log_capex), !is.na(year), !is.na(subindustry_code))
# 3) Function to create IMR from first-stage probit
add_imr <- function(data, sel_var, imr_name) {
f <- as.formula(
paste0(sel_var, " ~ log_capex + factor(year) + subindustry_code")
)
sel_mod <- glm(f, data = data, family = binomial(link = "probit"))
xb <- predict(sel_mod, type = "link")
phi <- dnorm(xb)
Phi <- pnorm(xb)
data[[imr_name]] <- ifelse(data[[sel_var]] == 1, phi / pmax(Phi, 1e-8), NA_real_)
data
}
# 4) Add IMRs for each score
df <- add_imr(df, "ESG_obs", "imr_ESG")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df <- add_imr(df, "E_obs", "imr_E")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df <- add_imr(df, "S_obs", "imr_S")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df <- add_imr(df, "G_obs", "imr_G")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# 5) Function to run 4 Heckman-style regressions for any lagged ESG variable
run_heckman_set <- function(data, x_var, sel_var, imr_var, label) {
reg_df <- data |>
filter(
.data[[sel_var]] == 1,
!is.na(ROA),
!is.na(.data[[x_var]]),
!is.na(ghg_intensity),
!is.na(gas_percent),
!is.na(oil_price),
!is.na(.data[[imr_var]])
)
f1 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + ", imr_var)
)
f2 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + oil_price + ", imr_var)
)
f3 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + oil_price + ", imr_var, " | gvkey")
)
f4 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + oil_price + ", imr_var, " | gvkey + year")
)
list(
setNames(list(feols(f1, data = reg_df, vcov = ~ gvkey)), paste0(label, " (1)"))[[1]],
setNames(list(feols(f2, data = reg_df, vcov = ~ gvkey)), paste0(label, " (2)"))[[1]],
setNames(list(feols(f3, data = reg_df, vcov = ~ gvkey)), paste0(label, " Firm FE (3)"))[[1]],
setNames(list(feols(f4, data = reg_df, vcov = ~ gvkey)), paste0(label, " Firm+Year FE (4)"))[[1]]
)
}
# 6) Run model sets
mods_ESG <- run_heckman_set(df, "ESG_lag", "ESG_obs", "imr_ESG", "Composite ESG")
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
mods_E <- run_heckman_set(df, "E_lag", "E_obs", "imr_E", "E score")
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
mods_S <- run_heckman_set(df, "S_lag", "S_obs", "imr_S", "S score")
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
mods_G <- run_heckman_set(df, "G_lag", "G_obs", "imr_G", "G score")
## NOTE: 1 fixed-effect singleton was removed (1 observation).
## NOTE: 1/6 fixed-effect singletons were removed (7 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
output_table <- modelsummary(
c(mods_ESG, mods_E, mods_S, mods_G),
stars = TRUE,
statistic = "({std.error})",
gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj|Within|Pseudo"
)
output_table
| |
(1) |
(2) |
(3) |
(4) |
(5) |
(6) |
(7) |
(8) |
(9) |
(10) |
(11) |
(12) |
(13) |
(14) |
(15) |
(16) |
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 |
| (Intercept) |
26.404 |
-53.129 |
|
|
26.408 |
-54.323 |
|
|
37.234 |
-49.976 |
|
|
14.800 |
-66.713 |
|
|
|
(27.120) |
(38.045) |
|
|
(24.504) |
(37.689) |
|
|
(23.378) |
(36.652) |
|
|
(25.238) |
(48.224) |
|
|
| ESG_lag |
0.328 |
-0.009 |
0.355 |
-0.849 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.388) |
(0.343) |
(0.704) |
(1.672) |
|
|
|
|
|
|
|
|
|
|
|
|
| ghg_intensity |
0.002 |
0.006 |
0.004 |
0.024 |
0.001 |
0.005 |
0.003 |
0.018 |
0.002 |
0.007 |
0.004 |
0.022 |
0.000 |
0.004 |
0.004 |
0.030 |
|
(0.004) |
(0.006) |
(0.007) |
(0.018) |
(0.005) |
(0.006) |
(0.007) |
(0.014) |
(0.004) |
(0.005) |
(0.008) |
(0.013) |
(0.005) |
(0.007) |
(0.007) |
(0.022) |
| gas_percent |
0.177 |
0.075 |
1.005 |
2.555 |
0.182 |
0.079 |
1.238 |
1.380 |
0.163 |
0.049 |
0.929 |
2.111 |
0.193 |
0.103 |
1.054 |
3.983 |
|
(0.200) |
(0.228) |
(1.767) |
(2.538) |
(0.199) |
(0.233) |
(1.705) |
(1.442) |
(0.191) |
(0.191) |
(1.668) |
(1.909) |
(0.196) |
(0.249) |
(1.994) |
(3.619) |
| imr_ESG |
-26.163 |
-17.004 |
16.106 |
-28.918 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(15.904) |
(13.363) |
(28.475) |
(114.589) |
|
|
|
|
|
|
|
|
|
|
|
|
| oil_price |
|
0.963+ |
1.011 |
|
|
0.957+ |
0.999 |
|
|
1.018+ |
1.016 |
|
|
0.930+ |
1.019 |
|
|
|
(0.486) |
(0.603) |
|
|
(0.487) |
(0.562) |
|
|
(0.512) |
(0.598) |
|
|
(0.470) |
(0.603) |
|
| E_lag |
|
|
|
|
0.308 |
0.032 |
0.575 |
0.404 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.298) |
(0.325) |
(0.577) |
(0.902) |
|
|
|
|
|
|
|
|
| imr_E |
|
|
|
|
-24.523 |
-16.362 |
26.176 |
1.476 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(15.835) |
(13.089) |
(28.389) |
(102.198) |
|
|
|
|
|
|
|
|
| S_lag |
|
|
|
|
|
|
|
|
0.094 |
-0.277 |
0.216 |
-0.370 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.308) |
(0.324) |
(0.411) |
(0.651) |
|
|
|
|
| imr_S |
|
|
|
|
|
|
|
|
-29.497+ |
-19.051 |
13.423 |
-18.165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(15.921) |
(13.098) |
(26.146) |
(95.413) |
|
|
|
|
| G_lag |
|
|
|
|
|
|
|
|
|
|
|
|
0.531 |
0.342 |
0.286 |
-1.880 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.349) |
(0.305) |
(0.725) |
(2.530) |
| imr_G |
|
|
|
|
|
|
|
|
|
|
|
|
-25.070 |
-13.855 |
14.202 |
-64.479 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(14.778) |
(13.389) |
(29.431) |
(139.339) |
| Num.Obs. |
44 |
44 |
43 |
37 |
44 |
44 |
43 |
37 |
44 |
44 |
43 |
37 |
44 |
44 |
43 |
37 |
| R2 |
0.103 |
0.291 |
0.490 |
0.650 |
0.106 |
0.291 |
0.500 |
0.646 |
0.096 |
0.299 |
0.488 |
0.646 |
0.122 |
0.302 |
0.489 |
0.669 |
| Std.Errors |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
| FE: gvkey |
|
|
X |
X |
|
|
X |
X |
|
|
X |
X |
|
|
X |
X |
| FE: year |
|
|
|
X |
|
|
|
X |
|
|
|
X |
|
|
|
X |
#Regression with Heckman Selection Model using ESG scores
# Create lagged ESG variables and selection indicators
df2 <- joint_data |>
arrange(gvkey, year) |>
group_by(gvkey) |>
mutate(
ESG_obs = if_else(!is.na(ESG_composite_score), 1, 0),
E_obs = if_else(!is.na(E_score), 1, 0),
S_obs = if_else(!is.na(S_score), 1, 0),
G_obs = if_else(!is.na(G_score), 1, 0)
) |>
ungroup() |>
mutate(
log_capex = log(capex + 1),
subindustry_code = as.factor(subindustry_code),
year = as.integer(year)
) |>
filter(!is.na(log_capex), !is.na(year), !is.na(subindustry_code))
# 3) Function to create IMR from first-stage probit
add_imr <- function(data, sel_var, imr_name) {
f <- as.formula(
paste0(sel_var, " ~ log_capex + factor(year) + subindustry_code")
)
sel_mod <- glm(f, data = data, family = binomial(link = "probit"))
xb <- predict(sel_mod, type = "link")
phi <- dnorm(xb)
Phi <- pnorm(xb)
data[[imr_name]] <- ifelse(data[[sel_var]] == 1, phi / pmax(Phi, 1e-8), NA_real_)
data
}
# 4) Add IMRs for each score
df2 <- add_imr(df, "ESG_obs", "imr_ESG")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df2 <- add_imr(df, "E_obs", "imr_E")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df2 <- add_imr(df, "S_obs", "imr_S")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df2 <- add_imr(df, "G_obs", "imr_G")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# 5) Function to run 4 Heckman-style regressions for any lagged ESG variable
run_heckman_set <- function(data, x_var, sel_var, imr_var, label) {
reg_df <- data |>
filter(
.data[[sel_var]] == 1,
!is.na(ROA),
!is.na(.data[[x_var]]),
!is.na(ghg_intensity),
!is.na(gas_percent),
!is.na(oil_price),
!is.na(.data[[imr_var]])
)
f1 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + ", imr_var)
)
f2 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + oil_price + ", imr_var)
)
f3 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + oil_price + ", imr_var, " | gvkey")
)
f4 <- as.formula(
paste0("ROA ~ ", x_var, " + ghg_intensity + gas_percent + oil_price + ", imr_var, " | gvkey + year")
)
list(
setNames(list(feols(f1, data = reg_df, vcov = ~ gvkey)), paste0(label, " (1)"))[[1]],
setNames(list(feols(f2, data = reg_df, vcov = ~ gvkey)), paste0(label, " (2)"))[[1]],
setNames(list(feols(f3, data = reg_df, vcov = ~ gvkey)), paste0(label, " Firm FE (3)"))[[1]],
setNames(list(feols(f4, data = reg_df, vcov = ~ gvkey)), paste0(label, " Firm+Year FE (4)"))[[1]]
)
}
# 6) Run model sets
mods2_ESG <- run_heckman_set(df2, "ESG_composite_score", "ESG_obs", "imr_ESG", "Composite ESG")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 4/5 fixed-effect singletons were removed (9 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
mods2_E <- run_heckman_set(df2, "E_score", "E_obs", "imr_E", "E score")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 4/5 fixed-effect singletons were removed (9 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
mods2_S <- run_heckman_set(df2, "S_score", "S_obs", "imr_S", "S score")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 4/5 fixed-effect singletons were removed (9 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
mods2_G <- run_heckman_set(df2, "G_score", "G_obs", "imr_G", "G score")
## NOTE: 3 fixed-effect singletons were removed (3 observations).
## NOTE: 4/5 fixed-effect singletons were removed (9 observations).
## The variable 'oil_price' has been removed because of collinearity (see
## $collin.var).
output_table2 <- modelsummary(
c(mods2_ESG, mods2_E, mods2_S, mods2_G),
stars = TRUE,
statistic = "({std.error})",
gof_omit = "AIC|BIC|Log.Lik|RMSE|Adj|Within|Pseudo"
)
output_table2
| |
(1) |
(2) |
(3) |
(4) |
(5) |
(6) |
(7) |
(8) |
(9) |
(10) |
(11) |
(12) |
(13) |
(14) |
(15) |
(16) |
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 |
| (Intercept) |
19.003 |
-54.141 |
|
|
25.355 |
-54.403 |
|
|
32.002 |
-62.065 |
|
|
1.346 |
-60.168 |
|
|
|
(35.409) |
(36.053) |
|
|
(31.354) |
(34.035) |
|
|
(29.785) |
(38.249) |
|
|
(40.002) |
(44.222) |
|
|
| ESG_composite_score |
0.459 |
-0.496 |
0.199 |
-1.186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.470) |
(0.374) |
(0.469) |
(1.445) |
|
|
|
|
|
|
|
|
|
|
|
|
| ghg_intensity |
0.003 |
0.008 |
0.002 |
0.031 |
0.003 |
0.009 |
0.005 |
0.031* |
0.004 |
0.008 |
0.001 |
0.031 |
0.002 |
0.007 |
0.000 |
0.032 |
|
(0.006) |
(0.007) |
(0.008) |
(0.017) |
(0.006) |
(0.006) |
(0.008) |
(0.010) |
(0.005) |
(0.006) |
(0.008) |
(0.020) |
(0.007) |
(0.008) |
(0.008) |
(0.023) |
| gas_percent |
0.119 |
0.067 |
-0.974 |
3.436 |
0.126 |
0.057 |
-1.336 |
2.891+ |
0.127 |
0.056 |
-0.837 |
3.339 |
0.103 |
0.069 |
-0.925 |
3.125 |
|
(0.216) |
(0.220) |
(2.577) |
(2.897) |
(0.201) |
(0.223) |
(2.878) |
(1.412) |
(0.212) |
(0.194) |
(2.441) |
(3.456) |
(0.214) |
(0.249) |
(2.540) |
(3.529) |
| imr_ESG |
-26.552 |
-25.374 |
-0.494 |
121.709 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(23.541) |
(22.618) |
(41.000) |
(142.764) |
|
|
|
|
|
|
|
|
|
|
|
|
| oil_price |
|
1.257+ |
1.031 |
|
|
1.320* |
1.234 |
|
|
1.292+ |
0.985 |
|
|
1.133+ |
0.987 |
|
|
|
(0.582) |
(0.728) |
|
|
(0.590) |
(0.753) |
|
|
(0.589) |
(0.666) |
|
|
(0.553) |
(0.725) |
|
| E_score |
|
|
|
|
0.311 |
-0.572 |
-0.769 |
-3.314*** |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.352) |
(0.395) |
(0.881) |
(0.610) |
|
|
|
|
|
|
|
|
| imr_E |
|
|
|
|
-26.989 |
-29.252 |
-23.508 |
185.420+ |
|
|
|
|
|
|
|
|
|
|
|
|
|
(23.005) |
(22.760) |
(49.966) |
(80.483) |
|
|
|
|
|
|
|
|
| S_score |
|
|
|
|
|
|
|
|
0.247 |
-0.500 |
0.397 |
-0.231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.356) |
(0.306) |
(0.422) |
(0.508) |
|
|
|
|
| imr_S |
|
|
|
|
|
|
|
|
-30.490 |
-23.796 |
3.139 |
87.185 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(23.367) |
(20.930) |
(36.483) |
(138.631) |
|
|
|
|
| G_score |
|
|
|
|
|
|
|
|
|
|
|
|
0.718 |
-0.120 |
0.391 |
0.656 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(0.487) |
(0.422) |
(0.574) |
(1.541) |
| imr_G |
|
|
|
|
|
|
|
|
|
|
|
|
-23.663 |
-20.865 |
1.598 |
60.913 |
|
|
|
|
|
|
|
|
|
|
|
|
|
(24.165) |
(23.476) |
(41.623) |
(125.917) |
| Num.Obs. |
35 |
35 |
32 |
26 |
35 |
35 |
32 |
26 |
35 |
35 |
32 |
26 |
35 |
35 |
32 |
26 |
| R2 |
0.100 |
0.329 |
0.393 |
0.625 |
0.093 |
0.341 |
0.406 |
0.723 |
0.090 |
0.339 |
0.397 |
0.617 |
0.125 |
0.315 |
0.396 |
0.619 |
| Std.Errors |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
by: gvkey |
| FE: gvkey |
|
|
X |
X |
|
|
X |
X |
|
|
X |
X |
|
|
X |
X |
| FE: year |
|
|
|
X |
|
|
|
X |
|
|
|
X |
|
|
|
X |
# Regression only on companies with ESG scores available
# model 1: with lag composite ESG
reg_df2 <- df |>
filter(!is.na(ESG_lag),
!is.na(ghg_intensity),
!is.na(gas_percent),
!is.na(oil_price))
model1 = feols(ROA ~ ESG_lag + ghg_intensity + gas_percent | gvkey + year, data = reg_df2)
## NOTES: 2 observations removed because of NA values (LHS: 2).
## 1/6 fixed-effect singletons were removed (7 observations).
summary(model1)
## OLS estimation, Dep. Var.: ROA
## Observations: 37
## Fixed-effects: gvkey: 11, year: 5
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## ESG_lag -0.734308 1.371445 -0.535426 0.598567
## ghg_intensity 0.020573 0.010856 1.895172 0.073389 .
## gas_percent 1.988374 2.918956 0.681194 0.503968
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 24.3 Adj. R2: 0.335084
## Within R2: 0.15955
#model 2: with ESG composite score
reg_df3 <- df |>
filter(!is.na(ESG_composite_score),
!is.na(ghg_intensity),
!is.na(gas_percent),
!is.na(oil_price))
model2 = feols(ROA ~ ESG_composite_score + ghg_intensity + gas_percent | gvkey + year, data = reg_df3)
## NOTES: 3 observations removed because of NA values (LHS: 3).
## 1/6 fixed-effect singletons were removed (7 observations).
summary(model2)
## OLS estimation, Dep. Var.: ROA
## Observations: 48
## Fixed-effects: gvkey: 12, year: 6
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## ESG_composite_score -2.754122 1.556699 -1.769206 0.087752 .
## ghg_intensity 0.011544 0.010762 1.072722 0.292555
## gas_percent 2.618743 2.640316 0.991829 0.329775
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 33.6 Adj. R2: 0.313677
## Within R2: 0.153719
#model 3: with E score
model3 =feols(ROA ~ E_score + ghg_intensity + gas_percent + capex | gvkey + year, data = reg_df3)
## NOTES: 3 observations removed because of NA values (LHS: 3).
## 1/6 fixed-effect singletons were removed (7 observations).
summary(model3)
## OLS estimation, Dep. Var.: ROA
## Observations: 48
## Fixed-effects: gvkey: 12, year: 6
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## E_score -3.020232 1.286668 -2.347329 0.026492 *
## ghg_intensity 0.010000 0.010513 0.951231 0.349925
## gas_percent 3.506526 2.603352 1.346928 0.189201
## capex -0.003431 0.005882 -0.583307 0.564528
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 32.2 Adj. R2: 0.347211
## Within R2: 0.223816
#model 4: with S score
model4 = feols(ROA ~ S_score + ghg_intensity + gas_percent + capex | gvkey + year, data = reg_df3)
## NOTES: 3 observations removed because of NA values (LHS: 3).
## 1/6 fixed-effect singletons were removed (7 observations).
summary(model4)
## OLS estimation, Dep. Var.: ROA
## Observations: 48
## Fixed-effects: gvkey: 12, year: 6
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## S_score -2.098446 1.197477 -1.752390 0.091058 .
## ghg_intensity 0.011776 0.010915 1.078940 0.290159
## gas_percent 2.865594 2.683127 1.068005 0.294974
## capex -0.001426 0.006155 -0.231696 0.818521
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 33.5 Adj. R2: 0.294263
## Within R2: 0.160859
#model 5: with G score
model5 = feols(ROA ~ G_score + ghg_intensity + gas_percent + capex | gvkey + year, data = reg_df3)
## NOTES: 3 observations removed because of NA values (LHS: 3).
## 1/6 fixed-effect singletons were removed (7 observations).
summary(model5)
## OLS estimation, Dep. Var.: ROA
## Observations: 48
## Fixed-effects: gvkey: 12, year: 6
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## G_score -0.957424 1.758802 -0.544361 0.59066
## ghg_intensity 0.011721 0.011467 1.022134 0.31579
## gas_percent 2.539226 2.821255 0.900034 0.37606
## capex -0.002545 0.006422 -0.396227 0.69505
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 35.2 Adj. R2: 0.222528
## Within R2: 0.075564
#trend aligns with that of the bigger sample using Heckman selection model where there is a significant negative relationship between ROA and E score only