Load Data

# Read the merged IPO panel from the project's data folder
ipo_data <- read.csv(file = "../data/merged_IPO_2014.csv")

# Store the firm identifier as a factor.
# Year is intentionally left as read from the CSV (its conversion is disabled);
# the models below wrap it in factor() where year dummies are needed.
#ipo_data$Year <- as.factor(ipo_data$Year)
ipo_data[["ID"]] <- as.factor(ipo_data[["ID"]])

Murali Model

# Model specification: W_TobinsQ regressed on firm size, management,
# control and governance variables, plus year and ICB industry dummies.
hb_formula <- W_TobinsQ ~
  log_Assets_Total +

  # Management
  Founder_is_CEO +
  CEO_Change_at_IPO +
  CEO_Change_after_IPO +
  # Still to add: TMT turnover since IPO (number / share of CXOs new since IPO)
  # CEO prior board experience: 1 = was a board member at another company
  #   before the IPO
  # TMT prior board experience: prior experience of the other inside board
  #   members (CXOs on the board) at another company before the IPO
  CEO_prior_board_experience +
  TMT_prior_board_experience +

  # Controls
  log_Revenue +
  Leverage +
  # Leverage_2 +
  Company_Age +
  D_Profitability_pior_IPO +
  # Still to add: sales at IPO
  # Still to add: venture characteristics (backed by VC, ...)

  # Governance
  Voting_Right_Share +
  # Still to add: 1. dual class shares
  # Still to add: 2. founders' ownership %
  CEO_Chairman_Duality +
  CEO_Comp_Link_TSR +
  Comp_LT_Objectives +
  Board_Size +
  Num_Board_Meetings +
  Independent_Board +
  Board_skills_percent +

  # Year dummies
  factor(Year) +

  # Industry (ICB) dummies
  ICB_BasicMaterials +
  ICB_ConsumerDiscretionary +
  ICB_ConsumerStaples +
  ICB_Energy +
  ICB_Financials +
  ICB_HealthCare +
  ICB_Industrials +
  ICB_RealEstate +
  ICB_Technology +
  ICB_Telecommunications +
  ICB_Utilities

# Pooled panel regression indexed by firm (ID) and Year.
# NOTE(review): with model = "pooling" no individual effects are estimated, so
# `effect` presumably does not change the fit - confirm against the plm docs.
model_hb <- plm(
  formula = hb_formula,
  data = ipo_data,
  index = c("ID", "Year"),
  model = "pooling",
  effect = "individual"
)

# Coefficient tests using an HC1 covariance matrix clustered by group
hb_cluster_vcov <- vcovHC(model_hb, type = "HC1", cluster = "group")
coeftest(model_hb, vcov = hb_cluster_vcov)
## 
## t test of coefficients:
## 
##                              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                11.8036282  3.8550728  3.0618 0.002734 **
## log_Assets_Total           -0.3587961  0.2467291 -1.4542 0.148588   
## Founder_is_CEO              0.1940707  0.5856164  0.3314 0.740944   
## CEO_Change_at_IPO           0.0840458  0.7931052  0.1060 0.915789   
## CEO_Change_after_IPO        0.0032949  0.4482367  0.0074 0.994148   
## CEO_prior_board_experience -1.7947066  0.7001477 -2.5633 0.011646 * 
## TMT_prior_board_experience  0.5905009  0.4578455  1.2897 0.199707   
## log_Revenue                -0.0688237  0.0645777 -1.0657 0.288750   
## Leverage                   -0.7879871  0.8995466 -0.8760 0.382850   
## Company_Age                -0.0132916  0.0289316 -0.4594 0.646795   
## D_Profitability_pior_IPO   -0.6594241  0.6376486 -1.0341 0.303217   
## Voting_Right_Share          0.7090793  0.5672764  1.2500 0.213826   
## CEO_Chairman_Duality        0.6154563  0.4527972  1.3592 0.176710   
## CEO_Comp_Link_TSR           0.1140085  0.6011449  0.1897 0.849913   
## Comp_LT_Objectives          0.4242997  0.6084210  0.6974 0.486961   
## Board_Size                  0.0343364  0.1333236  0.2575 0.797217   
## Num_Board_Meetings          0.0346179  0.0505473  0.6849 0.494798   
## Independent_Board           0.0286901  0.0135679  2.1146 0.036609 * 
## Board_skills_percent       -0.0084760  0.0099629 -0.8508 0.396659   
## factor(Year)2016           -0.7399854  0.5894726 -1.2553 0.211880   
## factor(Year)2017           -0.5232791  0.6025015 -0.8685 0.386908   
## factor(Year)2018           -1.2070695  0.6281336 -1.9217 0.057100 . 
## factor(Year)2019           -1.1200756  0.5554405 -2.0166 0.046052 * 
## ICB_BasicMaterials         -1.7032560  1.3394273 -1.2716 0.206048   
## ICB_ConsumerDiscretionary  -1.2757608  0.8259764 -1.5445 0.125179   
## ICB_Energy                 -0.6464227  1.2788999 -0.5055 0.614200   
## ICB_Financials             -0.4954381  1.1272770 -0.4395 0.661117   
## ICB_HealthCare             -0.6835147  0.8119134 -0.8419 0.401600   
## ICB_Industrials            -0.9517540  0.9414566 -1.0109 0.314150   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Embed the pre-rendered Tobin's Q figure in the report.
# NOTE(review): this is an absolute, machine-specific path, whereas the data is
# loaded through a relative path ("../data/..."); consider a project-relative
# path here too - confirm where the Fig folder sits relative to this document.
knitr::include_graphics("C:/workspace/IPO/Fig/TobinsQ.png")

Descriptive Statistics

# Variables used in the models (outcome, regressors, year and ICB dummies).
# Names must match the column names of ipo_data exactly: a stray space in
# "Board_skills_percent " previously caused that variable to be dropped
# silently from the table.
vars <- c(
  "W_TobinsQ", "log_Assets_Total",
  "Founder_is_CEO", "CEO_Change_at_IPO", "CEO_Change_after_IPO",
  "log_Revenue", "Leverage", "Leverage_2", "Company_Age",
  "D_Profitability_pior_IPO",
  "CEO_Chairman_Duality", "CEO_Comp_Link_TSR", "Comp_LT_Objectives",
  "Board_Size", "Num_Board_Meetings", "Independent_Board",
  "Board_skills_percent",
  "Year",
  "CEO_prior_board_experience", "TMT_prior_board_experience",
  "Voting_Right_Share",
  "ICB_BasicMaterials", "ICB_ConsumerDiscretionary", "ICB_ConsumerStaples",
  "ICB_Energy", "ICB_Financials", "ICB_HealthCare", "ICB_Industrials",
  "ICB_RealEstate", "ICB_Technology", "ICB_Telecommunications",
  "ICB_Utilities"
)

# Keep only columns that exist in the data, but say so when something is
# missing so that typos in the list above do not go unnoticed.
missing_vars <- setdiff(vars, names(ipo_data))
if (length(missing_vars) > 0) {
  warning(
    "Variables not found in ipo_data and skipped: ",
    paste(missing_vars, collapse = ", "),
    call. = FALSE
  )
}
vars <- intersect(vars, names(ipo_data))

# Select data and drop rows with a missing value in any selected variable
desc_data <- ipo_data %>%
  select(all_of(vars)) %>%
  na.omit()

# Compute mean, SD, min and max per variable, rounded to 2 decimals.
# summarise() yields one wide row named "<variable>_<statistic>"; t() turns it
# into a single column with those names as row names.
desc_stats <- desc_data %>%
  summarise(across(everything(), list(
    Mean = ~round(mean(.), 2),
    SD   = ~round(sd(.), 2),
    Min  = ~round(min(.), 2),
    Max  = ~round(max(.), 2)
  ), .names = "{.col}_{.fn}")) %>%
  t() %>%
  as.data.frame()

# Move the "<variable>_<statistic>" row names into a regular column
colnames(desc_stats) <- "Value"
desc_stats$Variable <- rownames(desc_stats)
rownames(desc_stats) <- NULL

# Split at the LAST underscore (variable names themselves contain underscores)
# and reshape to one row per variable with one column per statistic.
desc_stats <- desc_stats %>%
  tidyr::separate(Variable, into = c("Variable", "Statistic"), sep = "_(?=[^_]+$)") %>%
  tidyr::pivot_wider(names_from = Statistic, values_from = Value)

#kable(desc_stats, digits = 2, caption = "Descriptive Statistics of Model Variables")

# Render as a styled, scrollable HTML table
desc_stats %>%
  kable(digits = 2, caption = "Descriptive Statistics of Model Variables") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  scroll_box(height = "400px")
Descriptive Statistics of Model Variables
Variable Mean SD Min Max
W_TobinsQ 2.07 2.27 0.00 9.47
log_Assets_Total 20.90 1.99 16.47 25.92
Founder_is_CEO 0.35 0.48 0.00 1.00
CEO_Change_at_IPO 0.06 0.23 0.00 1.00
CEO_Change_after_IPO 0.26 0.50 0.00 2.00
log_Revenue 18.42 5.74 0.00 23.52
Leverage 0.26 0.26 0.00 1.04
Leverage_2 1.54 5.31 -23.29 32.59
Company_Age 5.88 7.33 -6.00 48.00
D_Profitability_pior_IPO 0.59 0.49 0.00 1.00
CEO_Chairman_Duality 0.45 0.50 0.00 1.00
CEO_Comp_Link_TSR 0.20 0.40 0.00 1.00
Comp_LT_Objectives 0.25 0.43 0.00 1.00
Board_Size 8.40 2.07 4.00 14.00
Num_Board_Meetings 7.56 3.46 3.00 23.00
Independent_Board 77.15 14.22 33.33 100.00
Year 2017.70 1.12 2015.00 2019.00
CEO_prior_board_experience 0.98 0.14 0.00 1.00
TMT_prior_board_experience 0.49 0.50 0.00 1.00
Voting_Right_Share 0.12 0.32 0.00 1.00
ICB_BasicMaterials 0.03 0.16 0.00 1.00
ICB_ConsumerDiscretionary 0.21 0.41 0.00 1.00
ICB_ConsumerStaples 0.00 0.00 0.00 0.00
ICB_Energy 0.01 0.08 0.00 1.00
ICB_Financials 0.20 0.40 0.00 1.00
ICB_HealthCare 0.33 0.47 0.00 1.00
ICB_Industrials 0.12 0.33 0.00 1.00
ICB_RealEstate 0.00 0.00 0.00 0.00
ICB_Technology 0.10 0.31 0.00 1.00
ICB_Telecommunications 0.00 0.00 0.00 0.00
ICB_Utilities 0.00 0.00 0.00 0.00
# Step 1: Refit the model with lm() for the VIF calculation.
# The regressors match the pooled plm model above EXCEPT that the ICB industry
# dummies are left out here, so the VIFs below do not account for them.
lm_model <- lm(W_TobinsQ ~ 
                 log_Assets_Total +
                 Founder_is_CEO +
                 CEO_Change_at_IPO +
                 CEO_Change_after_IPO +
                 CEO_prior_board_experience +
                 TMT_prior_board_experience +
                 log_Revenue +
                 Leverage +
                 Company_Age +
                 D_Profitability_pior_IPO +
                 Voting_Right_Share +
                 CEO_Chairman_Duality +
                 CEO_Comp_Link_TSR +
                 Comp_LT_Objectives +
                 Board_Size +
                 Num_Board_Meetings +
                 Independent_Board +
                 Board_skills_percent +
                 factor(Year) ,   # Let R handle dummy coding
               data = ipo_data)


# Step 2: Compute variance inflation factors for the fitted lm model
# (presumably car::vif, given the GVIF columns in the printed output - confirm)
vif_values <- vif(lm_model)

# Step 3: View the results
print(vif_values)
##                                GVIF Df GVIF^(1/(2*Df))
## log_Assets_Total           2.870942  1        1.694385
## Founder_is_CEO             1.486594  1        1.219259
## CEO_Change_at_IPO          1.236087  1        1.111795
## CEO_Change_after_IPO       1.424252  1        1.193420
## CEO_prior_board_experience 1.554132  1        1.246648
## TMT_prior_board_experience 1.478495  1        1.215934
## log_Revenue                1.878326  1        1.370520
## Leverage                   1.612598  1        1.269881
## Company_Age                2.264415  1        1.504797
## D_Profitability_pior_IPO   1.833932  1        1.354228
## Voting_Right_Share         1.593313  1        1.262265
## CEO_Chairman_Duality       1.428608  1        1.195244
## CEO_Comp_Link_TSR          1.415026  1        1.189549
## Comp_LT_Objectives         1.415310  1        1.189668
## Board_Size                 1.668519  1        1.291712
## Num_Board_Meetings         1.473933  1        1.214057
## Independent_Board          1.593926  1        1.262508
## Board_skills_percent       1.378780  1        1.174215
## factor(Year)               1.589029  4        1.059599
# Variables entering the correlation matrix (the ICB industry dummies are
# deliberately excluded, see the list of controls below).
corr_vars <- c(
  "W_TobinsQ", "log_Assets_Total",
  "Founder_is_CEO", "CEO_Change_at_IPO", "CEO_Change_after_IPO",
  "log_Revenue", "Leverage", "Leverage_2", "Company_Age",
  "D_Profitability_pior_IPO",
  "CEO_Chairman_Duality", "CEO_Comp_Link_TSR", "Comp_LT_Objectives",
  "Board_Size", "Num_Board_Meetings", "Independent_Board",
  "Board_skills_percent",
  "CEO_prior_board_experience", "TMT_prior_board_experience",
  "Voting_Right_Share"
)
selected_data <- ipo_data[, corr_vars, drop = FALSE]

# controls:
#   'ICB_BasicMaterials','ICB_ConsumerDiscretionary','ICB_ConsumerStaples','ICB_Energy','ICB_Financials','ICB_HealthCare','ICB_Industrials','ICB_RealEstate','ICB_Technology','ICB_Telecommunications','ICB_Utilities'

# Flag columns without usable variation: all missing, or fewer than two
# distinct non-missing values. vapply() guarantees a plain TRUE/FALSE mask
# (a variance-based test can return NA and break the column subsetting).
zero_var_cols <- vapply(
  selected_data,
  function(x) length(unique(x[!is.na(x)])) < 2,
  logical(1)
)
selected_data_clean <- selected_data[, !zero_var_cols, drop = FALSE]

# Correlate the CLEANED data. Using the unfiltered data here would make a
# constant column produce a full row/column of NA, which in turn would flag
# every variable as problematic below.
correlation_matrix <- cor(selected_data_clean, use = "pairwise.complete.obs")

# Report any variable that still takes part in a non-finite correlation
# (e.g. too few overlapping observations for a pair).
na_locs <- which(!is.finite(correlation_matrix), arr.ind = TRUE)
problem_vars <- unique(c(rownames(correlation_matrix)[na_locs[, 1]],
                         colnames(correlation_matrix)[na_locs[, 2]]))
cat("Variables with non-finite correlations:\n")
print(problem_vars)
## Variables with non-finite correlations:
## character(0)
# Keep every cleaned column that was not reported as problematic
keep_cols <- setdiff(names(selected_data_clean), problem_vars)
selected_data_final <- selected_data_clean[, keep_cols]

# Pairwise-complete correlations of the remaining variables
correlation_matrix_final <- cor(
  selected_data_final,
  use = "pairwise.complete.obs"
)

# Upper-triangle correlation plot showing coefficients, with variables
# ordered by hierarchical clustering
corrplot(
  correlation_matrix_final,
  method = "number", type = "upper", order = "hclust",
  # text labels
  tl.cex = 0.6, tl.col = "black", tl.srt = 45,
  # coefficient labels
  number.cex = 0.5, addCoef.col = "black",
  # colour legend and margins
  cl.cex = 0.6,
  mar = c(0, 0, 2, 0)
)



Using LASSO to select the best model

I believe that our main analysis should focus on the variables selected by LASSO. This would give us a sound justification for which factors are the main long-run influences on a firm’s Tobin’s Q.

# # Full data
# lasso_data <- ipo_data %>%
#   select(W_TobinsQ, W_Leverage, Audit_Expertise, BaaCorpBond, Board_Size, Board_skills_percent, 
# CEO_Board_Member, CEO_Chairman_Duality, CEO_Change_after_IPO, CEO_Comp_Link_TSR, 
# Comp_Controversies_Score, Comp_LT_Objectives, D_Profitability_pior_IPO, ESG_Score, 
# Founder_is_CEO, GDP, Governance_Score, Independent_Board, Num_Board_Meetings, 
# PCE, PerCapitaIncome, PerCapitaPCE, Personal_Income, Profitability_prior_IPO, 
# ROA_Actual, RealEstate, RealEstate_scaled_robust, RealGDP_Millions, 
# RealPersonalIncome_Millions, SP500, ThreeMonthTBill, Total_Employment, 
# USTenYearBond, log_Capex_Total, log_Net_Cash_Flow_Opera, log_Revenue, 
# log_Total_Liabilities, Gold
# ) %>%
#   na.omit()
# 
# # STEP 1: Select relevant variables and remove missing values
# # lasso_data <- ipo_data %>%
# #   select(W_TobinsQ, W_Leverage, log_Total_Liabilities, log_Net_Cash_Flow_Opera,
# #          log_Revenue, ROA_Actual, CEO_Comp_Link_TSR, CEO_Chairman_Duality,D_Profitability_pior_IPO,
# #          CEO_Board_Member, Founder_is_CEO, CEO_Change_after_IPO, Comp_LT_Objectives,
# #          Board_Size, Board_skills_percent, Independent_Board, Num_Board_Meetings,
# #          ESG_Score, Governance_Score, Comp_Controversies_Score, Audit_Expertise) %>%
# #   na.omit()
# 
# # STEP 2: Prepare predictor matrix (X) and response vector (y)
# x <- model.matrix(W_TobinsQ ~ ., data = lasso_data)[, -1]  # remove intercept column
# y <- lasso_data$W_TobinsQ
# 
# # STEP 3: Run cross-validated LASSO
# set.seed(123)
# cv_lasso <- cv.glmnet(x, y, alpha = 1, nfolds = 10)  # alpha = 1 for LASSO
# 
# # Plot CV errors and selected lambda
# plot(cv_lasso)
# abline(v = log(cv_lasso$lambda.min), col = "red", lty = 2)
# title("LASSO Cross-Validation", line = 2.5)
# 
# # STEP 4: Fit LASSO with best lambda
# best_lambda <- cv_lasso$lambda.min
# lasso_model <- glmnet(x, y, alpha = 1, lambda = best_lambda)
# 
# # STEP 5: Show coefficients retained by LASSO
# selected_coefs <- coef(lasso_model)
# nonzero_coefs <- selected_coefs[which(selected_coefs != 0), , drop = FALSE]
# print(nonzero_coefs)
# 
# # STEP 6: Refit a standard linear model using selected variables
# # Get variable names of selected predictors (excluding intercept)
# selected_vars <- rownames(nonzero_coefs)[-1]
# 
# # Build formula dynamically
# lasso_formula <- as.formula(paste("W_TobinsQ ~", paste(selected_vars, collapse = " + ")))
# 
# # Refit model on original data (you can optionally standardize predictors here)
# final_model <- lm(lasso_formula, data = lasso_data)
# 
# # STEP 7: Summary of the final model
# summary(final_model)

References

  • Brauer, M. F. (2013). The effects of short-term and long-term oriented managerial behavior on medium-term financial performance: Longitudinal evidence from Europe. Journal of Business Economics and Management, 14(2), 386–402. https://doi.org/10.3846/16111699.2012.703965

  • Connelly, B. L., Certo, S. T., Ireland, R. D., & Reutzel, C. R. (2011). Signaling theory: A review and assessment. Journal of Management, 37(1), 39–67. https://doi.org/10.1177/0149206310388419

  • Eccles, R. G., Ioannou, I., & Serafeim, G. (2014). The impact of corporate sustainability on organizational processes and performance. Management Science, 60(11), 2835–2857. https://doi.org/10.1287/mnsc.2014.1984

  • Fama, E. F., & Jensen, M. C. (1998). Separation of ownership and control. SSRN Electronic Journal. https://doi.org/10.2139/ssrn.94034

  • Hambrick, D. C., & Mason, P. A. (1984). Upper echelons: The organization as a reflection of its top managers. The Academy of Management Review, 9(2), 193–206. https://doi.org/10.2307/258434

  • Suchman, M. C. (1995). Managing legitimacy: Strategic and institutional approaches. The Academy of Management Review, 20(3), 571–610. https://doi.org/10.2307/258788


check

📚 References