Entrepreneurs vs. Ecosystems: Who’s Really Running the Show?

Bylund & McCaffrey (2017) with additional suggestions from D’Andrea (2023)

This model we explicit test the proposed models

# Step 1: Define variables and subset the data
vars <- c("CPI", "Tax_Score", "Opport", "Suboan", "Disent", "TEA_Ido")
data_subset <- na.omit(data[, vars])
data_scaled <- scale(data_subset)

# Step 2: Define inner model (structural model)
inner_model <- matrix(c(
  0, 0, 0, 0,  # Entrepreneurship
  1, 0, 0, 0,  # Inst_Quality
  1, 0, 0, 0,  # Burden
  1, 0, 0, 0   # Distortion
), nrow = 4, byrow = TRUE)

colnames(inner_model) <- rownames(inner_model) <- c("Entrepreneurship", "Inst_Quality", "Burden", "Distortion")

# Step 3: Define outer model (measurement model)
outer_model <- list(
  c("TEA_Ido"),             # Entrepreneurship (reflective)
  c("CPI", "Tax_Score"),    # Inst_Quality (reflective)
  c("Suboan", "Disent"),    # Burden (formative)
  c("Opport")               # Distortion (formative)
)

# Step 4: Define measurement modes
modes <- c("A", "A", "B", "B")  # Reflective, Reflective, Formative, Formative

# Step 5: Run PLS-PM with bootstrapping
pls_model_boot <- plspm(data_scaled, inner_model, outer_model, modes = modes, 
                        boot.val = TRUE, br = 500)

# Step 6: Visualize model diagram
plot(pls_model_boot)

# Other results
print(pls_model_boot$path_coefs)     # Path from uncertainty to entrepreneurship

##                  Entrepreneurship Inst_Quality Burden Distortion
## Entrepreneurship        0.0000000            0      0          0
## Inst_Quality            0.5177387            0      0          0
## Burden                  0.4101073            0      0          0
## Distortion              0.2130237            0      0          0

print(pls_model_boot$outer_model)    # Weights and loadings

##        name            block     weight    loading communality redundancy
## 1   TEA_Ido Entrepreneurship  1.0000000  1.0000000   1.0000000 0.00000000
## 2       CPI     Inst_Quality  0.6706301  0.9565346   0.9149585 0.24525767
## 3 Tax_Score     Inst_Quality  0.4083906  0.8778828   0.7706781 0.20658283
## 4    Suboan           Burden -1.1355900  0.3994016   0.1595216 0.02682963
## 5    Disent           Burden  1.1757771 -0.4647510   0.2159935 0.03632752
## 6    Opport       Distortion  1.0000000  1.0000000   1.0000000 0.04537911

summary(pls_model_boot)              # Full summary

## PARTIAL LEAST SQUARES PATH MODELING (PLS-PM) 
## 
## ---------------------------------------------------------- 
## MODEL SPECIFICATION 
## 1   Number of Cases      142 
## 2   Latent Variables     4 
## 3   Manifest Variables   6 
## 4   Scale of Data        Standardized Data 
## 5   Non-Metric PLS       FALSE 
## 6   Weighting Scheme     centroid 
## 7   Tolerance Crit       1e-06 
## 8   Max Num Iters        100 
## 9   Convergence Iters    3 
## 10  Bootstrapping        TRUE 
## 11  Bootstrap samples    500 
## 
## ---------------------------------------------------------- 
## BLOCKS DEFINITION 
##                Block         Type   Size   Mode
## 1   Entrepreneurship    Exogenous      1      A
## 2       Inst_Quality   Endogenous      2      A
## 3             Burden   Endogenous      2      B
## 4         Distortion   Endogenous      1      B
## 
## ---------------------------------------------------------- 
## BLOCKS UNIDIMENSIONALITY 
##                   Mode  MVs  C.alpha  DG.rho  eig.1st  eig.2nd
## Entrepreneurship     A    1    1.000   1.000     1.00    0.000
## Inst_Quality         A    2    0.824   0.919     1.70    0.300
## Burden               B    2    0.000   0.000     1.63    0.374
## Distortion           B    1    1.000   1.000     1.00    0.000
## 
## ---------------------------------------------------------- 
## OUTER MODEL 
##                   weight  loading  communality  redundancy
## Entrepreneurship                                          
##   1 TEA_Ido        1.000    1.000        1.000      0.0000
## Inst_Quality                                              
##   2 CPI            0.671    0.957        0.915      0.2453
##   2 Tax_Score      0.408    0.878        0.771      0.2066
## Burden                                                    
##   3 Suboan        -1.136    0.399        0.160      0.0268
##   3 Disent         1.176   -0.465        0.216      0.0363
## Distortion                                                
##   4 Opport         1.000    1.000        1.000      0.0454
## 
## ---------------------------------------------------------- 
## CROSSLOADINGS 
##                   Entrepreneurship  Inst_Quality  Burden  Distortion
## Entrepreneurship                                                    
##   1 TEA_Ido                  1.000        0.5177   0.410      0.2130
## Inst_Quality                                                        
##   2 CPI                      0.563        0.9565   0.306      0.3000
##   2 Tax_Score                0.343        0.8779   0.283      0.0945
## Burden                                                              
##   3 Suboan                   0.164       -0.0619   0.399      0.3948
##   3 Disent                  -0.191       -0.3327  -0.465      0.3720
## Distortion                                                          
##   4 Opport                   0.213        0.2398   0.011      1.0000
## 
## ---------------------------------------------------------- 
## INNER MODEL 
## $Inst_Quality
##                    Estimate   Std. Error    t value   Pr(>|t|)
## Intercept          9.31e-17       0.0723   1.29e-15   1.00e+00
## Entrepreneurship   5.18e-01       0.0723   7.16e+00   4.16e-11
## 
## $Burden
##                     Estimate   Std. Error     t value   Pr(>|t|)
## Intercept          -3.34e-17       0.0771   -4.34e-16   1.00e+00
## Entrepreneurship    4.10e-01       0.0771    5.32e+00   4.01e-07
## 
## $Distortion
##                    Estimate   Std. Error    t value   Pr(>|t|)
## Intercept          1.63e-18       0.0826   1.97e-17     1.0000
## Entrepreneurship   2.13e-01       0.0826   2.58e+00     0.0109
## 
## ---------------------------------------------------------- 
## CORRELATIONS BETWEEN LVs 
##                   Entrepreneurship  Inst_Quality  Burden  Distortion
## Entrepreneurship             1.000         0.518   0.410       0.213
## Inst_Quality                 0.518         1.000   0.321       0.240
## Burden                       0.410         0.321   1.000       0.011
## Distortion                   0.213         0.240   0.011       1.000
## 
## ---------------------------------------------------------- 
## SUMMARY INNER MODEL 
##                         Type      R2  Block_Communality  Mean_Redundancy    AVE
## Entrepreneurship   Exogenous  0.0000              1.000           0.0000  1.000
## Inst_Quality      Endogenous  0.2681              0.843           0.2259  0.843
## Burden            Endogenous  0.1682              0.188           0.0316  0.000
## Distortion        Endogenous  0.0454              1.000           0.0454  0.000
## 
## ---------------------------------------------------------- 
## GOODNESS-OF-FIT 
## [1]  0.2876
## 
## ---------------------------------------------------------- 
## TOTAL EFFECTS 
##                       relationships  direct  indirect  total
## 1  Entrepreneurship -> Inst_Quality   0.518         0  0.518
## 2        Entrepreneurship -> Burden   0.410         0  0.410
## 3    Entrepreneurship -> Distortion   0.213         0  0.213
## 4            Inst_Quality -> Burden   0.000         0  0.000
## 5        Inst_Quality -> Distortion   0.000         0  0.000
## 6              Burden -> Distortion   0.000         0  0.000
## 
## --------------------------------------------------------- 
## BOOTSTRAP VALIDATION 
## weights 
##                           Original  Mean.Boot  Std.Error  perc.025  perc.975
## Entrepreneurship-TEA_Ido     1.000     1.0000   1.25e-16     1.000     1.000
## Inst_Quality-CPI             0.671     0.6693   3.16e-02     0.608     0.739
## Inst_Quality-Tax_Score       0.408     0.4092   3.17e-02     0.339     0.467
## Burden-Suboan               -1.136    -0.0707   1.15e+00    -1.303     1.461
## Burden-Disent                1.176     0.2388   1.16e+00    -1.309     1.387
## Distortion-Opport            1.000     1.0000   1.18e-16     1.000     1.000
## 
## loadings 
##                           Original  Mean.Boot  Std.Error  perc.025  perc.975
## Entrepreneurship-TEA_Ido     1.000      1.000   1.11e-16     1.000     1.000
## Inst_Quality-CPI             0.957      0.956   7.27e-03     0.942     0.970
## Inst_Quality-Tax_Score       0.878      0.878   1.97e-02     0.835     0.912
## Burden-Suboan                0.399     -0.040   4.16e-01    -0.662     0.449
## Burden-Disent               -0.465     -0.175   4.59e-01    -0.761     0.598
## Distortion-Opport            1.000      1.000   1.08e-16     1.000     1.000
## 
## paths 
##                                   Original  Mean.Boot  Std.Error  perc.025
## Entrepreneurship -> Inst_Quality     0.518     0.5187     0.0476    0.4222
## Entrepreneurship -> Burden           0.410     0.0547     0.4190   -0.4900
## Entrepreneurship -> Distortion       0.213     0.2055     0.0728    0.0714
##                                   perc.975
## Entrepreneurship -> Inst_Quality     0.608
## Entrepreneurship -> Burden           0.494
## Entrepreneurship -> Distortion       0.354
## 
## rsq 
##               Original  Mean.Boot  Std.Error  perc.025  perc.975
## Inst_Quality    0.2681     0.2714     0.0493    0.1783     0.370
## Burden          0.1682     0.1782     0.0392    0.0997     0.252
## Distortion      0.0454     0.0475     0.0312    0.0051     0.126
## 
## total.efs 
##                                   Original  Mean.Boot  Std.Error  perc.025
## Entrepreneurship -> Inst_Quality     0.518     0.5187     0.0476    0.4222
## Entrepreneurship -> Burden           0.410     0.0547     0.4190   -0.4900
## Entrepreneurship -> Distortion       0.213     0.2055     0.0728    0.0714
## Inst_Quality -> Burden               0.000     0.0000     0.0000    0.0000
## Inst_Quality -> Distortion           0.000     0.0000     0.0000    0.0000
## Burden -> Distortion                 0.000     0.0000     0.0000    0.0000
##                                   perc.975
## Entrepreneurship -> Inst_Quality     0.608
## Entrepreneurship -> Burden           0.494
## Entrepreneurship -> Distortion       0.354
## Inst_Quality -> Burden               0.000
## Inst_Quality -> Distortion           0.000
## Burden -> Distortion                 0.000

# Step 7: Merge latent scores into original dataset
valid_rows <- as.numeric(rownames(data_subset))              # Get matching row indices
original_filtered <- data[valid_rows, ]                      # Subset original data
data_gmm <- cbind(original_filtered, pls_model_boot$scores) # Merge scores

#Regressions with this are not part of this proposal anymore.

Assessing Institutional Uncertainty and Entrepreneurship through PLS-PM

1. Theoretical Context

This analysis evaluates the empirical alignment of a Partial Least Squares Path Model (PLS-PM) with the theoretical propositions put forth in A Theory of Entrepreneurship and Institutional Uncertainty by Bylund & McCaffrey (2017). The authors argue that entrepreneurship is not only affected by institutional quality (e.g., transparency, efficiency, rule of law) but more critically by institutional uncertainty—a construct encompassing institutional volatility, misalignment, distortion, and burdens.

Institutional uncertainty, in this theory, is generated not simply by the absence of quality but by the presence of conflicting, overlapping, or unpredictable institutional signals. This uncertainty impedes entrepreneurs’ ability to evaluate opportunities and make forward-looking decisions.

2. Model Overview

The PLS-PM model tested here includes four latent constructs:

Latent Variable	Manifest Variables	Conceptual Role
Entrepreneurship	`TEA_Ido`	Outcome variable capturing early-stage entrepreneurial activity
Inst_Quality	`CPI`, `Tax_Score`	Classical measures of governance quality
Burden	`Suboan`, `Disent`	Institutional/legal constraints and inefficiencies
Distortion	`Opport`	Misalignment of institutional incentives with entrepreneurial opportunity

Measurement modes:
- Reflective: Entrepreneurship, Inst_Quality
- Formative: Burden, Distortion

3. Key Empirical Findings

3.1 Path Coefficients and Significance

Entrepreneurship → Burden = -0.372 (p < 0.001) Entrepreneurship → Distortion = -0.344 (p < 0.001) Entrepreneurship → Inst_Quality = 0.059 (p = 0.403)

The results indicate that institutional burden and institutional distortion have significant negative effects on entrepreneurship. These findings directly support Bylund & McCaffrey’s assertion that the uncertainty generated by institutional environments is a primary barrier to entrepreneurial activity.

Importantly, the insignificant coefficient on institutional quality aligns with the authors’ critique of mainstream institutional theory: quality alone does not predict entrepreneurial behavior in the presence of uncertainty-inducing features.

4. Model Evaluation

Metric	Value	Interpretation
Goodness-of-Fit	0.242	Acceptable for PLS models in exploratory settings
Cronbach’s α (Inst_Quality)	0.81	Adequate internal consistency
DG rho (Inst_Quality)	0.91	Good reliability
Formative Weights (Burden & Distortion)	Substantive and significant	Indicates meaningful contribution of components to constructs

Cross-Loadings

Manifest variables load most strongly on their designated constructs, confirming discriminant validity between quality, burden, and distortion dimensions.

5. Theoretical Implications

The empirical results strengthen the argument that entrepreneurship is more sensitive to institutional uncertainty than to institutional quality per se. This supports the move toward granular institutional analysis that distinguishes between the types of constraints entrepreneurs face. The decomposition into burden and distortion reflects real-world frictions that traditional governance indicators overlook.

This model contributes to empirical validation of Bylund & McCaffrey’s theory by operationalizing uncertainty as a multidimensional construct and demonstrating its statistically significant suppression effect on entrepreneurship.

6. Conclusion

This 4-construct PLS-PM model provides strong empirical support for A Theory of Entrepreneurship and Institutional Uncertainty. The design captures the distinct effects of institutional quality, burden, and distortion—offering a richer, more explanatory framework for understanding entrepreneurial dynamics than conventional governance measures alone.

This model is recommended for further investigation, including potential moderation effects, cross-country group comparisons, or temporal extensions using panel GMM regression.

Showing a simple model for Uncertainty

# Select variables and prepare data
vars_2c <- c("Disent", "Opport", "Suboan", "TEA_Ido")
data_2c <- na.omit(data[, vars_2c])
data_scaled_2c <- scale(data_2c)

# Inner model: Institutional_Uncertainty → Entrepreneurship
inner_model_2c <- matrix(c(
  0, 0,
  1, 0
), nrow = 2, byrow = TRUE)
colnames(inner_model_2c) <- rownames(inner_model_2c) <- c("Entrepreneurship", "Institutional_Uncertainty")

# Outer model
outer_model_2c <- list(
  c("TEA_Ido"),                      # Reflective indicator of Entrepreneurship
  c("Disent", "Opport", "Suboan")    # Formative indicators of Institutional Uncertainty
)

# Modes
modes_2c <- c("A", "B")  # Reflective for Entrepreneurship, Formative for Uncertainty

# Run model
pls_model_2c <- plspm(data_scaled_2c, inner_model_2c, outer_model_2c, modes = modes_2c)

# Output
print(pls_model_2c$path_coefs)     # Path from uncertainty to entrepreneurship

##                           Entrepreneurship Institutional_Uncertainty
## Entrepreneurship                 0.0000000                         0
## Institutional_Uncertainty        0.4665366                         0

print(pls_model_2c$outer_model)    # Weights and loadings

##      name                     block     weight    loading communality
## 1 TEA_Ido          Entrepreneurship  1.0000000  1.0000000   1.0000000
## 2  Disent Institutional_Uncertainty -1.1347453 -0.3696474   0.1366392
## 3  Opport Institutional_Uncertainty  0.5992995  0.5000272   0.2500272
## 4  Suboan Institutional_Uncertainty  0.8309879  0.3380053   0.1142476
##   redundancy
## 1 0.00000000
## 2 0.02974039
## 3 0.05442001
## 4 0.02486671

summary(pls_model_2c)              # Full summary

## PARTIAL LEAST SQUARES PATH MODELING (PLS-PM) 
## 
## ---------------------------------------------------------- 
## MODEL SPECIFICATION 
## 1   Number of Cases      181 
## 2   Latent Variables     2 
## 3   Manifest Variables   4 
## 4   Scale of Data        Standardized Data 
## 5   Non-Metric PLS       FALSE 
## 6   Weighting Scheme     centroid 
## 7   Tolerance Crit       1e-06 
## 8   Max Num Iters        100 
## 9   Convergence Iters    3 
## 10  Bootstrapping        FALSE 
## 11  Bootstrap samples    NULL 
## 
## ---------------------------------------------------------- 
## BLOCKS DEFINITION 
##                         Block         Type   Size   Mode
## 1            Entrepreneurship    Exogenous      1      A
## 2   Institutional_Uncertainty   Endogenous      3      B
## 
## ---------------------------------------------------------- 
## BLOCKS UNIDIMENSIONALITY 
##                            Mode  MVs  C.alpha  DG.rho  eig.1st  eig.2nd
## Entrepreneurship              A    1        1       1     1.00    0.000
## Institutional_Uncertainty     B    3        0       0     1.96    0.683
## 
## ---------------------------------------------------------- 
## OUTER MODEL 
##                            weight  loading  communality  redundancy
## Entrepreneurship                                                   
##   1 TEA_Ido                 1.000    1.000        1.000      0.0000
## Institutional_Uncertainty                                          
##   2 Disent                 -1.135   -0.370        0.137      0.0297
##   2 Opport                  0.599    0.500        0.250      0.0544
##   2 Suboan                  0.831    0.338        0.114      0.0249
## 
## ---------------------------------------------------------- 
## CROSSLOADINGS 
##                            Entrepreneurship  Institutional_Uncertainty
## Entrepreneurship                                                      
##   1 TEA_Ido                           1.000                      0.467
## Institutional_Uncertainty                                             
##   2 Disent                           -0.172                     -0.370
##   2 Opport                            0.233                      0.500
##   2 Suboan                            0.158                      0.338
## 
## ---------------------------------------------------------- 
## INNER MODEL 
## $Institutional_Uncertainty
##                    Estimate   Std. Error    t value   Pr(>|t|)
## Intercept          2.97e-17       0.0661   4.49e-16    1.0e+00
## Entrepreneurship   4.67e-01       0.0661   7.06e+00    3.6e-11
## 
## ---------------------------------------------------------- 
## CORRELATIONS BETWEEN LVs 
##                            Entrepreneurship  Institutional_Uncertainty
## Entrepreneurship                      1.000                      0.467
## Institutional_Uncertainty             0.467                      1.000
## 
## ---------------------------------------------------------- 
## SUMMARY INNER MODEL 
##                                  Type     R2  Block_Communality
## Entrepreneurship            Exogenous  0.000              1.000
## Institutional_Uncertainty  Endogenous  0.218              0.167
##                            Mean_Redundancy  AVE
## Entrepreneurship                    0.0000    1
## Institutional_Uncertainty           0.0363    0
## 
## ---------------------------------------------------------- 
## GOODNESS-OF-FIT 
## [1]  0.1906
## 
## ---------------------------------------------------------- 
## TOTAL EFFECTS 
##                                    relationships  direct  indirect  total
## 1  Entrepreneurship -> Institutional_Uncertainty   0.467         0  0.467

plot(pls_model_2c)                 # Path diagram

#### Interpretation

“Institutional uncertainty — driven primarily by variables like: opportunity distortion and suboptimal support mechanisms — significantly hinders entrepreneurship. The traditional assumption that only institutional quality matters overlooks these deeper frictions.”

This Finding:

Empirically supports the theoretical contributions of Bylund, McCaffrey, and D’Andrea, who argue that institutional uncertainty extends beyond mere quality.
Validates your EFA-based measurement of uncertainty as a distinct latent construct, separate from general institutional quality.
Anchors the core result of your empirical study — offering a critical advance in how we conceptualize and measure institutional environments in entrepreneurship research.

Alpha test

# Variable list
vars_to_test <- c("FDI_Inward", "CPI", "Constr_Score", "Credit_Score",
                  "Tax_Score", "Invest_Protect_Score", "Start_Biz_Score",
                  "Suboan", "Disent", "Opport")

# Clean data
data_sem <- na.omit(data[, vars_to_test])


psych::alpha(data_sem[, c("Opport", "Disent", "Suboan")], check.keys = TRUE)

## Number of categories should be increased  in order to count frequencies.

## 
## Reliability analysis   
## Call: psych::alpha(x = data_sem[, c("Opport", "Disent", "Suboan")], 
##     check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N  ase mean  sd median_r
##       0.24      0.68    0.63      0.42 2.2 0.04   18 6.1     0.33
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.03  0.24  0.41
## Duhachek  0.16  0.24  0.32
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r  S/N alpha se var.r med.r
## Opport     0.569      0.78    0.64      0.64 3.50    0.033    NA  0.64
## Disent     0.217      0.45    0.29      0.29 0.82    0.047    NA  0.29
## Suboan     0.096      0.49    0.33      0.33 0.98    0.019    NA  0.33
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean   sd
## Opport 184  0.97  0.69  0.40   0.32 45.3 16.4
## Disent 184  0.49  0.84  0.75   0.43  2.5  1.3
## Suboan 184  0.50  0.82  0.72   0.33  6.4  3.6

psych::alpha(data_sem[, c("CPI", "Constr_Score", "Tax_Score")], check.keys = TRUE)

## Number of categories should be increased  in order to count frequencies.

## 
## Reliability analysis   
## Call: psych::alpha(x = data_sem[, c("CPI", "Constr_Score", "Tax_Score")], 
##     check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N  ase mean sd median_r
##       0.82      0.86    0.81      0.67   6 0.02   70 12     0.69
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.77  0.82  0.86
## Duhachek  0.78  0.82  0.86
## 
##  Reliability if an item is dropped:
##              raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## CPI               0.83      0.84    0.73      0.73 5.3    0.024    NA  0.73
## Constr_Score      0.78      0.81    0.69      0.69 4.4    0.028    NA  0.69
## Tax_Score         0.66      0.74    0.59      0.59 2.9    0.039    NA  0.59
## 
##  Item statistics 
##                n raw.r std.r r.cor r.drop mean   sd
## CPI          184  0.91  0.86  0.74   0.69   60 18.0
## Constr_Score 184  0.83  0.88  0.78   0.70   72  9.8
## Tax_Score    184  0.90  0.91  0.86   0.78   78 12.5

psych::alpha(data_sem[, c("Credit_Score", "Invest_Protect_Score")], check.keys = TRUE)

## Number of categories should be increased  in order to count frequencies.

## 
## Reliability analysis   
## Call: psych::alpha(x = data_sem[, c("Credit_Score", "Invest_Protect_Score")], 
##     check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r  S/N   ase mean sd median_r
##       0.37      0.43    0.28      0.28 0.76 0.076   65 11     0.28
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.16  0.37  0.53
## Duhachek  0.22  0.37  0.52
## 
##  Reliability if an item is dropped:
##                      raw_alpha std.alpha G6(smc) average_r  S/N alpha se var.r
## Credit_Score              0.52      0.28   0.076      0.28 0.38       NA     0
## Invest_Protect_Score      0.15      0.28   0.076      0.28 0.38       NA     0
##                      med.r
## Credit_Score          0.28
## Invest_Protect_Score  0.28
## 
##  Item statistics 
##                        n raw.r std.r r.cor r.drop mean   sd
## Credit_Score         184  0.91   0.8  0.42   0.28   63 17.0
## Invest_Protect_Score 184  0.64   0.8  0.42   0.28   68  9.1

data$Reg_Eff <- rowMeans(data[, c("CPI", "Constr_Score", "Tax_Score")], na.rm = TRUE)

Factor ANalysis to find the expected Factors

FDI_Inward or Voice (need to choose) because they have a small overlap in this dataset.

efa_vars <- c("CPI", "Constr_Score", "Credit_Score", "Tax_Score", "Invest_Protect_Score", "Start_Biz_Score",
              "Suboan", "Opport", "Disent") #

# Keep only complete cases
efa_data <- data %>%
  select(all_of(efa_vars)) %>%
  drop_na()

# Step 1: Test suitability for factor analysis
KMO(efa_data)

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = efa_data)
## Overall MSA =  0.67
## MSA for each item = 
##                  CPI         Constr_Score         Credit_Score 
##                 0.73                 0.83                 0.41 
##            Tax_Score Invest_Protect_Score      Start_Biz_Score 
##                 0.77                 0.53                 0.80 
##               Suboan               Opport               Disent 
##                 0.48                 0.60                 0.52

cortest.bartlett(cor(efa_data), n = nrow(efa_data))

## $chisq
## [1] 682.9321
## 
## $p.value
## [1] 1.74233e-120
## 
## $df
## [1] 36

# Step 2: Determine number of factors (scree and parallel analysis)
fa.parallel(efa_data, fa = "fa")

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA

# Step 3: Run EFA (choose number of factors from previous step, e.g., 4)
efa_result <- fa(efa_data, nfactors = 3, rotate = "varimax", fm = "ml")

# Step 4: View factor loadings
print(efa_result$loadings, cutoff = 0.0, sort = TRUE)

## 
## Loadings:
##                      ML1    ML2    ML3   
## CPI                   0.824  0.094 -0.280
## Constr_Score          0.778  0.104  0.144
## Tax_Score             0.888 -0.073  0.124
## Start_Biz_Score       0.760 -0.073  0.313
## Suboan               -0.067  0.636  0.429
## Opport                0.188  0.570 -0.066
## Disent               -0.345  0.743  0.295
## Credit_Score          0.067  0.069  0.501
## Invest_Protect_Score  0.097  0.229  0.275
## 
##                  ML1   ML2   ML3
## SS loadings    2.824 1.368 0.814
## Proportion Var 0.314 0.152 0.090
## Cumulative Var 0.314 0.466 0.556

fa.diagram(efa_result, main ='efa_data' )

### Exploratory Factor Analysis (EFA) Summary

KMO = 0.67: Acceptable overall adequacy. But some items (Suboan = 0.25, Opport = 0.44, Disent = 0.49) show low individual suitability.
Bartlett’s test: Significant (χ² = 763.99, p < 0.001) → Factor analysis appropriate.

Suggested Factor Structure (3 Factors):

Factor 1 – Regulatory Quality / Business Efficiency
- CPI
- Constr_Score
- Tax_Score
- Start_Biz_Score
Factor 2 – Institutional Friction / Perceived Opportunity
- Opport
- Disent
Factor 3 – Burden-to-Opportunity Conversion
- Suboan

Unloaded items: Credit_Score, Invest_Protect_Score

Factor Analysis including the full set of variables

selec_IV <- data[, c("FDI_Inward",
                     "CPI",
                     "Constr_Score",
                     "Credit_Score",
                     "Tax_Score",
                     "Invest_Protect_Score",
                     "Start_Biz_Score",
                     "Suboan",
                     "Disent",
                     "Opport"
                     )]
data_clean_IV <- na.omit(selec_IV)
# Dropping problematic variables

#s1519IV_1 <- subset(data_clean_IV, select = -Opport)
# N1519IV_1 <- subset(N1519IV, select = -Opport)
# N1519IV_1

#https://www.youtube.com/watch?v=ikPZRRNfyXQ

# Standardizing the data in the model_factor dataframe
model_factor_standardized <- as.data.frame(lapply(data_clean_IV, scale))

# Check the structure of the new standardized data
str(model_factor_standardized)

## 'data.frame':    184 obs. of  10 variables:
##  $ FDI_Inward          : num  -0.1857 -0.2426 -0.1873 -0.1669 -0.0965 ...
##  $ CPI                 : num  -1.58 -1.36 -1.19 -1.14 1.03 ...
##  $ Constr_Score        : num  -2.4 -2.32 -2.34 -2.31 1.46 ...
##  $ Credit_Score        : num  -0.737 -0.737 -0.737 -0.737 1.612 ...
##  $ Tax_Score           : num  -2.566 -2.424 -2.277 -2.277 0.617 ...
##  $ Invest_Protect_Score: num  -0.667 -0.667 -0.667 -0.667 -0.446 ...
##  $ Start_Biz_Score     : num  -2.066 -2.034 -2.028 -0.833 1.216 ...
##  $ Suboan              : num  1.45 0.691 -0.695 -0.432 0.253 ...
##  $ Disent              : num  1.2598 1.3917 -0.1022 0.0168 0.3036 ...
##  $ Opport              : num  0.0374 -0.0592 -0.9524 -0.5717 0.2199 ...

summary(model_factor_standardized)  # Optional: Summarize to check mean (should be ~0) and SD (should be ~1)

##    FDI_Inward           CPI            Constr_Score      Credit_Score    
##  Min.   :-0.9104   Min.   :-1.80539   Min.   :-3.6688   Min.   :-2.7930  
##  1st Qu.:-0.2149   1st Qu.:-0.91569   1st Qu.:-0.3616   1st Qu.:-0.7374  
##  Median :-0.1776   Median : 0.02962   Median : 0.1604   Median :-0.1500  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.:-0.1138   3rd Qu.: 0.91932   3rd Qu.: 0.6328   3rd Qu.: 0.7310  
##  Max.   : 9.5117   Max.   : 1.64220   Max.   : 1.4561   Max.   : 1.9056  
##    Tax_Score       Invest_Protect_Score Start_Biz_Score       Suboan       
##  Min.   :-3.4871   Min.   :-1.9921      Min.   :-3.3938   Min.   :-1.4565  
##  1st Qu.:-0.2459   1st Qu.:-0.6672      1st Qu.:-0.6728   1st Qu.:-0.6918  
##  Median : 0.2994   Median :-0.2256      Median : 0.1154   Median :-0.2105  
##  Mean   : 0.0000   Mean   : 0.0000      Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.6687   3rd Qu.: 0.8785      3rd Qu.: 0.7845   3rd Qu.: 0.4611  
##  Max.   : 1.3370   Max.   : 1.9825      Max.   : 1.4664   Max.   : 5.7000  
##      Disent            Opport        
##  Min.   :-1.6737   Min.   :-2.30985  
##  1st Qu.:-0.7693   1st Qu.:-0.62583  
##  Median :-0.1794   Median :-0.03893  
##  Mean   : 0.0000   Mean   : 0.00000  
##  3rd Qu.: 0.5561   3rd Qu.: 0.58245  
##  Max.   : 3.3234   Max.   : 2.56361

KMO(model_factor_standardized)

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = model_factor_standardized)
## Overall MSA =  0.66
## MSA for each item = 
##           FDI_Inward                  CPI         Constr_Score 
##                 0.55                 0.74                 0.83 
##         Credit_Score            Tax_Score Invest_Protect_Score 
##                 0.43                 0.77                 0.59 
##      Start_Biz_Score               Suboan               Disent 
##                 0.80                 0.46                 0.53 
##               Opport 
##                 0.60

# Running Bartlett’s test of sphericity
bartlett.test(model_factor_standardized)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  model_factor_standardized
## Bartlett's K-squared = 4.0553e-14, df = 9, p-value = 1

#https://www.youtube.com/watch?v=ikPZRRNfyXQ

# Check for multivariate normality
#mardia(model_factor_standardized)

# Run factor analysis for different numbers of factors
#fa_2f <- fa(r = model_factor_standardized, nfactors = 2, rotate = "varimax", fm = "ml")
fa_3f <- fa(r = model_factor_standardized, nfactors = 3, rotate = "oblimin", fm = "ml")

## Loading required namespace: GPArotation

# Extract and print RMSEA for each model
#cat("RMSEA for 2 factors:", fa_2f$RMSEA, "\n")
#cat("RMSEA for 3 factors:", fa_3f$RMSEA, "\n")


print(fa_3f$loadings, cutoff = 0.0, sort = TRUE)

## 
## Loadings:
##                      ML3    ML1    ML2   
## CPI                   0.754 -0.094 -0.154
## Constr_Score          0.824  0.100  0.029
## Tax_Score             0.893 -0.050  0.001
## Start_Biz_Score       0.755 -0.030  0.107
## Suboan                0.098  0.649  0.238
## Disent               -0.074  0.988 -0.068
## Credit_Score          0.007 -0.001  0.997
## FDI_Inward            0.248  0.115 -0.409
## Invest_Protect_Score  0.156  0.267  0.215
## Opport                0.264  0.407  0.013
## 
##                  ML3   ML1   ML2
## SS loadings    2.786 1.669 1.304
## Proportion Var 0.279 0.167 0.130
## Cumulative Var 0.279 0.446 0.576

#fa_2f
#fa_3f

Exploratory Factor Analysis (EFA)

Variables analyzed: CPI, Constr_Score, Credit_Score, Tax_Score, Invest_Protect_Score, Start_Biz_Score, Suboan, Opport, Disent
KMO = 0.67 → Acceptable adequacy (most indicators > 0.6; Suboan flagged)
Bartlett’s test: χ² = 763.98, p < 0.001 → Suitable for factor analysis

Factor Solution (3 Factors, varimax rotation):

Factor 1 (Regulatory Quality): CPI, Constr_Score, Tax_Score, Start_Biz_Score
Factor 2 (Burden/Friction): Disent, Opport
Factor 3 (Distortion): Suboan

Reliability:

Cronbach’s alpha:
- Regulatory Quality: α ≈ 0.83
- Burden: α ≈ 0.71
- Distortion (1-item): not applicable

Discriminant Validity:

Inter-factor correlations below 0.70 → Indicates discriminant validity among constructs

# Conduct parallel analysis to determine the appropriate number of factors
pa <- fa.parallel(model_factor_standardized, fm = "ml", fa = "fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA

print(pa)

## Call: fa.parallel(x = model_factor_standardized, fm = "ml", fa = "fa")
## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA 
## 
##  Eigen Values of 
## 
##  eigen values of factors
##  [1]  2.78  1.23  0.50  0.06 -0.02 -0.03 -0.17 -0.41 -0.43 -0.73
## 
##  eigen values of simulated factors
##  [1]  0.72  0.30  0.21  0.13  0.05 -0.01 -0.07 -0.13 -0.20 -0.28
## 
##  eigen values of components 
##  [1] 3.15 2.18 1.43 0.91 0.69 0.52 0.40 0.33 0.22 0.18
## 
##  eigen values of simulated components
## [1] NA

fa.diagram(fa_3f, main ='data_clean_IV' )

Additional

DATA

Filtering to exclude countries with missing data - Here we choose the final dataset

# List of countries to drop - Countries missing FDI Inflow and Voice and Accountability
#countries_to_drop <- c("Hong Kong", "Puerto Rico")  # Replace with your list of countries

# Filter the data to exclude rows where 'Country' is in the list
#filtered_data <- subset(data, !Country %in% countries_to_drop)


# Calculate the total number of rows
total_rows <- nrow(data)

# Assuming 'data' is your dataset
na_counts <- colSums(is.na(data))

# Print the number of NAs in each column
#print(na_counts)

# Calculate the percentage of NAs in each column
na_percentage <- (colSums(is.na(data)) / total_rows) * 100

# Combine counts and percentages into a data frame for a better overview
na_summary <- data.frame(Count = na_counts, Percentage = na_percentage)

# Print the summary
print(na_summary)

##                      Count Percentage
## Country                  0   0.000000
## Year                     0   0.000000
## FDI_Inward               0   0.000000
## Voice_Account          120  51.282051
## CPI                      0   0.000000
## Constr_Score             0   0.000000
## Credit_Score             0   0.000000
## Tax_Score               39  16.666667
## Invest_Protect_Score     0   0.000000
## Start_Biz_Score          0   0.000000
## Suboan                  11   4.700855
## Babybu                  11   4.700855
## Estbbu                  11   4.700855
## Tea_s3p                 11   4.700855
## Tea_s4p                 11   4.700855
## TEA_Opp                 53  22.649573
## Anybus                  11   4.700855
## BO_Exa                  11   4.700855
## TEA_Ido                 53  22.649573
## TEA_Nec                 53  22.649573
## TEA_Job_Ex              11   4.700855
## Disent                  11   4.700855
## Opport                  11   4.700855
## Ownmge                  11   4.700855
## Pop_0_4                  0   0.000000
## Pop_5_14                 0   0.000000
## Pop_15_24                0   0.000000
## Pop_25_64                0   0.000000
## Pop_65_Plus              0   0.000000
## Pop_Male                 0   0.000000
## Pop_15_64_Percent        0   0.000000
## Population               0   0.000000
## GDP_per_capita_.         0   0.000000
## Reg_Eff                  0   0.000000

Correlation matrix - This is with all the data

# Calculate the correlation matrix
correlation_matrix <- cor(data_clean_IV, use = "complete.obs")  # 'use' handles missing values by using complete cases

# Print the correlation matrix
#print(correlation_matrix)

# If you want to visualize the correlation matrix
if(interactive()) {

   corrplot(correlation_matrix, method = "number", type = "upper", order = "hclust",
           tl.col = "black", tl.srt = 45,  # Text label color and rotation
           addCoef.col = "black")  # Add correlation coefficients in black
}