# Single source of truth for Component/Subcomponent labels used in tables/figures/exports.
var_label_map <- c(
X1A = "Government consumption",
X1B = "Transfers and subsidies",
X1C = "Government investment",
X1D_i = "Top marginal income tax rate",
X1D_ii = "Top marginal income and payroll tax rate",
X1E = "State ownership of assets",
X2A_scaled = "Judicial independence*",
X2B_scaled = "Impartial courts*",
X2C_scaled = "Property rights*",
X2D_scaled = "Military interference*",
X2E_scaled = "Legal integrity*",
X2F_scaled = "Contract enforcement*",
X2G_scaled = "Real property restrictions*",
X2H_scaled = "Police reliability*",
X3A = "Money growth",
X3B = "Inflation volatility",
X3C = "Recent inflation",
X3D = "Foreign currency accounts",
X4A_i = "Trade tax revenue",
X4A_ii = "Mean tariff rate",
X4A_iii = "Tariff dispersion",
X4B_i = "Non-tariff barriers",
X4B_ii = "Trade compliance costs",
X4C = "Black market exchange rates",
X4D_i = "Financial openness",
X4D_ii = "Capital controls",
X4D_iii = "Freedom of foreigners to visit",
X4D_iv = "Protection of foreign assets",
X5A_i = "Ownership of banks",
X5A_ii = "Private sector credit",
X5A_iii = "Interest rate controls",
X5B_i = "Hiring regulations and minimum wage",
X5B_ii = "Hiring and firing regulations",
X5B_iii = "Centralized collective bargaining",
X5B_iv = "Hours regulations",
X5B_v = "Mandated cost of worker dismissal",
X5B_vi = "Conscription",
X5B_vii = "Foreign labor",
X5C_i = "Regulatory burden",
X5C_ii = "Bureaucracy costs",
X5C_iii = "Impartial public administration",
X5C_iv = "Tax compliance",
X5D_i = "Market openness",
X5D_ii = "Business permits",
X5D_iii = "Distortion of business environment"
)From Equal Weights to Empirical Weights: Reassessing Economic Freedom Rankings
Why equal weighting might be problematic:
The areas aren’t necessarily of equal importance to economic outcomes or freedom. For instance, having sound property rights and rule of law (Area 2) could be argued as more fundamental than the exact size of government (Area 1). A country could theoretically have a larger government that still respects contracts and property rights. Also, the components within areas vary significantly in their impact. Sound Money has only 4 components while Regulation has many more nested subcomponents, yet each area still gets 20%. Third, different development contexts might require different weightings. What matters most for economic freedom in a developing economy might differ from an advanced economy. We do not address this issue though.
However, there are counterarguments for equal weighting:
It avoids subjective judgments about what’s “more important,” which could introduce ideological bias. Equal weighting is at least transparent and defensible as a neutral starting point. Empirically, one could test whether different weighting schemes would materially change country rankings or correlations with outcomes. If the rankings are robust to different weighting approaches, then the equal weighting critique may be less concerning in practice. The simplicity of equal weights can also make the index more accessible and easier to understand for policymakers and the public
Terminology used in this notebook follows the EFW hierarchy: Area, Component, and Subcomponent. When we refer to variables used in EFA/CFA, we report names and codes (for example, X1A, X1D_i).
1 Variable Labels (Paper Outputs)
2 Setup
2.1 Libraries
Minimum set of libraries
library(readxl)
library(writexl)
library(tidyverse)
library(labelled)
library(ggcorrplot)
library(gridExtra)
library(GGally)
library(knitr)
library(kableExtra)
require(psych)
library(GPArotation)
library(Hmisc)
require(lavaan)
require(lavaanPlot)
require(ResourceSelection)
require(semPlot)
require(tidySEM)
require(performance)
require(car)
library(semPlot)2.2 Functions
Correlation, printing, and citing
corplot <- function(d, m = "pearson", sz = 2.5, title = NULL, hc_order = FALSE) {
d_num <- d %>% dplyr::select(where(is.numeric))
if (ncol(d_num) < 2) return(NULL)
mycorr <- cor(d_num, use = "pairwise.complete.obs", method = m)
p.mat <- ggcorrplot::cor_pmat(d_num)
ggcorrplot(mycorr,
hc.order = hc_order,
type = "lower",
colors = c("red","white","green"),
tl.cex = 8,
tl.col = "black",
lab = TRUE,
lab_size = sz,
p.mat = p.mat,
sig.level = 0.01,
insig = "pch",
pch = 4) +
ggplot2::ggtitle(title %||% "")
}
myprint=function(x){x%>%kbl()%>%kable_classic(html_font = "Cambria")}3 Data
# Import Data
raw_data <- read_excel("../data/raw/economic-freedom-of-the-world-2024-master-index-data-for-researchers-iso.xlsx",
sheet = "EFW Data 2024 Report",
range = "B5:CH4790"
)New names:
• `data` -> `data...9`
• `data` -> `data...11`
• `data` -> `data...13`
• `data` -> `data...15`
• `data` -> `data...17`
• `data` -> `data...35`
• `data` -> `data...37`
• `data` -> `data...39`
• `data` -> `data...44`
• `data` -> `data...46`
• `data` -> `data...48`
df <- raw_data
#names(df)
#str(df)4 Data Cleaning: Fix imported var names and add Labels
4.1 Area 1: Size of Government
1A: Government consumption
1B: Transfers and subsidies
1C: Government investment
1D: Top marginal tax rate
i. 1Di: Top marginal income tax rate
ii. 1Dii: Top marginal income and payroll tax rates
1E: State ownership of assets
# --- Rename only Area 1 columns ---
df <- df %>%
rename(
`1A` = `Government consumption`, # score
`1A_data` = `data...9`, # underlying data
`1B` = `Transfers and subsidies`, # score
`1B_data` = `data...11`, # underlying data
`1C` = `Government investment`, # score
`1C_data` = `data...13`, # underlying data
`1D_i` = `Top marginal income tax rate`, # score
`1D_i_data` = `data...15`, # underlying data
`1D_ii` = `Top marginal income and payroll tax rate`, # score
`1D_ii_data` = `data...17`, # underlying data
`1D` = `Top marginal tax rate`, # combined top rate
`1E` = `State ownership of Assets`, # score
`Area1` = `Size of Government`, # Area 1 summary score
`Area1_rank` = `Area 1 Rank` # Area 1 rank
)
# --- Attach labels so you keep the readable names ---
var_label(df$`1A`) <- "Government consumption"
var_label(df$`1A_data`) <- "Government consumption – data"
var_label(df$`1B`) <- "Transfers and subsidies"
var_label(df$`1B_data`) <- "Transfers and subsidies – data"
var_label(df$`1C`) <- "Government investment"
var_label(df$`1C_data`) <- "Government investment – data"
var_label(df$`1D_i`) <- "Top marginal income tax rate"
var_label(df$`1D_i_data`) <- "Top marginal income tax rate – data"
var_label(df$`1D_ii`) <- "Top marginal income and payroll tax rate"
var_label(df$`1D_ii_data`) <- "Top marginal income and payroll tax rate – data"
var_label(df$`1D`) <- "Top marginal tax rate (aggregate score)"
var_label(df$`1E`) <- "State ownership of Assets"
var_label(df$Area1) <- "Area 1: Size of Government (summary score)"
var_label(df$Area1_rank) <- "Area 1 Rank"
#dplyr::glimpse(df) # see new names
#sapply(df[c("1A","1B","1C","1D","1E","Area1")], var_label) # see labels4.2 Area 2: Legal System and Property Rights
2A–2H cover the eight Subcomponents.
2_adj, 2_wo_gen, 2_with_gen capture the gender adjustment and the two alternate summary indices.
Area2 = the official “with gender adjustment” score.
df <- df %>%
rename(
`2A` = `Judicial independence`,
`2B` = `Impartial courts`,
`2C` = `Protection of property rights`,
`2D` = `Military interference in rule of law and politics`,
`2E` = `Integrity of the legal system`,
`2F` = `Legal enforcement of contracts`,
`2G` = `Regulatory restrictions on the sale of real property`,
`2H` = `Reliability of police`,
`2_adj` = `Gender Legal Rights Adjustment`,
`2_wo_gen` = `Legal System & Property Rights -- Without Gender Adjustment`,
`2_with_gen` = `Legal System & Property Rights -- With Gender Adjustment`,
Area2_rank = `Area 2 Rank`
) %>%
mutate(
Area2 = `2_with_gen`
) %>%
relocate(Area2, .after = `2_with_gen`)
# Labels
var_label(df$`2A`) <- "Judicial independence"
var_label(df$`2B`) <- "Impartial courts"
var_label(df$`2C`) <- "Protection of property rights"
var_label(df$`2D`) <- "Military interference in rule of law and politics"
var_label(df$`2E`) <- "Integrity of the legal system"
var_label(df$`2F`) <- "Legal enforcement of contracts"
var_label(df$`2G`) <- "Regulatory restrictions on the sale of real property"
var_label(df$`2H`) <- "Reliability of police"
var_label(df$`2_adj`) <- "Gender Legal Rights Adjustment"
var_label(df$`2_wo_gen`) <- "Legal System & Property Rights (without Gender Adjustment)"
var_label(df$`2_with_gen`) <- "Legal System & Property Rights (with Gender Adjustment)"
var_label(df$Area2) <- "Area 2: Legal System & Property Rights (summary score)"
var_label(df$Area2_rank) <- "Area 2: Rank"4.3 Create the Scaled Variables for Area 2
#str(df)
# Compute scaling factor
scaling_factor <- (1 + df$`2_adj`) / 2
# Create new scaled variables and assign descriptive names
df$`2A_scaled` <- df$`2A` * scaling_factor
label(df$`2A_scaled`) <- "Judicial independence*"
df$`2B_scaled` <- df$`2B` * scaling_factor
label(df$`2B_scaled`) <- "Impartial courts*"
df$`2C_scaled` <- df$`2C` * scaling_factor
label(df$`2C_scaled`) <- "Protection of property rights*"
df$`2D_scaled` <- df$`2D` * scaling_factor
label(df$`2D_scaled`) <- "Military interference in rule of law and politics*"
df$`2E_scaled` <- df$`2E` * scaling_factor
label(df$`2E_scaled`) <- "Integrity of the legal system *"
df$`2F_scaled` <- df$`2F` * scaling_factor
label(df$`2F_scaled`) <- "Legal enforcement of contracts*"
df$`2G_scaled` <- df$`2G` * scaling_factor
label(df$`2G_scaled`) <- "Regulatory restrictions on property*"
df$`2H_scaled` <- df$`2H` * scaling_factor
label(df$`2H_scaled`) <- "Reliability of police*"4.4 Area 3: Sound Money
df <- df %>%
rename(
`3A` = `Money growth`,
`3A_data` = `data...35`,
`3B` = `Standard deviation of inflation`,
`3B_data` = `data...37`,
`3C` = `Inflation: Most recent year`,
`3C_data` = `data...39`,
`3D` = `Freedom to own foreign currency bank accounts`,
Area3 = `Sound Money`,
Area3_rank = `Area 3 Rank`
)
var_label(df$`3A`) <- "Money growth"
var_label(df$`3A_data`) <- "Money growth – data"
var_label(df$`3B`) <- "Standard deviation of inflation"
var_label(df$`3B_data`) <- "Standard deviation of inflation – data"
var_label(df$`3C`) <- "Inflation: Most recent year"
var_label(df$`3C_data`) <- "Inflation: Most recent year – data"
var_label(df$`3D`) <- "Freedom to own foreign currency bank accounts"
var_label(df$Area3) <- "Area 3: Sound Money (summary score)"
var_label(df$Area3_rank) <- "Area 3: Rank"4.5 Area 4: Freedom to Trade Internationally
df <- df %>%
rename(
`4A_i` = `Revenue from trade taxes (% of trade sector)`,
`4A_i_data` = `data...44`,
`4A_ii` = `Mean tariff rate`,
`4A_ii_data`= `data...46`,
`4A_iii` = `Standard deviation of tariff rates`,
`4A_iii_data`= `data...48`,
`4A` = `Tariffs`,
`4B_i` = `Non-tariff trade barriers`,
`4B_ii` = `Compliance costs of importing and exporting`,
`4B` = `Regulatory trade barriers`,
`4C` = `Black market exchange rates`,
`4D_i` = `Financial openness`,
`4D_ii` = `Capital controls`,
`4D_iii` = `Freedom of foreigners to visit`,
`4D_iv` = `Protection of foreign assets`,
`4D` = `Controls of the movement of capital and people`,
Area4 = `Freedom to trade internationally`,
Area4_rank = `Area 4 Rank`
)
var_label(df$`4A_i`) <- "Revenue from trade taxes (% of trade sector)"
var_label(df$`4A_i_data`) <- "Revenue from trade taxes – data"
var_label(df$`4A_ii`) <- "Mean tariff rate"
var_label(df$`4A_ii_data`)<- "Mean tariff rate – data"
var_label(df$`4A_iii`) <- "Standard deviation of tariff rates"
var_label(df$`4A_iii_data`)<- "Standard deviation of tariff rates – data"
var_label(df$`4A`) <- "Tariffs (aggregate score)"
var_label(df$`4B_i`) <- "Non-tariff trade barriers"
var_label(df$`4B_ii`) <- "Compliance costs of importing and exporting"
var_label(df$`4B`) <- "Regulatory trade barriers (aggregate score)"
var_label(df$`4C`) <- "Black market exchange rates"
var_label(df$`4D_i`) <- "Financial openness"
var_label(df$`4D_ii`) <- "Capital controls"
var_label(df$`4D_iii`) <- "Freedom of foreigners to visit"
var_label(df$`4D_iv`) <- "Protection of foreign assets"
var_label(df$`4D`) <- "Controls of the movement of capital and people (aggregate score)"
var_label(df$Area4) <- "Area 4: Freedom to trade internationally (summary score)"
var_label(df$Area4_rank) <- "Area 4: Rank"4.6 Area 5: Regulation
df <- df %>%
rename(
`5A_i` = `Ownership of banks`,
`5A_ii` = `Private sector credit`,
`5A_iii` = `Interest rate controls/negative real interest rates)`,
`5A` = `Credit market regulations`,
`5B_i` = `Hiring regulations and minimum wage`,
`5B_ii` = `Hiring and firing regulations`,
`5B_iii` = `Centralized collective bargaining`,
`5B_iv` = `Hours Regulations`,
`5B_v` = `Mandated cost of worker dismissal`,
`5B_vi` = `Conscription`,
`5B_vii` = `Foreign Labor`,
`5B` = `Labor market regulations`,
`5C_i` = `Regulatory Burden`,
`5C_ii` = `Bureacracy costs`,
`5C_iii` = `Impartial Public Administration`,
`5C_iv` = `Tax compliance`,
`5C` = `Business regulations`,
`5D_i` = `Market openness`,
`5D_ii` = `Business Permits`,
`5D_iii` = `Distorton of the business environment`,
`5D` = `Freedom to enter markets and compete`,
Area5 = `Regulation`,
Area5_rank = `Area 5 Rank`
)
var_label(df$`5A_i`) <- "Ownership of banks"
var_label(df$`5A_ii`) <- "Private sector credit"
var_label(df$`5A_iii`) <- "Interest rate controls / negative real interest rates"
var_label(df$`5A`) <- "Credit market regulations (aggregate score)"
var_label(df$`5B_i`) <- "Hiring regulations and minimum wage"
var_label(df$`5B_ii`) <- "Hiring and firing regulations"
var_label(df$`5B_iii`) <- "Centralized collective bargaining"
var_label(df$`5B_iv`) <- "Hours Regulations"
var_label(df$`5B_v`) <- "Mandated cost of worker dismissal"
var_label(df$`5B_vi`) <- "Conscription"
var_label(df$`5B_vii`) <- "Foreign Labor"
var_label(df$`5B`) <- "Labor market regulations (aggregate score)"
var_label(df$`5C_i`) <- "Regulatory Burden"
var_label(df$`5C_ii`) <- "Bureaucracy costs"
var_label(df$`5C_iii`) <- "Impartial Public Administration"
var_label(df$`5C_iv`) <- "Tax compliance"
var_label(df$`5C`) <- "Business regulations (aggregate score)"
var_label(df$`5D_i`) <- "Market openness"
var_label(df$`5D_ii`) <- "Business Permits"
var_label(df$`5D_iii`) <- "Distortion of the business environment"
var_label(df$`5D`) <- "Freedom to enter markets and compete (aggregate score)"
var_label(df$Area5) <- "Area 5: Regulation (summary score)"
var_label(df$Area5_rank) <- "Area 5: Rank"5 Export 2022 Clean Data
df_2022 <- df %>%
filter(Year == 2022) %>%
select(
Year, Countries, `Economic Freedom Summary Index`, Rank,
# Area 1
`1A`, `1B`, `1C`, `1D_i`, `1D_ii`, `1D`, `1E`, Area1, Area1_rank,
# Area 2
`2A`, `2B`, `2C`, `2D`, `2E`, `2F`, `2G`, `2H`, `2_adj`, `2_wo_gen`, `2_with_gen`, Area2, Area2_rank,
# Scaled variables from Area 2
`2A_scaled`, `2B_scaled`, `2C_scaled`, `2D_scaled`, `2E_scaled`, `2F_scaled`, `2G_scaled`, `2H_scaled`,
# Area 3
`3A`, `3B`, `3C`, `3D`, Area3, Area3_rank,
# Area 4
`4A_i`, `4A_ii`, `4A_iii`, `4A`, `4B_i`, `4B_ii`, `4B`, `4C`,
`4D_i`, `4D_ii`, `4D_iii`, `4D_iv`, `4D`, Area4, Area4_rank,
# Area 5
`5A_i`, `5A_ii`, `5A_iii`, `5A`,
`5B_i`, `5B_ii`, `5B_iii`, `5B_iv`, `5B_v`, `5B_vi`, `5B_vii`, `5B`,
`5C_i`, `5C_ii`, `5C_iii`, `5C_iv`, `5C`,
`5D_i`, `5D_ii`, `5D_iii`, `5D`,
Area5, Area5_rank
)
# myprint(describe(mydata))# export
write_xlsx(x = df_2022,
path = "../data/clean/economic_freedom_cleaned_2022.xlsx"
)6 Standardize Subcomponents
Z-scores to adjust for magnitudes
# Step 1: Create a copy of the original dataset
df_2022_raw <- df_2022
# Step 2: Identify numeric columns to standardize (exclude Year, Areas, and Ranks)
num_cols <- df_2022_raw %>%
select(where(is.numeric)) %>%
names() %>%
setdiff(c("Year",
"Rank",
paste0("Area", 1:5),
paste0("Area", 1:5, "_rank"))) # Exclude Area scores & ranks
# Step 3: Create standardized versions with "_z" suffix
df_2022_std <- df_2022_raw %>%
mutate(across(all_of(num_cols), ~ as.numeric(scale(.)), .names = "{.col}_z"))df_2022_std has Year, Countries, all other numeric columns, z-scored cols and labels preserved.
7 Lavaan Safe
# 1. Rename variables to make them lavaan-safe (prepend "X")
df_2022_std <- df_2022_std %>%
dplyr::rename_with(.fn = ~ paste0("X", .x), .cols = matches("^[0-9]"))8 Summary Stats Table (2022) with standardised vars
# 1. Split into standardized and non-standardized numeric variables
df_std_vars <- df_2022_std %>% select(ends_with("_z"))
df_non_std_vars <- df_2022_std %>%
select(where(is.numeric)) %>%
select(-ends_with("_z"))
# Use the shared label map defined in the setup functions section.
paper_var_labels_summary <- var_label_map
# 2. Descriptive stats for standardized variables
desc_std <- psych::describe(df_std_vars, fast = FALSE)
desc_std_df <- data.frame(desc_std) %>%
rownames_to_column("Variable") %>%
select(Variable, n, mean, sd, min, max) %>%
rename(N = n, Mean = mean, SD = sd, Min = min, Max = max)
# 3. Descriptive stats for non-standardized variables
desc_non_std <- psych::describe(df_non_std_vars, fast = FALSE)
desc_non_std_df <- data.frame(desc_non_std) %>%
rownames_to_column("Variable") %>%
select(Variable, n, mean, sd, min, max) %>%
rename(N = n, Mean = mean, SD = sd, Min = min, Max = max) %>%
mutate(
Variable_Label = dplyr::if_else(
Variable %in% names(paper_var_labels_summary),
unname(paper_var_labels_summary[Variable]),
Variable
),
`Code: Variable` = dplyr::if_else(
Variable %in% names(paper_var_labels_summary),
paste0(Variable, ": ", Variable_Label),
Variable
)
)
# 4. Display: Non-standardized variables
kable(
desc_non_std_df %>% select(`Code: Variable`, N, Mean, SD, Min, Max),
digits = 2,
col.names = c("Code: Variable", "N", "Mean", "SD", "Min", "Max"),
align = "lccccc",
caption = "Summary Statistics – Non-Standardized Variables"
) %>%
kable_styling(full_width = FALSE, position = "center", font_size = 12)| Code: Variable | N | Mean | SD | Min | Max |
|---|---|---|---|---|---|
| Year | 165 | 2022.00 | 0.00 | 2022.00 | 2022.00 |
| Economic Freedom Summary Index | 165 | 6.53 | 1.05 | 3.02 | 8.58 |
| Rank | 165 | 82.78 | 47.80 | 1.00 | 165.00 |
| X1A: Government consumption | 165 | 5.47 | 2.46 | 0.00 | 10.00 |
| X1B: Transfers and subsidies | 156 | 7.53 | 2.05 | 1.82 | 10.00 |
| X1C: Government investment | 161 | 7.12 | 3.28 | 0.00 | 10.00 |
| X1D_i: Top marginal income tax rate | 165 | 7.47 | 2.19 | 2.00 | 10.00 |
| X1D_ii: Top marginal income and payroll tax rate | 162 | 5.48 | 2.63 | 0.00 | 10.00 |
| X1D | 165 | 6.50 | 2.23 | 1.00 | 10.00 |
| X1E: State ownership of assets | 162 | 6.66 | 1.58 | 2.44 | 9.52 |
| Area1 | 165 | 6.64 | 1.12 | 3.62 | 9.06 |
| Area1_rank | 165 | 83.00 | 47.78 | 1.00 | 165.00 |
| X2A | 165 | 5.51 | 1.45 | 2.45 | 8.64 |
| X2B | 165 | 4.66 | 1.89 | 0.83 | 8.84 |
| X2C | 165 | 5.44 | 2.16 | 0.00 | 9.67 |
| X2D | 138 | 6.29 | 2.66 | 0.00 | 10.00 |
| X2E | 164 | 5.83 | 1.85 | 1.70 | 9.80 |
| X2F | 165 | 3.97 | 1.98 | 0.00 | 8.73 |
| X2G | 163 | 7.59 | 1.52 | 2.67 | 9.98 |
| X2H | 165 | 5.21 | 2.42 | 0.00 | 9.77 |
| X2_adj | 165 | 0.87 | 0.17 | 0.29 | 1.00 |
| X2_wo_gen | 165 | 5.53 | 1.66 | 1.98 | 9.10 |
| X2_with_gen | 165 | 5.23 | 1.79 | 1.63 | 9.10 |
| Area2 | 165 | 5.23 | 1.79 | 1.63 | 9.10 |
| Area2_rank | 165 | 83.00 | 47.78 | 1.00 | 165.00 |
| X2A_scaled: Judicial independence* | 165 | 5.20 | 1.59 | 1.97 | 8.64 |
| X2B_scaled: Impartial courts* | 165 | 4.42 | 1.96 | 0.68 | 8.84 |
| X2C_scaled: Property rights* | 165 | 5.16 | 2.23 | 0.00 | 9.67 |
| X2D_scaled: Military interference* | 138 | 5.99 | 2.75 | 0.00 | 10.00 |
| X2E_scaled: Legal integrity* | 164 | 5.52 | 1.97 | 1.55 | 9.80 |
| X2F_scaled: Contract enforcement* | 165 | 3.78 | 2.02 | 0.00 | 8.73 |
| X2G_scaled: Real property restrictions* | 163 | 7.13 | 1.66 | 2.35 | 9.98 |
| X2H_scaled: Police reliability* | 165 | 4.92 | 2.38 | 0.00 | 9.77 |
| X3A: Money growth | 163 | 8.24 | 1.61 | 0.00 | 9.98 |
| X3B: Inflation volatility | 165 | 7.65 | 2.52 | 0.00 | 9.74 |
| X3C: Recent inflation | 165 | 5.88 | 2.61 | 0.00 | 9.46 |
| X3D: Foreign currency accounts | 165 | 7.15 | 3.95 | 0.00 | 10.00 |
| Area3 | 165 | 7.23 | 1.71 | 0.74 | 9.55 |
| Area3_rank | 165 | 82.99 | 47.77 | 1.00 | 165.00 |
| X4A_i: Trade tax revenue | 156 | 8.63 | 1.47 | 3.33 | 10.00 |
| X4A_ii: Mean tariff rate | 164 | 8.26 | 1.02 | 3.50 | 10.00 |
| X4A_iii: Tariff dispersion | 165 | 6.06 | 2.13 | 0.00 | 10.00 |
| X4A | 165 | 7.62 | 1.18 | 3.92 | 10.00 |
| X4B_i: Non-tariff barriers | 165 | 5.75 | 1.80 | 0.00 | 9.18 |
| X4B_ii: Trade compliance costs | 165 | 6.46 | 3.01 | 0.00 | 9.98 |
| X4B | 165 | 6.11 | 2.11 | 1.69 | 9.41 |
| X4C: Black market exchange rates | 165 | 9.22 | 2.47 | 0.00 | 10.00 |
| X4D_i: Financial openness | 165 | 5.84 | 3.02 | 0.00 | 10.00 |
| X4D_ii: Capital controls | 164 | 3.52 | 2.76 | 0.00 | 10.00 |
| X4D_iii: Freedom of foreigners to visit | 165 | 7.10 | 3.38 | 0.00 | 10.00 |
| X4D_iv: Protection of foreign assets | 165 | 5.73 | 2.10 | 0.00 | 9.27 |
| X4D | 165 | 5.55 | 2.15 | 0.69 | 9.40 |
| Area4 | 165 | 7.12 | 1.49 | 2.48 | 9.66 |
| Area4_rank | 165 | 83.00 | 47.78 | 1.00 | 165.00 |
| X5A_i: Ownership of banks | 151 | 7.59 | 2.78 | 0.00 | 10.00 |
| X5A_ii: Private sector credit | 162 | 7.39 | 2.53 | 0.00 | 10.00 |
| X5A_iii: Interest rate controls | 163 | 8.35 | 2.27 | 0.00 | 10.00 |
| X5A | 164 | 7.74 | 1.63 | 0.00 | 10.00 |
| X5B_i: Hiring regulations and minimum wage | 165 | 5.81 | 1.80 | 1.25 | 9.45 |
| X5B_ii: Hiring and firing regulations | 165 | 5.64 | 1.97 | 0.00 | 10.00 |
| X5B_iii: Centralized collective bargaining | 141 | 6.48 | 1.22 | 1.61 | 8.67 |
| X5B_iv: Hours regulations | 165 | 7.84 | 1.94 | 2.00 | 10.00 |
| X5B_v: Mandated cost of worker dismissal | 162 | 6.49 | 2.83 | 0.00 | 10.00 |
| X5B_vi: Conscription | 165 | 6.61 | 4.20 | 0.00 | 10.00 |
| X5B_vii: Foreign labor | 165 | 5.02 | 1.49 | 0.00 | 8.14 |
| X5B | 165 | 6.25 | 1.22 | 2.67 | 9.14 |
| X5C_i: Regulatory burden | 158 | 4.15 | 1.22 | 1.14 | 7.44 |
| X5C_ii: Bureaucracy costs | 165 | 5.04 | 2.22 | 0.00 | 9.56 |
| X5C_iii: Impartial public administration | 162 | 5.62 | 2.48 | 0.34 | 9.91 |
| X5C_iv: Tax compliance | 165 | 5.90 | 1.92 | 0.00 | 9.87 |
| X5C | 165 | 5.17 | 1.55 | 0.60 | 9.05 |
| X5D_i: Market openness | 165 | 6.07 | 1.80 | 1.24 | 10.00 |
| X5D_ii: Business permits | 161 | 8.39 | 1.14 | 4.98 | 10.00 |
| X5D_iii: Distortion of business environment | 165 | 5.20 | 2.29 | 0.00 | 10.00 |
| X5D | 165 | 6.52 | 1.44 | 2.59 | 9.86 |
| Area5 | 165 | 6.41 | 1.13 | 2.54 | 8.86 |
| Area5_rank | 165 | 83.00 | 47.78 | 1.00 | 165.00 |
# 5. Display: Standardized variables
kable(
desc_std_df,
digits = 2,
col.names = c("Variable", "N", "Mean", "SD", "Min", "Max"),
align = "lccccc",
caption = "Summary Statistics – Standardized Variables (_z)"
) %>%
kable_styling(full_width = FALSE, position = "center", font_size = 12)| Variable | N | Mean | SD | Min | Max |
|---|---|---|---|---|---|
| Economic Freedom Summary Index_z | 165 | 0 | 1 | -3.35 | 1.96 |
| X1A_z | 165 | 0 | 1 | -2.23 | 1.84 |
| X1B_z | 156 | 0 | 1 | -2.78 | 1.20 |
| X1C_z | 161 | 0 | 1 | -2.17 | 0.88 |
| X1D_i_z | 165 | 0 | 1 | -2.49 | 1.15 |
| X1D_ii_z | 162 | 0 | 1 | -2.08 | 1.72 |
| X1D_z | 165 | 0 | 1 | -2.47 | 1.57 |
| X1E_z | 162 | 0 | 1 | -2.68 | 1.81 |
| X2A_z | 165 | 0 | 1 | -2.12 | 2.16 |
| X2B_z | 165 | 0 | 1 | -2.03 | 2.22 |
| X2C_z | 165 | 0 | 1 | -2.52 | 1.96 |
| X2D_z | 138 | 0 | 1 | -2.37 | 1.39 |
| X2E_z | 164 | 0 | 1 | -2.23 | 2.15 |
| X2F_z | 165 | 0 | 1 | -2.00 | 2.40 |
| X2G_z | 163 | 0 | 1 | -3.24 | 1.57 |
| X2H_z | 165 | 0 | 1 | -2.15 | 1.89 |
| X2_adj_z | 165 | 0 | 1 | -3.45 | 0.77 |
| X2_wo_gen_z | 165 | 0 | 1 | -2.13 | 2.15 |
| X2_with_gen_z | 165 | 0 | 1 | -2.01 | 2.16 |
| X2A_scaled_z | 165 | 0 | 1 | -2.04 | 2.16 |
| X2B_scaled_z | 165 | 0 | 1 | -1.91 | 2.26 |
| X2C_scaled_z | 165 | 0 | 1 | -2.31 | 2.02 |
| X2D_scaled_z | 138 | 0 | 1 | -2.18 | 1.46 |
| X2E_scaled_z | 164 | 0 | 1 | -2.01 | 2.17 |
| X2F_scaled_z | 165 | 0 | 1 | -1.87 | 2.46 |
| X2G_scaled_z | 163 | 0 | 1 | -2.88 | 1.72 |
| X2H_scaled_z | 165 | 0 | 1 | -2.07 | 2.04 |
| X3A_z | 163 | 0 | 1 | -5.11 | 1.08 |
| X3B_z | 165 | 0 | 1 | -3.04 | 0.83 |
| X3C_z | 165 | 0 | 1 | -2.25 | 1.37 |
| X3D_z | 165 | 0 | 1 | -1.81 | 0.72 |
| X4A_i_z | 156 | 0 | 1 | -3.61 | 0.93 |
| X4A_ii_z | 164 | 0 | 1 | -4.68 | 1.71 |
| X4A_iii_z | 165 | 0 | 1 | -2.85 | 1.85 |
| X4A_z | 165 | 0 | 1 | -3.15 | 2.02 |
| X4B_i_z | 165 | 0 | 1 | -3.19 | 1.90 |
| X4B_ii_z | 165 | 0 | 1 | -2.14 | 1.17 |
| X4B_z | 165 | 0 | 1 | -2.09 | 1.57 |
| X4C_z | 165 | 0 | 1 | -3.73 | 0.32 |
| X4D_i_z | 165 | 0 | 1 | -1.94 | 1.38 |
| X4D_ii_z | 164 | 0 | 1 | -1.28 | 2.35 |
| X4D_iii_z | 165 | 0 | 1 | -2.10 | 0.86 |
| X4D_iv_z | 165 | 0 | 1 | -2.73 | 1.69 |
| X4D_z | 165 | 0 | 1 | -2.27 | 1.79 |
| X5A_i_z | 151 | 0 | 1 | -2.73 | 0.87 |
| X5A_ii_z | 162 | 0 | 1 | -2.92 | 1.03 |
| X5A_iii_z | 163 | 0 | 1 | -3.68 | 0.73 |
| X5A_z | 164 | 0 | 1 | -4.75 | 1.38 |
| X5B_i_z | 165 | 0 | 1 | -2.54 | 2.02 |
| X5B_ii_z | 165 | 0 | 1 | -2.87 | 2.21 |
| X5B_iii_z | 141 | 0 | 1 | -3.99 | 1.80 |
| X5B_iv_z | 165 | 0 | 1 | -3.01 | 1.11 |
| X5B_v_z | 162 | 0 | 1 | -2.29 | 1.24 |
| X5B_vi_z | 165 | 0 | 1 | -1.57 | 0.81 |
| X5B_vii_z | 165 | 0 | 1 | -3.36 | 2.09 |
| X5B_z | 165 | 0 | 1 | -2.93 | 2.36 |
| X5C_i_z | 158 | 0 | 1 | -2.46 | 2.69 |
| X5C_ii_z | 165 | 0 | 1 | -2.27 | 2.04 |
| X5C_iii_z | 162 | 0 | 1 | -2.13 | 1.73 |
| X5C_iv_z | 165 | 0 | 1 | -3.07 | 2.07 |
| X5C_z | 165 | 0 | 1 | -2.95 | 2.51 |
| X5D_i_z | 165 | 0 | 1 | -2.69 | 2.19 |
| X5D_ii_z | 161 | 0 | 1 | -2.99 | 1.41 |
| X5D_iii_z | 165 | 0 | 1 | -2.27 | 2.10 |
| X5D_z | 165 | 0 | 1 | -2.73 | 2.33 |
# export
write_xlsx(x = df_2022_std,
path = "../data/clean/economic_freedom_cleaned_2022_standardised.xlsx")
# Paper export: use the non-standardized summary stats (not desc_std_df)
dir.create("../paper_draft/tables", recursive = TRUE, showWarnings = FALSE)
write.csv(
desc_non_std_df %>% select(`Code: Variable`, N, Mean, SD, Min, Max),
"../paper_draft/tables/paper_summary_stats_nonstandardized.csv",
row.names = FALSE
)
# Reader-friendly appendix table: keep only Component/Subcomponent variables that have explicit labels in `var_label_map`
write.csv(
desc_non_std_df %>%
filter(Variable %in% names(var_label_map)) %>%
select(`Code: Variable`, N, Mean, SD, Min, Max),
"../paper_draft/tables/paper_summary_stats_nonstandardized_labeled_v7.csv",
row.names = FALSE
)
#str(df_2022_std)9 EXCEL REPLICATION
- Write
rmeanfunction to compute row means across multiple columns
rmean <- function(...){
rowMeans(cbind(...), na.rm = TRUE)
}
#rmean(df_2022_std$X1A, df_2022_std$X1B)10 METHOD 3: Direct Weighted Average on Sub-Components
# Create manual EFI and rank
df_2022_manual_subcomponent <- df_2022_std %>%
mutate(
EFI_Manual_subcomponent =
0.20 * rmean(x = X1A,
X1B,
X1C,
rmean(X1D_i,X1D_ii),
X1E
) +
0.20 * rmean(x = X2A_scaled,
X2B_scaled,
X2C_scaled,
X2D_scaled,
X2E_scaled,
X2F_scaled,
X2G_scaled,
X2H_scaled
) +
0.20 * rmean(x = X3A,
X3B,
X3C,
X3D
) +
0.20 * rmean(x = rmean(X4A_i,X4A_ii,X4A_iii),
rmean(X4B_i,X4B_ii), X4C,
rmean(X4D_i,X4D_ii,X4D_iii,X4D_iv)
) +
0.20 * rmean(x = rmean(X5A_i,X5A_ii,X5A_iii),
rmean(X5B_i,X5B_ii,X5B_iii,X5B_iv, X5B_v, X5B_vi, X5B_vii),
rmean(X5C_i,X5C_ii, X5C_iii, X5C_iv),
rmean(X5D_i,X5D_ii,X5D_iii)
)
) %>%
arrange(desc(EFI_Manual_subcomponent)) %>%
mutate(Rank_Manual_subcomponent = row_number())
# Select relevant columns for comparison
rank_comparison_subcomponent <- df_2022_manual_subcomponent %>%
select(Countries, Rank, Rank_Manual_subcomponent, `Economic Freedom Summary Index`, EFI_Manual_subcomponent)
# Display nicely
kable(
rank_comparison_subcomponent,
digits = 2,
col.names = c("Country", "Original Rank", "Manual Rank", "Economic Freedom Summary Index","Manual EFI Score"),
caption = "Comparison of Original vs. Manual EFI Rankings"
) %>%
kable_styling(full_width = FALSE, position = "center", font_size = 12)| Country | Original Rank | Manual Rank | Economic Freedom Summary Index | Manual EFI Score |
|---|---|---|---|---|
| Hong Kong SAR, China | 1 | 1 | 8.58 | 8.58 |
| Singapore | 2 | 2 | 8.55 | 8.55 |
| Switzerland | 3 | 3 | 8.43 | 8.43 |
| New Zealand | 4 | 4 | 8.39 | 8.39 |
| United States | 5 | 5 | 8.09 | 8.09 |
| Ireland | 6 | 6 | 8.02 | 8.02 |
| Denmark | 6 | 7 | 8.02 | 8.02 |
| Canada | 8 | 8 | 7.99 | 7.99 |
| Australia | 9 | 9 | 7.98 | 7.98 |
| Luxembourg | 9 | 10 | 7.98 | 7.98 |
| Japan | 11 | 11 | 7.90 | 7.90 |
| United Kingdom | 12 | 12 | 7.88 | 7.88 |
| Finland | 13 | 13 | 7.87 | 7.87 |
| Iceland | 14 | 14 | 7.84 | 7.84 |
| Malta | 15 | 15 | 7.82 | 7.82 |
| Germany | 16 | 16 | 7.80 | 7.80 |
| Mauritius | 17 | 17 | 7.79 | 7.79 |
| Netherlands | 18 | 18 | 7.74 | 7.74 |
| Taiwan | 19 | 19 | 7.71 | 7.71 |
| Estonia | 20 | 20 | 7.67 | 7.67 |
| Georgia | 21 | 21 | 7.66 | 7.66 |
| Czechia | 22 | 22 | 7.65 | 7.65 |
| Panama | 23 | 23 | 7.62 | 7.62 |
| Austria | 23 | 24 | 7.62 | 7.62 |
| Sweden | 25 | 25 | 7.61 | 7.61 |
| Costa Rica | 26 | 26 | 7.60 | 7.60 |
| Portugal | 27 | 27 | 7.59 | 7.59 |
| Norway | 28 | 28 | 7.58 | 7.58 |
| Malaysia | 29 | 29 | 7.56 | 7.56 |
| Spain | 30 | 30 | 7.54 | 7.54 |
| Cyprus | 31 | 31 | 7.53 | 7.53 |
| Latvia | 32 | 32 | 7.52 | 7.52 |
| Korea, Rep. | 32 | 33 | 7.52 | 7.52 |
| Bahrain | 34 | 34 | 7.51 | 7.51 |
| Lithuania | 34 | 35 | 7.51 | 7.51 |
| France | 36 | 36 | 7.49 | 7.49 |
| Armenia | 36 | 37 | 7.49 | 7.49 |
| Albania | 38 | 38 | 7.48 | 7.48 |
| Guatemala | 39 | 39 | 7.45 | 7.45 |
| Chile | 39 | 40 | 7.45 | 7.45 |
| Israel | 41 | 41 | 7.44 | 7.44 |
| Belgium | 42 | 42 | 7.42 | 7.42 |
| Romania | 43 | 43 | 7.41 | 7.41 |
| Peru | 43 | 44 | 7.41 | 7.41 |
| Slovak Republic | 45 | 45 | 7.39 | 7.39 |
| United Arab Emirates | 45 | 46 | 7.39 | 7.39 |
| Cabo Verde | 47 | 47 | 7.37 | 7.37 |
| Jordan | 48 | 48 | 7.33 | 7.33 |
| Dominican Republic | 49 | 49 | 7.32 | 7.32 |
| Jamaica | 49 | 50 | 7.32 | 7.32 |
| Italy | 51 | 51 | 7.22 | 7.22 |
| Bulgaria | 52 | 52 | 7.16 | 7.16 |
| Montenegro | 53 | 53 | 7.15 | 7.15 |
| Slovenia | 54 | 54 | 7.14 | 7.14 |
| Hungary | 55 | 55 | 7.12 | 7.12 |
| Croatia | 56 | 56 | 7.07 | 7.07 |
| Uruguay | 57 | 57 | 7.04 | 7.04 |
| Saudi Arabia | 58 | 58 | 7.03 | 7.03 |
| El Salvador | 59 | 59 | 7.01 | 7.01 |
| Philippines | 59 | 60 | 7.01 | 7.01 |
| Brunei Darussalam | 61 | 61 | 6.99 | 6.99 |
| Paraguay | 62 | 62 | 6.98 | 6.98 |
| Seychelles | 62 | 63 | 6.98 | 6.98 |
| Indonesia | 64 | 64 | 6.96 | 6.96 |
| Mexico | 65 | 65 | 6.94 | 6.94 |
| Thailand | 65 | 66 | 6.94 | 6.94 |
| Honduras | 67 | 67 | 6.92 | 6.92 |
| Kazakhstan | 68 | 68 | 6.89 | 6.89 |
| Mongolia | 69 | 69 | 6.86 | 6.86 |
| Poland | 70 | 70 | 6.85 | 6.85 |
| Gambia, The | 70 | 71 | 6.85 | 6.85 |
| Cambodia | 70 | 72 | 6.85 | 6.85 |
| Greece | 70 | 73 | 6.85 | 6.85 |
| Kenya | 74 | 74 | 6.82 | 6.82 |
| Barbados | 75 | 75 | 6.81 | 6.81 |
| Uganda | 76 | 76 | 6.75 | 6.75 |
| North Macedonia | 77 | 77 | 6.74 | 6.74 |
| Qatar | 78 | 78 | 6.71 | 6.71 |
| Serbia | 79 | 79 | 6.70 | 6.70 |
| Botswana | 80 | 80 | 6.67 | 6.67 |
| Bahamas, The | 81 | 81 | 6.65 | 6.65 |
| South Africa | 81 | 82 | 6.65 | 6.65 |
| Trinidad and Tobago | 83 | 83 | 6.64 | 6.64 |
| India | 84 | 84 | 6.58 | 6.58 |
| Brazil | 85 | 85 | 6.56 | 6.56 |
| Nepal | 86 | 86 | 6.54 | 6.54 |
| Colombia | 86 | 87 | 6.54 | 6.54 |
| Kyrgyz Republic | 88 | 88 | 6.53 | 6.53 |
| Moldova | 89 | 89 | 6.49 | 6.49 |
| Tajikistan | 90 | 90 | 6.46 | 6.46 |
| Morocco | 90 | 91 | 6.46 | 6.46 |
| Rwanda | 92 | 92 | 6.45 | 6.45 |
| Kuwait | 92 | 93 | 6.45 | 6.45 |
| Nicaragua | 94 | 94 | 6.42 | 6.42 |
| Tanzania | 95 | 95 | 6.40 | 6.40 |
| Oman | 95 | 96 | 6.40 | 6.40 |
| Benin | 97 | 97 | 6.26 | 6.26 |
| Bhutan | 98 | 98 | 6.24 | 6.24 |
| Vietnam | 99 | 99 | 6.23 | 6.23 |
| Namibia | 99 | 100 | 6.23 | 6.23 |
| Djibouti | 99 | 101 | 6.23 | 6.23 |
| Bosnia and Herzegovina | 102 | 102 | 6.22 | 6.22 |
| Zambia | 103 | 103 | 6.17 | 6.17 |
| Ecuador | 104 | 104 | 6.14 | 6.14 |
| China | 104 | 105 | 6.14 | 6.14 |
| Bolivia | 106 | 106 | 6.11 | 6.11 |
| Togo | 107 | 107 | 6.07 | 6.07 |
| Papua New Guinea | 108 | 108 | 6.02 | 6.02 |
| Belize | 109 | 109 | 6.00 | 6.00 |
| Senegal | 110 | 110 | 5.99 | 5.99 |
| Fiji | 110 | 111 | 5.99 | 5.99 |
| Belarus | 112 | 112 | 5.98 | 5.98 |
| Lesotho | 113 | 113 | 5.96 | 5.96 |
| Nigeria | 113 | 114 | 5.96 | 5.96 |
| Ghana | 113 | 115 | 5.96 | 5.96 |
| Somalia | 116 | 116 | 5.95 | 5.95 |
| Madagascar | 116 | 117 | 5.95 | 5.95 |
| Tunisia | 118 | 118 | 5.94 | 5.94 |
| Russian Federation | 119 | 119 | 5.93 | 5.93 |
| Mauritania | 120 | 120 | 5.91 | 5.91 |
| Burkina Faso | 121 | 121 | 5.90 | 5.90 |
| Côte d'Ivoire | 122 | 122 | 5.89 | 5.89 |
| Liberia | 123 | 123 | 5.87 | 5.87 |
| Sri Lanka | 123 | 124 | 5.87 | 5.87 |
| Lao PDR | 125 | 125 | 5.86 | 5.86 |
| Mozambique | 125 | 126 | 5.86 | 5.86 |
| Bangladesh | 127 | 127 | 5.85 | 5.85 |
| Azerbaijan | 128 | 128 | 5.80 | 5.80 |
| Haiti | 128 | 129 | 5.80 | 5.80 |
| Timor-Leste | 130 | 130 | 5.77 | 5.77 |
| Niger | 131 | 131 | 5.70 | 5.70 |
| Mali | 132 | 132 | 5.63 | 5.63 |
| Guinea | 132 | 133 | 5.63 | 5.63 |
| Pakistan | 134 | 134 | 5.62 | 5.62 |
| Cameroon | 135 | 135 | 5.61 | 5.61 |
| Guyana | 136 | 136 | 5.60 | 5.60 |
| Sierra Leone | 137 | 137 | 5.59 | 5.59 |
| Türkiye | 138 | 138 | 5.57 | 5.57 |
| Comoros | 139 | 139 | 5.56 | 5.56 |
| Malawi | 140 | 140 | 5.48 | 5.48 |
| Suriname | 141 | 141 | 5.46 | 5.46 |
| Eswatini | 142 | 142 | 5.42 | 5.42 |
| Egypt, Arab Rep. | 143 | 143 | 5.36 | 5.36 |
| Lebanon | 144 | 144 | 5.35 | 5.35 |
| Guinea-Bissau | 145 | 145 | 5.32 | 5.32 |
| Gabon | 146 | 146 | 5.28 | 5.28 |
| Iraq | 147 | 147 | 5.25 | 5.25 |
| Ethiopia | 148 | 148 | 5.24 | 5.24 |
| Burundi | 149 | 149 | 5.22 | 5.22 |
| Ukraine | 150 | 150 | 5.12 | 5.12 |
| Chad | 151 | 151 | 5.05 | 5.05 |
| Congo, Dem. Rep. | 152 | 152 | 5.02 | 5.02 |
| Central African Republic | 153 | 153 | 4.92 | 4.92 |
| Congo, Rep. | 154 | 154 | 4.89 | 4.89 |
| Angola | 155 | 155 | 4.79 | 4.79 |
| Yemen, Rep. | 156 | 156 | 4.68 | 4.68 |
| Libya | 157 | 157 | 4.65 | 4.65 |
| Iran, Islamic Rep. | 158 | 158 | 4.63 | 4.63 |
| Argentina | 159 | 159 | 4.55 | 4.55 |
| Myanmar | 160 | 160 | 4.54 | 4.54 |
| Algeria | 161 | 161 | 4.46 | 4.46 |
| Syrian Arab Republic | 162 | 162 | 4.28 | 4.28 |
| Sudan | 163 | 163 | 4.11 | 4.11 |
| Zimbabwe | 164 | 164 | 3.53 | 3.53 |
| Venezuela, RB | 165 | 165 | 3.02 | 3.02 |
The Economic Freedom Index claims to weight its five major Areas equally at 20% each, but this approach masks a more fundamental problem: the implicit weighting of individual Components and Subcomponents is highly unequal and seemingly arbitrary. Because each Area contains different numbers of Components and Subcomponents, individual measures end up with vastly different influence on the overall score. For instance, the government consumption Component (X1A) carries approximately 4% weight in the final index, while each labor-regulation Subcomponent (X5B_i through X5B_vii) carries only about 0.71% weight (a 5.6x difference). This means a single government-spending measure has much more influence than any one labor-market Subcomponent, despite no clear theoretical justification for why one should matter more than the other. Equal weighting at the Area level can obscure large disparities in the implied weights applied to Components and Subcomponents.
11 MODELS FOR WEIGHTS
11.1 EFA Theory
EFA is used when one don’t know the underlying structure of your data. It’s a data-driven approach where you:
Let the data reveal patterns and groupings
Discover which variables cluster together
Determine how many factors (latent variables) exist
See which items load onto which factors
11.2 CFA Theory
CFA is used when one already have a theory about the structure. It’s a theory-testing approach where you:
Specify in advance how many factors exist
Define which items should load on which factors
Test whether your proposed model fits the actual data
Evaluate if your theoretical structure is supporte
12 Implementation
Workflow used in this notebook:
- Run EFA to evaluate the latent structure in the Component/Subcomponent data.
- Derive empirical factor weights and compare country rankings.
- Run CFA to compare a data-driven 4-factor structure against the EFI-style 5-factor structure.
- Interpret results with emphasis on measurement fit and ranking sensitivity.
13 Local EFA/CFA Helpers
These small helper functions are defined inside this notebook so the post-Implementation analysis is self-contained and does not depend on external module files.
14 Area-Wise Correlations Before Factor Analysis
This section is a descriptive check before running EFA/CFA. It shows how indicators co-move within each EFI area using the cleaned 2022 dataset already created above.
Why this is useful:
- It gives an intuitive view of whether indicators inside each area appear to measure a common underlying dimension.
- It can reveal weakly related or unusually behaving indicators before factor analysis.
- It helps motivate why an empirical factor model may differ from the imposed five-area structure.
# Use the same Component/Subcomponent variables that feed the later factor analysis.
# For Area 2, use the gender-adjusted scaled indicators to match the EFI-adjusted measurement used elsewhere.
area1_corr <- df_2022_manual_subcomponent %>%
select(X1A, X1B, X1C, X1D_i, X1D_ii, X1E)
area2_corr <- df_2022_manual_subcomponent %>%
select(X2A_scaled, X2B_scaled, X2C_scaled, X2D_scaled, X2E_scaled, X2F_scaled, X2G_scaled, X2H_scaled)
area3_corr <- df_2022_manual_subcomponent %>%
select(X3A, X3B, X3C, X3D)
area4_corr <- df_2022_manual_subcomponent %>%
select(X4A_i, X4A_ii, X4A_iii, X4B_i, X4B_ii, X4C, X4D_i, X4D_ii, X4D_iii, X4D_iv)
area5_corr <- df_2022_manual_subcomponent %>%
select(
X5A_i, X5A_ii, X5A_iii,
X5B_i, X5B_ii, X5B_iii, X5B_iv, X5B_v, X5B_vi, X5B_vii,
X5C_i, X5C_ii, X5C_iii, X5C_iv,
X5D_i, X5D_ii, X5D_iii
)
# Single source of truth for Component/Subcomponent labels used in tables/figures/exports.
# Use `code: label` names in figures/tables so the paper is easier to read (analysis logic is unchanged).
efa_var_labels <- var_label_map
code_label_names <- function(df) {
current <- names(df)
mapped <- ifelse(current %in% names(efa_var_labels), paste0(current, ": ", efa_var_labels[current]), current)
names(df) <- mapped
df
}
area1_corr <- code_label_names(area1_corr)
area2_corr <- code_label_names(area2_corr)
area3_corr <- code_label_names(area3_corr)
area4_corr <- code_label_names(area4_corr)
area5_corr <- code_label_names(area5_corr)
pairwise_corr_range <- function(df, area_label) {
cm <- cor(df, use = "pairwise.complete.obs")
vals <- cm[lower.tri(cm, diag = FALSE)]
tibble::tibble(
Area = area_label,
MinCorr = min(vals, na.rm = TRUE),
MaxCorr = max(vals, na.rm = TRUE)
)
}
lapply(
list(Area1 = area1_corr, Area2 = area2_corr, Area3 = area3_corr, Area4 = area4_corr, Area5 = area5_corr),
dim
)$Area1
[1] 165 6
$Area2
[1] 165 8
$Area3
[1] 165 4
$Area4
[1] 165 10
$Area5
[1] 165 17
14.1 Area 1: Size of Government (Within-Area Correlations)
corplot(area1_corr, sz = 2.2, title = "Area 1: Size of Government")Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
ℹ The deprecated feature was likely used in the ggcorrplot package.
Please report the issue at <https://github.com/kassambara/ggcorrplot/issues>.
14.2 Area 2: Legal System and Property Rights (Within-Area Correlations)
corplot(area2_corr, sz = 2.0, title = "Area 2: Legal System and Property Rights (scaled)")14.3 Area 3: Sound Money (Within-Area Correlations)
corplot(area3_corr, sz = 2.5, title = "Area 3: Sound Money")14.4 Area 4: Freedom to Trade Internationally (Within-Area Correlations)
corplot(area4_corr, sz = 1.6, title = "Area 4: Freedom to Trade Internationally")14.5 Area 5: Regulation (Within-Area Correlations)
# Area 5 has many indicators, so labels and coefficients are smaller to keep the plot readable.
corplot(area5_corr, sz = 1.0, title = "Area 5: Regulation")14.6 Correlations Across the Five Official EFI Area Scores
area_scores_corr <- df_2022_manual_subcomponent %>%
select(Area1, Area2, Area3, Area4, Area5) %>%
rename(
`Area 1` = Area1,
`Area 2` = Area2,
`Area 3` = Area3,
`Area 4` = Area4,
`Area 5` = Area5
)
corplot(area_scores_corr, sz = 3.0, title = "Correlations Across the Five Official EFI Area Scores")Interpretation notes:
- Strong positive clustering within an area supports (but does not prove) that the area measures a coherent dimension.
- Weak or mixed correlations within an area can signal that equal aggregation may be combining distinct constructs.
- These plots are descriptive; EFA/CFA below provide the formal measurement tests.
# Export paper assets: correlation heatmaps (same style) + correlation ranges for chart notes.
dir.create("../paper_draft/images", recursive = TRUE, showWarnings = FALSE)
ggsave("../paper_draft/images/area1_corr_heatmap.png",
plot = corplot(area1_corr, sz = 2.2, title = "Area 1: Size of Government"),
width = 9.5, height = 7.5, dpi = 300)
ggsave("../paper_draft/images/area2_corr_heatmap.png",
plot = corplot(area2_corr, sz = 2.0, title = "Area 2: Legal System and Property Rights (scaled)"),
width = 10.5, height = 8.5, dpi = 300)
ggsave("../paper_draft/images/area3_corr_heatmap.png",
plot = corplot(area3_corr, sz = 2.5, title = "Area 3: Sound Money"),
width = 7, height = 5.5, dpi = 300)
ggsave("../paper_draft/images/area4_corr_heatmap.png",
plot = corplot(area4_corr, sz = 1.6, title = "Area 4: Freedom to Trade Internationally"),
width = 10, height = 8, dpi = 300)
ggsave("../paper_draft/images/area5_corr_heatmap.png",
plot = corplot(area5_corr, sz = 1.0, title = "Area 5: Regulation"),
width = 12, height = 10, dpi = 300)
ggsave("../paper_draft/images/area_scores_corr_heatmap.png",
plot = corplot(area_scores_corr, sz = 3.0, title = "Correlations Across the Five Official EFI Area Scores"),
width = 8.5, height = 7, dpi = 300)
dir.create("../paper_draft/tables", recursive = TRUE, showWarnings = FALSE)
write.csv(cor(area_scores_corr, use = "pairwise.complete.obs"),
"../paper_draft/tables/paper_area_scores_corr_matrix_v7.csv",
row.names = TRUE)
paper_corr_ranges <- dplyr::bind_rows(
pairwise_corr_range(area1_corr, "Area 1"),
pairwise_corr_range(area2_corr, "Area 2"),
pairwise_corr_range(area3_corr, "Area 3"),
pairwise_corr_range(area4_corr, "Area 4"),
pairwise_corr_range(area5_corr, "Area 5")
) %>%
mutate(across(c(MinCorr, MaxCorr), ~ round(.x, 3)))
write.csv(paper_corr_ranges, "../paper_draft/tables/paper_corr_ranges_v7.csv", row.names = FALSE)
paper_corr_ranges# A tibble: 5 × 3
Area MinCorr MaxCorr
<chr> <dbl> <dbl>
1 Area 1 -0.321 0.706
2 Area 2 0.389 0.924
3 Area 3 0.038 0.603
4 Area 4 -0.041 0.763
5 Area 5 -0.102 0.843
15 Run EFA
15.1 Build Indicator Matrix and Run Diagnostics
# Indicator-level EFA input (same specification as the original analysis logic)
efa_data <- df_2022_manual_subcomponent %>%
select(
# Area 1 (6)
X1A, X1B, X1C, X1D_i, X1D_ii, X1E,
# Area 2 (8, scaled)
X2A_scaled, X2B_scaled, X2C_scaled, X2D_scaled,
X2E_scaled, X2F_scaled, X2G_scaled, X2H_scaled,
# Area 3 (4)
X3A, X3B, X3C, X3D,
# Area 4 (10)
X4A_i, X4A_ii, X4A_iii,
X4B_i, X4B_ii,
X4C,
X4D_i, X4D_ii, X4D_iii, X4D_iv,
# Area 5 (17)
X5A_i, X5A_ii, X5A_iii,
X5B_i, X5B_ii, X5B_iii, X5B_iv, X5B_v, X5B_vi, X5B_vii,
X5C_i, X5C_ii, X5C_iii, X5C_iv,
X5D_i, X5D_ii, X5D_iii
)
efa_data_complete <- na.omit(efa_data)
cor_matrix <- cor(efa_data_complete, use = "pairwise.complete.obs")
kmo_result <- KMO(cor_matrix)
bartlett_result <- cortest.bartlett(cor_matrix, n = nrow(efa_data_complete))
# Drop the same four low-MSA variables used in your earlier analysis
low_msa_drop <- c("X5B_iv", "X5B_vi", "X5A_iii", "X4A_iii")
efa_data_clean <- efa_data_complete %>% select(-all_of(low_msa_drop))
# Return key dimensions/missingness as actual object output
rbind(
`Raw EFA matrix` = dim(efa_data),
`Complete-case EFA matrix` = dim(efa_data_complete),
`After low-MSA drops` = dim(efa_data_clean)
) [,1] [,2]
Raw EFA matrix 165 45
Complete-case EFA matrix 108 45
After low-MSA drops 108 41
sum(is.na(efa_data))[1] 122
sort(colSums(is.na(efa_data)), decreasing = TRUE) %>% head(10)X2D_scaled X5B_iii X5A_i X1B X4A_i X5C_i X1C
27 24 14 9 9 7 4
X5D_ii X1D_ii X1E
4 3 3
Interpretation:
- The EFA sample size is the complete-case
Nafter listwise deletion. - The
After low-MSA dropsmatrix should be108 x 41if the same filtering is reproduced. - Missingness matters because it can change which countries enter the factor model.
kmo_resultKaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor_matrix)
Overall MSA = 0.86
MSA for each item =
X1A X1B X1C X1D_i X1D_ii X1E X2A_scaled
0.87 0.85 0.81 0.68 0.63 0.82 0.90
X2B_scaled X2C_scaled X2D_scaled X2E_scaled X2F_scaled X2G_scaled X2H_scaled
0.89 0.93 0.89 0.94 0.90 0.79 0.90
X3A X3B X3C X3D X4A_i X4A_ii X4A_iii
0.60 0.78 0.68 0.87 0.83 0.90 0.55
X4B_i X4B_ii X4C X4D_i X4D_ii X4D_iii X4D_iv
0.89 0.94 0.86 0.91 0.87 0.65 0.92
X5A_i X5A_ii X5A_iii X5B_i X5B_ii X5B_iii X5B_iv
0.74 0.79 0.52 0.72 0.80 0.69 0.30
X5B_v X5B_vi X5B_vii X5C_i X5C_ii X5C_iii X5C_iv
0.73 0.49 0.91 0.65 0.90 0.91 0.91
X5D_i X5D_ii X5D_iii
0.93 0.59 0.91
bartlett_result$chisq
[1] 4321.074
$p.value
[1] 0
$df
[1] 990
Interpretation:
KMOassesses whether the correlation structure is suitable for factor analysis (higher is better).- Bartlett’s test checks whether the correlation matrix is significantly different from identity (small p-value supports factor analysis).
15.2 Parallel Analysis and VSS (Factor Count Guidance)
parallel_results <- fa.parallel(
efa_data_clean,
fa = "fa",
fm = "ml",
n.iter = 100
)Parallel analysis suggests that the number of factors = 4 and the number of components = NA
scree(efa_data_clean, factors = TRUE)vss_results <- VSS(
efa_data_clean,
n = 10,
rotate = "varimax",
fm = "ml"
)# Print object classes / summaries so the notebook records what was estimated
class(parallel_results)[1] "psych" "parallel"
class(vss_results)[1] "psych" "vss"
Interpretation:
- Parallel analysis and scree plots are heuristics for the number of factors.
- They support model selection but should be considered alongside interpretability and theory.
15.3 Fit the 4-Factor EFA Model
efa_4factor <- fa(
efa_data_clean,
nfactors = 4,
rotate = "varimax",
fm = "ml",
scores = "regression"
)
print(efa_4factor, cut = 0.30, sort = TRUE)Factor Analysis using method = ml
Call: fa(r = efa_data_clean, nfactors = 4, rotate = "varimax", scores = "regression",
fm = "ml")
Standardized loadings (pattern matrix) based upon correlation matrix
item ML2 ML3 ML4 ML1 h2 u2 com
X2E_scaled 11 0.88 0.33 0.92 0.084 1.4
X2A_scaled 7 0.82 0.43 0.89 0.114 1.6
X5C_iii 37 0.81 0.76 0.236 1.4
X2B_scaled 8 0.80 0.46 0.92 0.076 1.9
X2D_scaled 10 0.78 0.73 0.266 1.4
X2H_scaled 14 0.70 0.47 0.82 0.179 2.3
X2C_scaled 9 0.68 0.60 0.31 0.94 0.063 2.4
X2F_scaled 12 0.67 0.41 0.41 0.82 0.177 2.6
X1A 1 -0.67 0.49 0.514 1.2
X1B 2 -0.63 0.52 0.478 1.7
X5C_ii 36 0.54 0.52 0.61 0.386 2.4
X4B_ii 22 0.53 0.47 0.53 0.466 2.2
X1D_i 4 -0.46 0.25 0.750 1.4
X1C 3 0.37 0.30 0.27 0.726 2.6
X4D_iii 26 0.15 0.850 2.4
X5D_iii 41 0.87 0.88 0.118 1.3
X4D_iv 27 0.42 0.79 0.85 0.148 1.7
X5D_i 39 0.36 0.75 0.75 0.245 1.7
X4B_i 21 0.37 0.71 0.70 0.297 1.8
X4D_i 24 0.52 0.66 0.76 0.241 2.1
X5B_vii 34 0.64 0.43 0.569 1.1
X1E 6 0.55 -0.46 0.57 0.426 2.3
X4C 23 0.53 0.31 0.686 1.3
X4A_i 19 0.50 0.32 0.680 1.5
X4A_ii 20 0.39 0.50 0.45 0.548 2.4
X3B 16 0.49 0.28 0.724 1.3
X4D_ii 25 0.31 0.47 0.33 0.666 1.9
X3D 18 0.41 0.31 0.691 2.6
X5A_i 28 0.37 0.18 0.816 1.7
X3C 17 0.35 0.19 0.810 2.0
X2G_scaled 13 0.31 0.33 0.24 0.763 2.6
X5A_ii 29 0.11 0.891 2.8
X5C_i 35 0.73 0.59 0.412 1.2
X5B_i 30 0.33 0.53 0.44 0.562 2.0
X5C_iv 38 0.38 0.37 0.53 0.58 0.424 2.7
X5D_ii 40 0.47 0.22 0.775 1.0
X1D_ii 5 -0.33 0.41 0.31 0.686 2.4
X5B_iii 32 0.33 0.21 0.794 2.7
X3A 15 0.31 0.18 0.824 2.4
X5B_ii 31 0.88 1.00 0.005 1.6
X5B_v 33 0.31 0.87 0.86 0.140 1.3
ML2 ML3 ML4 ML1
SS loadings 8.51 7.88 3.24 2.06
Proportion Var 0.21 0.19 0.08 0.05
Cumulative Var 0.21 0.40 0.48 0.53
Proportion Explained 0.39 0.36 0.15 0.10
Cumulative Proportion 0.39 0.76 0.90 1.00
Mean item complexity = 1.9
Test of the hypothesis that 4 factors are sufficient.
df null model = 820 with the objective function = 44.32 with Chi Square = 4099.31
df of the model are 662 and the objective function was 14.41
The root mean square of the residuals (RMSR) is 0.07
The df corrected root mean square of the residuals is 0.08
The harmonic n.obs is 108 with the empirical chi square 941.71 with prob < 4.3e-12
The total n.obs was 108 with Likelihood Chi Square = 1294.91 with prob < 2.3e-43
Tucker Lewis Index of factoring reliability = 0.752
RMSEA index = 0.094 and the 90 % confidence intervals are 0.087 0.102
BIC = -1804.66
Fit based upon off diagonal values = 0.97
Measures of factor score adequacy
ML2 ML3 ML4 ML1
Correlation of (regression) scores with factors 0.98 0.96 0.93 0.99
Multiple R square of scores with factors 0.95 0.93 0.86 0.98
Minimum correlation of possible factor scores 0.90 0.85 0.72 0.97
efa_fit_table <- tibble::tibble(
Metric = c("TLI", "RMSEA", "RMSEA CI lower", "RMSEA CI upper"),
Value = c(efa_4factor$TLI, efa_4factor$RMSEA[1], efa_4factor$RMSEA[2], efa_4factor$RMSEA[3])
) %>%
mutate(Value = round(Value, 3))
variance_table <- data.frame(
Factor = paste0("Factor", 1:4),
SS_Loadings = efa_4factor$Vaccounted[1, ],
Proportion_Variance = efa_4factor$Vaccounted[2, ],
Cumulative_Variance = efa_4factor$Vaccounted[3, ]
) %>%
mutate(across(where(is.numeric), ~ round(.x, 3)))
efa_fit_table# A tibble: 4 × 2
Metric Value
<chr> <dbl>
1 TLI 0.752
2 RMSEA 0.094
3 RMSEA CI lower 0.087
4 RMSEA CI upper 0.102
knitr::kable(variance_table, caption = "EFA variance explained (4-factor solution)") %>%
kableExtra::kable_styling(full_width = FALSE)| Factor | SS_Loadings | Proportion_Variance | Cumulative_Variance | |
|---|---|---|---|---|
| ML2 | Factor1 | 8.506 | 0.207 | 0.207 |
| ML3 | Factor2 | 7.878 | 0.192 | 0.400 |
| ML4 | Factor3 | 3.243 | 0.079 | 0.479 |
| ML1 | Factor4 | 2.064 | 0.050 | 0.529 |
Interpretation:
TLIandRMSEAsummarize how well the 4-factor solution reproduces the correlation structure.- The variance shares are the basis for the empirical factor weights used in the ranking exercise.
loadings_df <- as.data.frame(unclass(efa_4factor$loadings))
loadings_df$Variable <- rownames(loadings_df)
loading_cols <- grep("^ML", names(loadings_df), value = TRUE)
loadings_df$Max_Loading <- apply(abs(loadings_df[, loading_cols, drop = FALSE]), 1, max)
loadings_df$Primary_Factor <- apply(abs(loadings_df[, loading_cols, drop = FALSE]), 1, which.max)
loadings_df$Variable <- ifelse(
loadings_df$Variable %in% names(efa_var_labels),
paste0(loadings_df$Variable, ": ", efa_var_labels[loadings_df$Variable]),
loadings_df$Variable
)
primary_loadings <- loadings_df %>%
filter(Max_Loading > 0.30) %>%
arrange(Primary_Factor, desc(Max_Loading))
knitr::kable(primary_loadings, digits = 3, caption = "Primary factor assignments (loadings > 0.30)") %>%
kableExtra::kable_styling(full_width = FALSE, font_size = 10)| ML2 | ML3 | ML4 | ML1 | Variable | Max_Loading | Primary_Factor | |
|---|---|---|---|---|---|---|---|
| X2E_scaled | 0.883 | 0.329 | 0.050 | 0.158 | X2E_scaled: Legal integrity* | 0.883 | 1 |
| X2A_scaled | 0.819 | 0.434 | 0.102 | 0.127 | X2A_scaled: Judicial independence* | 0.819 | 1 |
| X5C_iii | 0.805 | 0.272 | -0.124 | 0.163 | X5C_iii: Impartial public administration | 0.805 | 1 |
| X2B_scaled | 0.798 | 0.458 | 0.239 | 0.142 | X2B_scaled: Impartial courts* | 0.798 | 1 |
| X2D_scaled | 0.784 | 0.286 | 0.011 | 0.193 | X2D_scaled: Military interference* | 0.784 | 1 |
| X2H_scaled | 0.699 | 0.293 | 0.467 | 0.168 | X2H_scaled: Police reliability* | 0.699 | 1 |
| X2C_scaled | 0.683 | 0.602 | 0.315 | 0.095 | X2C_scaled: Property rights* | 0.683 | 1 |
| X2F_scaled | 0.668 | 0.407 | 0.410 | 0.205 | X2F_scaled: Contract enforcement* | 0.668 | 1 |
| X1A | -0.667 | -0.016 | -0.186 | -0.080 | X1A: Government consumption | 0.667 | 1 |
| X1B | -0.629 | -0.290 | 0.119 | -0.168 | X1B: Transfers and subsidies | 0.629 | 1 |
| X5C_ii | 0.536 | 0.519 | 0.235 | -0.055 | X5C_ii: Bureaucracy costs | 0.536 | 1 |
| X4B_ii | 0.532 | 0.475 | 0.140 | 0.075 | X4B_ii: Trade compliance costs | 0.532 | 1 |
| X1D_i | -0.458 | -0.037 | 0.195 | -0.024 | X1D_i: Top marginal income tax rate | 0.458 | 1 |
| X1C | 0.371 | 0.303 | -0.172 | -0.122 | X1C: Government investment | 0.371 | 1 |
| X5D_iii | 0.296 | 0.874 | 0.114 | 0.130 | X5D_iii: Distortion of business environment | 0.874 | 2 |
| X4D_iv | 0.425 | 0.787 | 0.186 | 0.136 | X4D_iv: Protection of foreign assets | 0.787 | 2 |
| X5D_i | 0.359 | 0.754 | 0.200 | 0.134 | X5D_i: Market openness | 0.754 | 2 |
| X4B_i | 0.369 | 0.708 | 0.232 | 0.109 | X4B_i: Non-tariff barriers | 0.708 | 2 |
| X4D_i | 0.523 | 0.664 | 0.188 | 0.097 | X4D_i: Financial openness | 0.664 | 2 |
| X5B_vii | 0.020 | 0.644 | 0.092 | 0.083 | X5B_vii: Foreign labor | 0.644 | 2 |
| X1E | 0.241 | 0.553 | -0.457 | 0.038 | X1E: State ownership of assets | 0.553 | 2 |
| X4C | 0.119 | 0.529 | 0.088 | -0.113 | X4C: Black market exchange rates | 0.529 | 2 |
| X4A_i | 0.147 | 0.505 | 0.189 | 0.091 | X4A_i: Trade tax revenue | 0.505 | 2 |
| X4A_ii | 0.395 | 0.496 | 0.180 | 0.134 | X4A_ii: Mean tariff rate | 0.496 | 2 |
| X3B | 0.137 | 0.493 | -0.113 | 0.037 | X3B: Inflation volatility | 0.493 | 2 |
| X4D_ii | 0.305 | 0.474 | 0.120 | 0.047 | X4D_ii: Capital controls | 0.474 | 2 |
| X3D | 0.288 | 0.406 | 0.232 | 0.084 | X3D: Foreign currency accounts | 0.406 | 2 |
| X5A_i | 0.132 | 0.372 | -0.087 | 0.144 | X5A_i: Ownership of banks | 0.372 | 2 |
| X3C | 0.099 | 0.347 | 0.243 | 0.004 | X3C: Recent inflation | 0.347 | 2 |
| X2G_scaled | 0.313 | 0.326 | 0.165 | 0.077 | X2G_scaled: Real property restrictions* | 0.326 | 2 |
| X5C_i | 0.174 | 0.080 | 0.734 | 0.113 | X5C_i: Regulatory burden | 0.734 | 3 |
| X5B_i | 0.108 | 0.333 | 0.535 | 0.171 | X5B_i: Hiring regulations and minimum wage | 0.535 | 3 |
| X5C_iv | 0.384 | 0.369 | 0.532 | 0.093 | X5C_iv: Tax compliance | 0.532 | 3 |
| X5D_ii | 0.032 | -0.011 | 0.472 | 0.015 | X5D_ii: Business permits | 0.472 | 3 |
| X1D_ii | -0.331 | -0.018 | 0.408 | -0.193 | X1D_ii: Top marginal income and payroll tax rate | 0.408 | 3 |
| X5B_iii | -0.227 | 0.208 | 0.326 | 0.069 | X5B_iii: Centralized collective bargaining | 0.326 | 3 |
| X3A | -0.072 | 0.252 | 0.306 | 0.116 | X3A: Money growth | 0.306 | 3 |
| X5B_ii | 0.240 | 0.289 | 0.270 | 0.884 | X5B_ii: Hiring and firing regulations | 0.884 | 4 |
| X5B_v | 0.309 | 0.060 | 0.091 | 0.868 | X5B_v: Mandated cost of worker dismissal | 0.868 | 4 |
Interpretation:
- This table is for substantive interpretation of the empirical dimensions.
- Cross-loadings or weak loadings should be reported as limitations in the paper.
# Paper-facing EFA loading heatmap (exported for manuscript inclusion).
loadings_long <- loadings_df %>%
select(Variable, all_of(loading_cols), Primary_Factor, Max_Loading) %>%
tidyr::pivot_longer(cols = all_of(loading_cols), names_to = "Factor", values_to = "Loading")
factor_labels <- setNames(paste0("Factor ", seq_along(loading_cols)), loading_cols)
loadings_long <- loadings_long %>%
mutate(
Factor = unname(factor_labels[Factor]),
Factor = factor(Factor, levels = unname(factor_labels)),
Variable = factor(
Variable,
levels = rev(primary_loadings %>% arrange(Primary_Factor, desc(Max_Loading)) %>% pull(Variable))
)
) %>%
filter(!is.na(Variable))
efa_loading_heatmap_v7 <- ggplot(loadings_long, aes(x = Factor, y = Variable, fill = Loading)) +
geom_tile(color = "grey85", linewidth = 0.2) +
geom_text(
aes(label = ifelse(abs(Loading) >= 0.30, sprintf("%.2f", Loading), "")),
size = 2.5
) +
scale_fill_gradient2(
low = "#d73027",
mid = "white",
high = "#1a9850",
midpoint = 0,
limits = c(-1, 1),
oob = scales::squish
) +
labs(
title = "4-Factor EFA Loading Heatmap",
subtitle = "Rows sorted by primary loading; labels shown for |loading| >= 0.30",
x = NULL, y = NULL, fill = "Loading"
) +
theme_minimal(base_size = 10) +
theme(
panel.grid = element_blank(),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 7),
plot.title = element_text(face = "bold"),
legend.position = "right"
)
dir.create("../paper_draft/images", recursive = TRUE, showWarnings = FALSE)
ggsave("../paper_draft/images/efa_loading_heatmap.png", efa_loading_heatmap_v7, width = 8.8, height = 11.5, dpi = 300)
write.csv(primary_loadings, "../data/clean/efa_primary_loadings_v7.csv", row.names = FALSE)
efa_loading_heatmap_v715.4 EFA in lavaan
Why the main EFA above uses psych::fa():
psychis more convenient for exploratory workflows (parallel analysis, VSS, rotated loadings, quick factor scores).lavaanis used later for CFA because it is stronger for confirmatory model comparison and SEM-style fit reporting.
Why add lavaan EFA:
- It provides a robustness check using the same Component/Subcomponent matrix inside the same modeling framework used for CFA.
- It helps compare whether the exploratory loading structure is broadly similar across implementations.
# Build a 4-factor exploratory block in lavaan using the same cleaned EFA matrix.
efa_vars_lav <- colnames(efa_data_clean)
lavaan_efa_syntax <- paste(
c(
paste0('efa("efa1")*F1 =~ ', paste(efa_vars_lav, collapse = " + ")),
paste0('efa("efa1")*F2 =~ ', paste(efa_vars_lav, collapse = " + ")),
paste0('efa("efa1")*F3 =~ ', paste(efa_vars_lav, collapse = " + ")),
paste0('efa("efa1")*F4 =~ ', paste(efa_vars_lav, collapse = " + "))
),
collapse = "\n"
)
lavaan_efa_fit <- with_detectcores_fallback(
lavaan::sem(
model = lavaan_efa_syntax,
data = efa_data_clean,
std.lv = TRUE,
rotation = "varimax",
missing = "fiml",
ncpus = 1L
)
)Warning: lavaan->lav_object_post_check():
some estimated ov variances are negative
lavaan_efa_fitlavaan 0.6-19 ended normally after 448 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 246
Row rank of the constraints matrix 6
Rotation method VARIMAX ORTHOGONAL
Rotation algorithm (rstarts) GPA (30)
Standardized metric TRUE
Row weights Kaiser
Number of observations 108
Number of missing patterns 1
Model Test User Model:
Test statistic 1538.945
Degrees of freedom 662
P-value (Chi-square) 0.000
lavaan_efa_fit_measures <- fitMeasures(
lavaan_efa_fit,
c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper", "srmr", "aic", "bic")
)
round(lavaan_efa_fit_measures, 3) chisq df pvalue cfi tli
1538.945 662.000 0.000 0.779 0.726
rmsea rmsea.ci.lower rmsea.ci.upper srmr aic
0.111 0.104 0.118 0.071 15814.763
bic
16458.474
lavaan_efa_loadings <- standardizedSolution(lavaan_efa_fit) %>%
dplyr::filter(op == "=~") %>%
dplyr::select(Factor = lhs, Variable = rhs, Std_Loading = est.std) %>%
dplyr::mutate(Abs_Loading = abs(Std_Loading)) %>%
dplyr::arrange(Factor, dplyr::desc(Abs_Loading))
knitr::kable(
dplyr::filter(lavaan_efa_loadings, Abs_Loading >= 0.30),
digits = 3,
caption = "lavaan EFA standardized loadings (|loading| >= 0.30)"
) %>%
kableExtra::kable_styling(full_width = FALSE, font_size = 10)| Factor | Variable | Std_Loading | Abs_Loading |
|---|---|---|---|
| F1 | X5C_i | 0.725 | 0.725 |
| F1 | X5C_iv | 0.530 | 0.530 |
| F1 | X5B_i | 0.508 | 0.508 |
| F1 | X2H_scaled | 0.490 | 0.490 |
| F1 | X1E | -0.485 | 0.485 |
| F1 | X5D_ii | 0.468 | 0.468 |
| F1 | X2F_scaled | 0.422 | 0.422 |
| F1 | X1D_ii | 0.364 | 0.364 |
| F1 | X5B_iii | 0.321 | 0.321 |
| F1 | X2C_scaled | 0.310 | 0.310 |
| F2 | X2E_scaled | 0.894 | 0.894 |
| F2 | X2A_scaled | 0.826 | 0.826 |
| F2 | X5C_iii | 0.818 | 0.818 |
| F2 | X2B_scaled | 0.804 | 0.804 |
| F2 | X2D_scaled | 0.795 | 0.795 |
| F2 | X2H_scaled | 0.703 | 0.703 |
| F2 | X2F_scaled | 0.681 | 0.681 |
| F2 | X2C_scaled | 0.677 | 0.677 |
| F2 | X1A | -0.656 | 0.656 |
| F2 | X1B | -0.641 | 0.641 |
| F2 | X4B_ii | 0.521 | 0.521 |
| F2 | X4D_i | 0.513 | 0.513 |
| F2 | X5C_ii | 0.505 | 0.505 |
| F2 | X1D_i | -0.462 | 0.462 |
| F2 | X4D_iv | 0.430 | 0.430 |
| F2 | X5B_v | 0.407 | 0.407 |
| F2 | X4A_ii | 0.395 | 0.395 |
| F2 | X5C_iv | 0.382 | 0.382 |
| F2 | X5D_i | 0.371 | 0.371 |
| F2 | X4B_i | 0.363 | 0.363 |
| F2 | X1D_ii | -0.359 | 0.359 |
| F2 | X1C | 0.346 | 0.346 |
| F2 | X2G_scaled | 0.314 | 0.314 |
| F2 | X5D_iii | 0.309 | 0.309 |
| F3 | X5B_ii | 2.253 | 2.253 |
| F3 | X5B_v | 0.322 | 0.322 |
| F4 | X5D_iii | 0.871 | 0.871 |
| F4 | X4D_iv | 0.793 | 0.793 |
| F4 | X5D_i | 0.749 | 0.749 |
| F4 | X4B_i | 0.721 | 0.721 |
| F4 | X4D_i | 0.678 | 0.678 |
| F4 | X5B_vii | 0.649 | 0.649 |
| F4 | X2C_scaled | 0.608 | 0.608 |
| F4 | X1E | 0.551 | 0.551 |
| F4 | X4C | 0.536 | 0.536 |
| F4 | X5C_ii | 0.528 | 0.528 |
| F4 | X4A_ii | 0.513 | 0.513 |
| F4 | X4A_i | 0.511 | 0.511 |
| F4 | X3B | 0.494 | 0.494 |
| F4 | X4D_ii | 0.488 | 0.488 |
| F4 | X4B_ii | 0.485 | 0.485 |
| F4 | X2B_scaled | 0.466 | 0.466 |
| F4 | X2A_scaled | 0.439 | 0.439 |
| F4 | X3D | 0.418 | 0.418 |
| F4 | X2F_scaled | 0.418 | 0.418 |
| F4 | X5A_i | 0.387 | 0.387 |
| F4 | X5C_iv | 0.378 | 0.378 |
| F4 | X3C | 0.361 | 0.361 |
| F4 | X5B_i | 0.336 | 0.336 |
| F4 | X2E_scaled | 0.334 | 0.334 |
| F4 | X2G_scaled | 0.329 | 0.329 |
| F4 | X2H_scaled | 0.302 | 0.302 |
Interpretation:
- Broad agreement with
psych::fa()strengthens the claim that the factor structure is not an artifact of a single package. - Differences can arise from estimation/rotation details and do not automatically invalidate either solution.
- The main empirical weighting pipeline below still uses
psych::fa()factor scores for continuity with the earlier analysis.
16 Empirical Weights and Rank Sensitivity
# Factor scores for complete-case countries only
factor_scores <- as.data.frame(efa_4factor$scores)
colnames(factor_scores) <- c("Factor1", "Factor2", "Factor3", "Factor4")
complete_countries <- df_2022_manual_subcomponent %>%
slice(which(complete.cases(efa_data))) %>%
pull(Countries)
factor_scores$Countries <- complete_countries
# Equal weighting across empirical factors
factor_scores <- factor_scores %>%
mutate(
EFI_Empirical_Equal = (Factor1 + Factor2 + Factor3 + Factor4) / 4,
Rank_Empirical_Equal = rank(-EFI_Empirical_Equal, ties.method = "min")
)
# Variance-based empirical weights (same logic as earlier versions)
variance_weights <- efa_4factor$Vaccounted[2, ] / sum(efa_4factor$Vaccounted[2, ])
factor_scores <- factor_scores %>%
mutate(
EFI_Empirical_Weighted =
variance_weights[1] * Factor1 +
variance_weights[2] * Factor2 +
variance_weights[3] * Factor3 +
variance_weights[4] * Factor4,
Rank_Empirical_Weighted = rank(-EFI_Empirical_Weighted, ties.method = "min"),
EFI_Empirical_Equal_Scaled = as.numeric(5 + 2.5 * scale(EFI_Empirical_Equal)),
EFI_Empirical_Weighted_Scaled = as.numeric(5 + 2.5 * scale(EFI_Empirical_Weighted))
)
comparison <- df_2022_manual_subcomponent %>%
filter(Countries %in% factor_scores$Countries) %>%
select(Countries, Rank, `Economic Freedom Summary Index`) %>%
left_join(
factor_scores %>%
select(
Countries,
Rank_Empirical_Equal,
Rank_Empirical_Weighted,
EFI_Empirical_Equal_Scaled,
EFI_Empirical_Weighted_Scaled
),
by = "Countries"
) %>%
mutate(
Rank_Change_Equal = Rank - Rank_Empirical_Equal,
Rank_Change_Weighted = Rank - Rank_Empirical_Weighted
) %>%
arrange(Rank)
# Save outputs for the paper draft
write.csv(comparison, "../data/clean/efa_comparison_results.csv", row.names = FALSE)
write.csv(factor_scores, "../data/clean/efa_factor_scores.csv", row.names = FALSE)weights_tbl <- tibble::tibble(
Factor = paste0("Factor", seq_along(variance_weights)),
Weight = scales::percent(variance_weights, accuracy = 0.1)
)
knitr::kable(weights_tbl, caption = "Variance-based empirical factor weights") %>%
kableExtra::kable_styling(full_width = FALSE)| Factor | Weight |
|---|---|
| Factor1 | 39.2% |
| Factor2 | 36.3% |
| Factor3 | 15.0% |
| Factor4 | 9.5% |
Interpretation:
- If the empirical weights are far from equal, that is direct evidence against the equal-weighting assumption as a neutral default.
rank_summary_table_local(comparison)# A tibble: 7 × 2
Metric Value
<chr> <chr>
1 Countries 108
2 Mean absolute rank change 19.5
3 Median absolute rank change 14
4 Countries with |rank change| > 10 64 (59.3%)
5 Countries with |rank change| > 20 41 (38.0%)
6 Spearman correlation 0.939
7 Pearson correlation 0.926
Interpretation:
- Correlation summarizes overall similarity in rankings, but the rank-change distribution captures practical ranking sensitivity.
gainers <- comparison %>%
filter(Rank_Change_Weighted > 0) %>%
arrange(desc(Rank_Change_Weighted)) %>%
slice_head(n = 10) %>%
select(Countries, Rank, Rank_Empirical_Weighted, Rank_Change_Weighted)
losers <- comparison %>%
filter(Rank_Change_Weighted < 0) %>%
arrange(Rank_Change_Weighted) %>%
slice_head(n = 10) %>%
select(Countries, Rank, Rank_Empirical_Weighted, Rank_Change_Weighted)
knitr::kable(gainers, caption = "Top 10 gainers under empirical weighting") %>%
kableExtra::kable_styling(full_width = FALSE)| Countries | Rank | Rank_Empirical_Weighted | Rank_Change_Weighted |
|---|---|---|---|
| Argentina | 159 | 88 | 71 |
| Türkiye | 138 | 72 | 66 |
| Ethiopia | 148 | 89 | 59 |
| Algeria | 161 | 103 | 58 |
| Ukraine | 150 | 93 | 57 |
| Zimbabwe | 164 | 107 | 57 |
| Angola | 155 | 104 | 51 |
| Iran, Islamic Rep. | 158 | 108 | 50 |
| Sri Lanka | 123 | 74 | 49 |
| Malawi | 140 | 92 | 48 |
knitr::kable(losers, caption = "Top 10 losers under empirical weighting") %>%
kableExtra::kable_styling(full_width = FALSE)| Countries | Rank | Rank_Empirical_Weighted | Rank_Change_Weighted |
|---|---|---|---|
| El Salvador | 59 | 81 | -22 |
| Panama | 23 | 43 | -20 |
| Guatemala | 39 | 59 | -20 |
| Honduras | 67 | 86 | -19 |
| Armenia | 36 | 53 | -17 |
| Malaysia | 29 | 45 | -16 |
| Albania | 38 | 52 | -14 |
| Bahrain | 34 | 46 | -12 |
| Indonesia | 64 | 76 | -12 |
| Philippines | 59 | 70 | -11 |
# Export paper-facing ranking/weight tables close to where they are created.
paper_rank_summary_v7 <- comparison %>%
summarise(
n = n(),
spearman = cor(Rank, Rank_Empirical_Weighted, method = "spearman", use = "complete.obs"),
pearson = cor(Rank, Rank_Empirical_Weighted, method = "pearson", use = "complete.obs"),
mean_abs_change = mean(abs(Rank_Change_Weighted), na.rm = TRUE),
median_abs_change = median(abs(Rank_Change_Weighted), na.rm = TRUE),
gt10 = sum(abs(Rank_Change_Weighted) > 10, na.rm = TRUE),
gt20 = sum(abs(Rank_Change_Weighted) > 20, na.rm = TRUE)
)
paper_weights_v7 <- tibble::tibble(
Factor = paste0("Factor ", seq_along(variance_weights)),
Weight = as.numeric(variance_weights)
)
paper_summary_stats_v7 <- tibble::tibble(
Metric = c(
"Countries in rank-comparison sample",
"EFI score (original): mean",
"EFI score (original): SD",
"EFI score (original): min / max",
"Empirical weighted score (scaled): mean",
"Empirical weighted score (scaled): SD",
"Empirical weighted score (scaled): min / max",
"Absolute rank change (weighted): mean",
"Absolute rank change (weighted): median",
"Absolute rank change (weighted): max"
),
Value = c(
nrow(comparison),
round(mean(comparison$`Economic Freedom Summary Index`, na.rm = TRUE), 3),
round(sd(comparison$`Economic Freedom Summary Index`, na.rm = TRUE), 3),
sprintf("%.3f / %.3f",
min(comparison$`Economic Freedom Summary Index`, na.rm = TRUE),
max(comparison$`Economic Freedom Summary Index`, na.rm = TRUE)),
round(mean(comparison$EFI_Empirical_Weighted_Scaled, na.rm = TRUE), 3),
round(sd(comparison$EFI_Empirical_Weighted_Scaled, na.rm = TRUE), 3),
sprintf("%.3f / %.3f",
min(comparison$EFI_Empirical_Weighted_Scaled, na.rm = TRUE),
max(comparison$EFI_Empirical_Weighted_Scaled, na.rm = TRUE)),
round(mean(abs(comparison$Rank_Change_Weighted), na.rm = TRUE), 1),
round(median(abs(comparison$Rank_Change_Weighted), na.rm = TRUE), 1),
max(abs(comparison$Rank_Change_Weighted), na.rm = TRUE)
)
)
# Paper-facing exports consumed by the paper file.
dir.create("../paper_draft/tables", recursive = TRUE, showWarnings = FALSE)
write.csv(paper_rank_summary_v7, "../paper_draft/tables/paper_rank_summary_v7.csv", row.names = FALSE)
write.csv(gainers, "../paper_draft/tables/paper_top_gainers_v7.csv", row.names = FALSE)
write.csv(losers, "../paper_draft/tables/paper_top_losers_v7.csv", row.names = FALSE)
write.csv(paper_weights_v7, "../paper_draft/tables/paper_weights_v7.csv", row.names = FALSE)
write.csv(paper_summary_stats_v7, "../paper_draft/tables/paper_summary_stats_v7.csv", row.names = FALSE)plot_rank_scatter_local(comparison)plot_rank_change_hist_local(comparison)17 CFA Comparison
cfa_model_4factor <- '
Legal =~ X2E_scaled + X2A_scaled + X2B_scaled + X2D_scaled +
X2H_scaled + X2C_scaled + X2F_scaled + X5C_iii + X1A + X1B
Trade =~ X5D_iii + X4D_iv + X5D_i + X4B_i + X4D_i + X5B_vii + X1E
Business =~ X5C_i + X5B_i
Labor =~ X5B_ii + X5B_v
'
cfa_model_5factor <- '
GovSize =~ X1A + X1B + X1C + X1D_i + X1D_ii + X1E
Legal =~ X2A_scaled + X2B_scaled + X2C_scaled + X2D_scaled +
X2E_scaled + X2F_scaled + X2G_scaled + X2H_scaled
Money =~ X3A + X3B + X3C + X3D
Trade =~ X4A_i + X4A_ii + X4B_i + X4B_ii + X4C +
X4D_i + X4D_ii + X4D_iii + X4D_iv
Regulation =~ X5A_i + X5A_ii + X5B_i + X5B_ii + X5B_iii +
X5B_v + X5B_vii + X5C_i + X5C_ii + X5C_iii +
X5C_iv + X5D_i + X5D_ii + X5D_iii
'
cfa_fit_4factor <- with_detectcores_fallback(
lavaan::cfa(cfa_model_4factor, data = efa_data_clean, std.lv = TRUE, missing = "fiml", ncpus = 1L)
)Warning: lavaan->lav_object_post_check():
some estimated ov variances are negative
cfa_fit_5factor <- with_detectcores_fallback(
lavaan::cfa(cfa_model_5factor, data = efa_data_clean, std.lv = TRUE, missing = "fiml", ncpus = 1L)
)
fit_4factor <- fitMeasures(cfa_fit_4factor, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper", "srmr", "aic", "bic"))
fit_5factor <- fitMeasures(cfa_fit_5factor, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper", "srmr", "aic", "bic"))
cfa_comparison_table <- data.frame(
Metric = c("CFI", "TLI", "RMSEA", "SRMR", "AIC", "BIC"),
FourFactor = c(fit_4factor["cfi"], fit_4factor["tli"], fit_4factor["rmsea"], fit_4factor["srmr"], fit_4factor["aic"], fit_4factor["bic"]),
FiveFactor = c(fit_5factor["cfi"], fit_5factor["tli"], fit_5factor["rmsea"], fit_5factor["srmr"], fit_5factor["aic"], fit_5factor["bic"]),
Better = c(
ifelse(fit_4factor["cfi"] > fit_5factor["cfi"], "4-Factor", "5-Factor"),
ifelse(fit_4factor["tli"] > fit_5factor["tli"], "4-Factor", "5-Factor"),
ifelse(fit_4factor["rmsea"] < fit_5factor["rmsea"], "4-Factor", "5-Factor"),
ifelse(fit_4factor["srmr"] < fit_5factor["srmr"], "4-Factor", "5-Factor"),
ifelse(fit_4factor["aic"] < fit_5factor["aic"], "4-Factor", "5-Factor"),
ifelse(fit_4factor["bic"] < fit_5factor["bic"], "4-Factor", "5-Factor")
)
)
cfa_anova <- tryCatch(anova(cfa_fit_4factor, cfa_fit_5factor), error = function(e) e)Warning: lavaan->lavTestLRT():
some models are based on a different set of observed variables
round(fit_4factor, 3) chisq df pvalue cfi tli
618.481 183.000 0.000 0.831 0.806
rmsea rmsea.ci.lower rmsea.ci.upper srmr aic
0.148 0.136 0.161 0.102 7439.300
bic
7624.367
round(fit_5factor, 3) chisq df pvalue cfi tli
2240.274 769.000 0.000 0.629 0.604
rmsea rmsea.ci.lower rmsea.ci.upper srmr aic
0.133 0.127 0.140 0.121 16302.091
bic
16658.815
cfa_comparison_table Metric FourFactor FiveFactor Better
cfi CFI 0.8309524 6.290491e-01 4-Factor
tli TLI 0.8060109 6.044476e-01 4-Factor
rmsea RMSEA 0.1484386 1.330981e-01 5-Factor
srmr SRMR 0.1021610 1.210396e-01 4-Factor
aic AIC 7439.3003940 1.630209e+04 4-Factor
bic BIC 7624.3674486 1.665881e+04 4-Factor
cfa_anova
Chi-Squared Difference Test
Df AIC BIC Chisq Chisq diff RMSEA Df diff
cfa_fit_4factor 183 7439.3 7624.4 618.48
cfa_fit_5factor 769 16302.1 16658.8 2240.27 1621.8 0.12793 586
Pr(>Chisq)
cfa_fit_4factor
cfa_fit_5factor < 2.2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Export paper-facing CFA tables close to the CFA estimation/output section.
paper_cfa_fit_v7 <- tibble::tibble(
Model = c("4-factor (empirical)", "5-factor (EFI-style)"),
CFI = c(unname(fit_4factor["cfi"]), unname(fit_5factor["cfi"])),
TLI = c(unname(fit_4factor["tli"]), unname(fit_5factor["tli"])),
RMSEA = c(unname(fit_4factor["rmsea"]), unname(fit_5factor["rmsea"])),
SRMR = c(unname(fit_4factor["srmr"]), unname(fit_5factor["srmr"])),
AIC = c(unname(fit_4factor["aic"]), unname(fit_5factor["aic"])),
BIC = c(unname(fit_4factor["bic"]), unname(fit_5factor["bic"]))
)
paper_chisq_compare_v7 <- {
if (inherits(cfa_anova, "error")) {
tibble::tibble(
Statistic = c("Chi-square difference", "Difference in df", "p-value"),
Value = c(NA_character_, NA_character_, NA_character_)
)
} else {
an <- as.data.frame(cfa_anova)
nm <- names(an)
chisq_col <- nm[grepl("Chisq[. ]diff", nm)][1]
dfdiff_col <- nm[grepl("Df[. ]diff", nm)][1]
p_col <- nm[grepl("Pr\\(>Chisq\\)", nm)][1]
last <- nrow(an)
tibble::tibble(
Statistic = c("Chi-square difference", "Difference in df", "p-value"),
Value = c(
as.character(round(an[[chisq_col]][last], 1)),
as.character(an[[dfdiff_col]][last]),
as.character(an[[p_col]][last])
)
)
}
}
dir.create("../paper_draft/tables", recursive = TRUE, showWarnings = FALSE)
write.csv(paper_cfa_fit_v7, "../paper_draft/tables/paper_cfa_fit_v7.csv", row.names = FALSE)
write.csv(paper_chisq_compare_v7, "../paper_draft/tables/paper_chisq_compare_v7.csv", row.names = FALSE)Interpretation:
- Compare models using both relative fit (
CFI,TLI,SRMR,AIC,BIC) and absolute fit (especiallyRMSEA). - A better-fitting model can still fit poorly in absolute terms; report both facts.
- To do: if
lavaanreports warnings (e.g., negative variances / Heywood cases), document them explicitly in the paper and treat the CFA results as provisional.
18 Appendix: Correlation Tables
18.1 Correlation Table: Rank and the Five Official Areas
rank_area_corr_df <- df_2022_manual_subcomponent %>%
select(Rank, Area1, Area2, Area3, Area4, Area5)
rank_area_corr <- cor(rank_area_corr_df, use = "pairwise.complete.obs", method = "pearson")
round(rank_area_corr, 3) Rank Area1 Area2 Area3 Area4 Area5
Rank 1.000 -0.055 -0.867 -0.737 -0.858 -0.855
Area1 -0.055 1.000 -0.211 -0.065 -0.080 -0.015
Area2 -0.867 -0.211 1.000 0.495 0.739 0.800
Area3 -0.737 -0.065 0.495 1.000 0.597 0.589
Area4 -0.858 -0.080 0.739 0.597 1.000 0.746
Area5 -0.855 -0.015 0.800 0.589 0.746 1.000
knitr::kable(
round(rank_area_corr, 3),
caption = "Correlation Table: Rank and the Five Official EFI Area Scores"
) %>%
kableExtra::kable_styling(full_width = FALSE)| Rank | Area1 | Area2 | Area3 | Area4 | Area5 | |
|---|---|---|---|---|---|---|
| Rank | 1.000 | -0.055 | -0.867 | -0.737 | -0.858 | -0.855 |
| Area1 | -0.055 | 1.000 | -0.211 | -0.065 | -0.080 | -0.015 |
| Area2 | -0.867 | -0.211 | 1.000 | 0.495 | 0.739 | 0.800 |
| Area3 | -0.737 | -0.065 | 0.495 | 1.000 | 0.597 | 0.589 |
| Area4 | -0.858 | -0.080 | 0.739 | 0.597 | 1.000 | 0.746 |
| Area5 | -0.855 | -0.015 | 0.800 | 0.589 | 0.746 | 1.000 |
18.2 Correlation Table: Rank + Components/Subcomponents by Area
area_component_lists <- list(
`Area 1` = c("X1A", "X1B", "X1C", "X1D_i", "X1D_ii", "X1E"),
`Area 2` = c("X2A_scaled", "X2B_scaled", "X2C_scaled", "X2D_scaled", "X2E_scaled", "X2F_scaled", "X2G_scaled", "X2H_scaled"),
`Area 3` = c("X3A", "X3B", "X3C", "X3D"),
`Area 4` = c("X4A_i", "X4A_ii", "X4A_iii", "X4B_i", "X4B_ii", "X4C", "X4D_i", "X4D_ii", "X4D_iii", "X4D_iv"),
`Area 5` = c("X5A_i", "X5A_ii", "X5A_iii", "X5B_i", "X5B_ii", "X5B_iii", "X5B_iv", "X5B_v", "X5B_vi", "X5B_vii",
"X5C_i", "X5C_ii", "X5C_iii", "X5C_iv", "X5D_i", "X5D_ii", "X5D_iii")
)
for (nm in names(area_component_lists)) {
d_plot <- df_2022_manual_subcomponent %>%
dplyr::select(Rank, all_of(area_component_lists[[nm]]))
print(
corplot(
d_plot,
title = paste0(nm, ": Rank + Components/Subcomponents"),
hc_order = FALSE
)
)
}