# Get merged_hhs dataset

# Step 1: Define Helper Functions

#' Download an Excel workbook from a URL and read it with `read_excel()`
#'
#' The workbook is written to a temporary `.xlsx` file, which is removed when
#' the function exits, whether it succeeds or fails.
#'
#' @param url URL of the `.xlsx` file to download.
#' @return Whatever `read_excel()` returns for the downloaded file (called
#'   with its default arguments).
download_and_read_excel <- function(url) {
  temp_file <- tempfile(fileext = ".xlsx")
  # Register cleanup first so the temp file is removed even if the download
  # or the parse below throws.
  on.exit(unlink(temp_file), add = TRUE)

  response <- GET(url, write_disk(temp_file, overwrite = TRUE))
  # Fail loudly on an HTTP error status instead of handing an error page to
  # read_excel().
  httr::stop_for_status(response)

  read_excel(temp_file)
}

#' Rename a dataset's columns to their standardized names and keep only those
#'
#' @param dataset A data frame whose column names are raw survey names.
#' @param metadata A data frame with the columns `Original Column Name` (one
#'   or more raw names separated by "; ") and `Standardized Column Name`.
#' @return `dataset` restricted to the columns that have a mapping in
#'   `metadata`, renamed to their standardized names and passed through
#'   `select(any_of(metadata$`Standardized Column Name`))`.
process_dataset <- function(dataset, metadata) {
  # One row per raw column name present in `dataset`; when a raw name is
  # listed more than once, the first mapping wins.
  column_map <- metadata %>%
    tidyr::separate_rows(`Original Column Name`, sep = "; ") %>%
    select(`Original Column Name`, `Standardized Column Name`) %>%
    distinct() %>%
    group_by(`Original Column Name`) %>%
    slice(1) %>%
    ungroup() %>%
    filter(`Original Column Name` %in% names(dataset))

  # Vectorized lookup: match() yields NA for columns without a mapping.
  standardized <- column_map$`Standardized Column Name`[
    match(names(dataset), column_map$`Original Column Name`)
  ]

  # Drop unmapped columns before renaming so the data frame never carries NA
  # column names.
  mapped <- !is.na(standardized)
  dataset <- dataset[, mapped, drop = FALSE]
  names(dataset) <- standardized[mapped]

  dataset %>%
    select(any_of(metadata$`Standardized Column Name`))
}

#' Drop note columns from a dataset
#'
#' @param dataset A data frame.
#' @return `dataset` without any column whose name starts with "g99".
remove_g99_columns <- function(dataset) {
  select(dataset, -starts_with("g99"))
}

#' Give every dataset the same set of columns, all stored as character
#'
#' @param datasets A list of data frames.
#' @return The list, where each data frame has gained an `NA` column for
#'   every column name that appears in another element but not in it, and
#'   every column has been converted with `as.character()`.
standardize_column_types <- function(datasets) {
  # Union of the column names across the whole list.
  all_columns <- purrr::reduce(purrr::map(datasets, names), union)

  pad_and_stringify <- function(dataset) {
    absent <- setdiff(all_columns, names(dataset))
    dataset[absent] <- NA
    mutate(dataset, across(everything(), as.character))
  }

  purrr::map(datasets, pad_and_stringify)
}

# Step 2: Define URLs for Data Sources
# One download link per survey export, keyed by the name the dataset is
# stored under in Step 3.
# Note: `kobo_4` is listed here, but Step 3 reads that dataset from a local
# workbook instead of this URL.
urls <- list(
  fastfield = "https://query.data.world/s/w67hchmwgk4xyshbmb4xkxitsamtag?dws=00000",
  kobo_1 = "https://query.data.world/s/2ltshiqf5ablwa6pw4uijvzgm6j37j?dws=00000",
  kobo_2 = "https://query.data.world/s/hczi25yxfpweveumkatcfobp4qbik5?dws=00000",
  kobo_3 = "https://query.data.world/s/hgss74tml5z4f3gdi4h2ztkw4wfcx2?dws=00000",
  kobo_4 = "https://query.data.world/s/v7amp6zyj7rxyoflfstnwpuvgse3mr?dws=00000",
  kobo_5 = "https://query.data.world/s/wejaidwccqs2wqtg7n5ksnxmfirwhn?dws=00000"
)

# Published spreadsheet (CSV output) holding the column-name mapping that
# process_dataset() consumes: `Original Column Name` and
# `Standardized Column Name`.
metadata_url <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vQc0btEwxARDVBC0Ny6ZCRlzoIjQ7txvbFoU-xlQOI97CP2tGNr4hPOskVGhr74dhCYQkBmxSm9zFkn/pub?output=csv"

# Step 3: Download Data
# Each source is stored under the same key it has in `urls`.
datasets <- list()

# FastField export is a CSV; keep the raw column names untouched.
datasets$fastfield <- read.csv(
  urls$fastfield,
  header = TRUE,
  stringsAsFactors = FALSE,
  check.names = FALSE
)

# Kobo exports are Excel workbooks.
datasets$kobo_1 <- download_and_read_excel(urls$kobo_1)
datasets$kobo_2 <- download_and_read_excel(urls$kobo_2)
datasets$kobo_3 <- download_and_read_excel(urls$kobo_3)
# kobo_4 is read from a local workbook rather than from urls$kobo_4.
datasets$kobo_4 <- read_excel(here("data", "kobo", "hhs_kobo_mod_4.xlsx"))
datasets$kobo_5 <- download_and_read_excel(urls$kobo_5)

# Column-name mapping used to standardize every dataset.
metadata <- read_csv(metadata_url)

# Step 4: Process and Clean Datasets
# Standardize column names, drop the "g99" note columns, then align columns
# and types across all sources.
processed_datasets <- lapply(datasets, process_dataset, metadata = metadata)
cleaned_datasets <- lapply(processed_datasets, remove_g99_columns)
standardized_datasets <- standardize_column_types(cleaned_datasets)

# Step 5: Merge All Datasets
# Fold the list left to right with a full join on all shared columns.
# NOTE(review): after Step 4 every table has the same columns, so this joins
# on every column; confirm a join (rather than stacking rows) is intended.
merged_hhs <- Reduce(
  function(merged, next_dataset) full_join(merged, next_dataset, by = NULL),
  standardized_datasets
)

# Remove only the helpers and intermediates created by this script, keeping
# merged_hhs. Clearing everything returned by ls() would also delete
# unrelated objects that were already in the workspace.
rm(list = intersect(
  ls(),
  c(
    "download_and_read_excel", "process_dataset", "remove_g99_columns",
    "standardize_column_types", "urls", "metadata_url", "datasets",
    "metadata", "processed_datasets", "cleaned_datasets",
    "standardized_datasets"
  )
))

# The merged_hhs dataset is now ready for use.



# Fill `year` from g0_year, falling back to the year of g0_submission_time
# when g0_year is missing. Both sources are converted to character
# explicitly so the column type does not depend on which branch happens to
# be used.
merged_hhs <- merged_hhs %>%
  mutate(
    year = dplyr::coalesce(
      as.character(g0_year),
      as.character(year(ymd_hms(g0_submission_time)))
    )
  )

# Filter and Recode data

# # Empowerment - Individual Behavior
    # g12_agreement_individual_behavior
# # Social Equity
    # g8_fishery_benefit_equal
# # Political Trust - Local
    # g8_trust_local_decision
# # Social Cohesion
    # g12_agreement_help_neighbors
# # Social Trust
    # g8_trust_community
# # Collective Efficacy
    # g8_my_community_ability
# # Empowerment - Participation
    # g12_agreement_community_participation



# Keep year, country and the seven indicator questions, then drop every row
# that has a missing value in any of them.
merged_hhs_fil <- merged_hhs %>%
  select(
    year,
    g1_country,
    g12_agreement_individual_behavior,
    g8_fishery_benefit_equal,
    g8_trust_local_decision,
    g12_agreement_help_neighbors,
    g8_trust_community,
    g8_my_community_ability,
    g12_agreement_community_participation
  ) %>%
  na.omit()

# Recode the seven indicator questions to numeric scores and drop every
# respondent whose answer to any of them is not a recognized response.
# (Use unique(merged_hhs_fil$<column>) to inspect the raw responses.)

#' Map survey responses to numeric scores
#'
#' @param x Vector of raw responses.
#' @param scores Named numeric vector: names are the accepted response
#'   strings, values are their scores.
#' @return A numeric vector the same length as `x`; any response that is not
#'   in `names(scores)` (including `NA`) becomes `NA`.
recode_responses <- function(x, scores) {
  unname(scores[match(as.character(x), names(scores))])
}

# Agreement labels shared by the g12 and g8 items (1 = strongly disagree,
# 5 = strongly agree).
agreement_scores <- c(
  "Strongly disagree" = 1, "strongly_disagree" = 1,
  "Disagree" = 2, "disagree" = 2,
  "Agree" = 4, "agree" = 4,
  "Strongly agree" = 5, "strongly_agree" = 5
)

# The midpoint label differs by question group, so each group gets its own
# table: g12 items accept "Neither agree nor disagree", g8 items accept
# "Neither"; both accept "neither".
g12_scores <- c(agreement_scores, "Neither agree nor disagree" = 3, "neither" = 3)
g8_scores <- c(agreement_scores, "Neither" = 3, "neither" = 3)

# Social Equity is a yes/no question placed on the same scale (4 / 2).
yes_no_scores <- c("Yes" = 4, "yes" = 4, "No" = 2, "no" = 2)

g12_items <- c(
  "g12_agreement_individual_behavior",     # Empowerment - Individual Behavior
  "g12_agreement_help_neighbors",          # Social Cohesion
  "g12_agreement_community_participation"  # Empowerment - Participation
)
g8_items <- c(
  "g8_trust_local_decision",  # Political Trust - Local
  "g8_trust_community",       # Social Trust
  "g8_my_community_ability"   # Collective Efficacy
)

merged_hhs_fil <- merged_hhs_fil %>%
  mutate(
    across(all_of(g12_items), ~ recode_responses(.x, g12_scores)),
    across(all_of(g8_items), ~ recode_responses(.x, g8_scores)),
    # Social Equity
    g8_fishery_benefit_equal = recode_responses(
      g8_fishery_benefit_equal, yes_no_scores
    )
  ) %>%
  # Unrecognized responses were recoded to NA; remove those rows.
  tidyr::drop_na(all_of(c(g12_items, g8_items, "g8_fishery_benefit_equal")))

Introduction

This analysis explores the relationships among the components that make up the Capacity for Collective Action Impact Factor to assess potential redundancies and opportunities for simplification. To achieve this, we conducted a Spearman’s Correlation Analysis and a Principal Component Analysis (PCA) to determine the extent to which different components are related.

Based on these analyses, we tested two reduced models:

A 3-Metric Model to retain only the most essential and non-redundant components.
A 4-Metric Model to maintain balance between efficiency and conceptual completeness.

The results allow us to refine the Capacity for Collective Action indicator, ensuring it remains analytically sound while being simplified for more practical use.

Components of the Capacity for Collective Action Indicator

Below is a brief description of each component included in the original 7-metric model, along with the corresponding Household Survey (HHS) question used for measurement.

# Source data for Table 1, written one component per row:
# component name, its definition, and the HHS question used to measure it.
components_table <- as.data.frame(
  matrix(
    c(
      "Empowerment - Individual Behavior",
      "The extent to which individuals believe they can make a difference in fisheries sustainability through their own actions.",
      '"I believe my individual actions can help sustain the fishery."',

      "Empowerment - Participation",
      "The degree to which individuals believe that local community participation in management will help to maintain or improve fish catch.",
      '"I actively participate in my community\'s efforts to manage the fishery."',

      "Social Cohesion",
      "The level of connectedness and mutual help among community members.",
      '"People in my community help each other when needed."',

      "Social Trust",
      "The extent to which individuals trust their fellow community members.",
      '"I trust most people in my community to act in the best interest of the fishery."',

      "Political Trust - Local",
      "The level of trust in local decision-makers to make decisions that benefit the community over their personal interests.",
      '"I trust local authorities to make good decisions for our fishery."',

      "Collective Efficacy",
      "The belief in the community’s ability to effectively manage and sustain fisheries.",
      '"My community has the ability to manage our fishery successfully."',

      "Social Equity",
      "The perception of fair access to fishery benefits.",
      '"Do you think the benefits from the fishery are fairly distributed?"'
    ),
    ncol = 3,
    byrow = TRUE,
    dimnames = list(NULL, c("Component", "Definition", "HHS_Question"))
  )
)

# Render Table 1 as styled HTML. Column headers are passed explicitly so the
# third one reads "HHS Question" instead of the data frame name HHS_Question.
components_table %>%
  kable(
    format = "html",
    col.names = c("Component", "Definition", "HHS Question"),
    caption = "Table 1: Components of the Capacity for Collective Action Indicator"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE
  ) %>%
  column_spec(1, bold = TRUE, color = "black", background = "#f2f2f2") %>%
  column_spec(2, width = "40em") %>%
  column_spec(3, italic = TRUE, width = "30em") %>%
  row_spec(0, bold = TRUE, background = "#d3d3d3")

Table 1: Components of the Capacity for Collective Action Indicator
Component	Definition	HHS Question
Empowerment - Individual Behavior	The extent to which individuals believe they can make a difference in fisheries sustainability through their own actions.	“I believe my individual actions can help sustain the fishery.”
Empowerment - Participation	The degree to which individuals believe that local community participation in management will help to maintain or improve fish catch.	“I actively participate in my community’s efforts to manage the fishery.”
Social Cohesion	The level of connectedness and mutual help among community members.	“People in my community help each other when needed.”
Social Trust	The extent to which individuals trust their fellow community members.	“I trust most people in my community to act in the best interest of the fishery.”
Political Trust - Local	The level of trust in local decision-makers to make decisions that benefit the community over their personal interests.	“I trust local authorities to make good decisions for our fishery.”
Collective Efficacy	The belief in the community’s ability to effectively manage and sustain fisheries.	“My community has the ability to manage our fishery successfully.”
Social Equity	The perception of fair access to fishery benefits.	“Do you think the benefits from the fishery are fairly distributed?”

Spearman’s Correlation Analysis

To assess the strength and direction of relationships between the components, we used Spearman’s correlation, which is ideal for ordinal data. This analysis helped us identify:

Strong Correlations (Potential Redundancies): Components that measure similar concepts.
Moderate Correlations (Possible Conceptual Links): Related but distinct factors.
Weak or No Correlation (Distinct Metrics): Components that measure independent aspects of collective action.

# Keep the seven indicator columns under their Wellbeing Impact names.
data_subset <- select(
  merged_hhs_fil,
  `Empowerment - Individual Behavior` = g12_agreement_individual_behavior,
  `Social Equity` = g8_fishery_benefit_equal,
  `Political Trust - Local` = g8_trust_local_decision,
  `Social Cohesion` = g12_agreement_help_neighbors,
  `Social Trust` = g8_trust_community,
  `Collective Efficacy` = g8_my_community_ability,
  `Empowerment - Participation` = g12_agreement_community_participation
)

# Spearman rank correlation between every pair of indicators.
spearman_corr <- cor(
  data_subset,
  use = "pairwise.complete.obs",
  method = "spearman"
)

# To inspect the raw matrix:
# print(spearman_corr)

# Lower-triangle heatmap with the coefficients printed in each cell and the
# x-axis labels rotated so the long names stay readable.
ggcorrplot(
  spearman_corr,
  type = "lower",
  method = "square",
  colors = c("red", "white", "blue"),
  outline.color = "black",
  lab = TRUE,
  lab_size = 4,
  title = "Spearman's Correlation Heatmap"
) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  labs(x = "", y = "")

Correlation Results

Strong Correlations (Potential Redundancies)
- Empowerment - Individual Behavior / Empowerment - Participation (0.52): Suggests that respondents who believe in individual behavior contributing to fisheries sustainability also strongly believe in community participation.
- Social Cohesion / Empowerment - Individual Behavior (0.52): Suggests that individuals who feel it is important to help neighbors also feel empowered in their individual fishing behavior.
Moderate Correlations (Possible Conceptual Links)
- Social Cohesion / Empowerment - Participation (0.46): Suggests that community participation is moderately associated with the belief in helping neighbors.
- Political Trust - Local / Social Trust (0.40): Suggests that individuals who trust their local decision-makers tend to also trust their community members.
Weak or No Correlation (Distinct Metrics)
- Social Equity / Other Metrics (all correlations < 0.12): Social Equity is largely independent from other Wellbeing Impact metrics.
- Collective Efficacy / Other Metrics (mostly < 0.26): Relatively weak correlations across the board suggest that community confidence in managing fisheries is conceptually distinct.

Principal Component Analysis

We applied PCA to determine whether the 7 original components could be reduced into a smaller set while preserving most of the information. PCA helped:

Identify which components contributed the most to explaining variance.
Detect overlapping dimensions, allowing us to remove redundancies.

# PCA input: the seven indicator columns under their Wellbeing Impact names.
data_pca <- select(
  merged_hhs_fil,
  `Empowerment - Individual Behavior` = g12_agreement_individual_behavior,
  `Social Equity` = g8_fishery_benefit_equal,
  `Political Trust - Local` = g8_trust_local_decision,
  `Social Cohesion` = g12_agreement_help_neighbors,
  `Social Trust` = g8_trust_community,
  `Collective Efficacy` = g8_my_community_ability,
  `Empowerment - Participation` = g12_agreement_community_participation
)

# Fit the PCA on centered, unit-variance columns.
pca_result <- prcomp(data_pca, scale. = TRUE, center = TRUE)

# Standard deviation and share of variance for each component.
summary(pca_result)

#> Importance of components:
#>                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
#> Standard deviation     1.5413 1.1213 0.9755 0.8793 0.78406 0.74072 0.69202
#> Proportion of Variance 0.3394 0.1796 0.1359 0.1105 0.08782 0.07838 0.06841
#> Cumulative Proportion  0.3394 0.5190 0.6549 0.7654 0.85321 0.93159 1.00000

# Loadings: one row per indicator, one column per principal component.
pca_loadings <- as.data.frame(pca_result[["rotation"]])
print(pca_loadings)

#>                                          PC1        PC2         PC3
#> Empowerment - Individual Behavior -0.4815042  0.3078554 -0.05134179
#> Social Equity                     -0.1350021 -0.2984915 -0.93266322
#> Political Trust - Local           -0.3257888 -0.4978214  0.23549689
#> Social Cohesion                   -0.4145823  0.4508652 -0.06345440
#> Social Trust                      -0.4044004 -0.3177436  0.25057977
#> Collective Efficacy               -0.2870409 -0.4437365  0.06083188
#> Empowerment - Participation       -0.4753726  0.2591512 -0.03908131
#>                                            PC4          PC5         PC6
#> Empowerment - Individual Behavior -0.040519265 -0.206820068  0.02337655
#> Social Equity                      0.136237069  0.062651301  0.01780707
#> Political Trust - Local            0.418671511 -0.565984115  0.28202997
#> Social Cohesion                   -0.042636302  0.194979454  0.64742695
#> Social Trust                       0.300445919  0.740618575 -0.14976813
#> Collective Efficacy               -0.844027174 -0.008621593  0.05019255
#> Empowerment - Participation        0.006658948 -0.215302256 -0.68955355
#>                                            PC7
#> Empowerment - Individual Behavior  0.791062571
#> Social Equity                     -0.003710166
#> Political Trust - Local           -0.124144842
#> Social Cohesion                   -0.402267525
#> Social Trust                       0.107214620
#> Collective Efficacy               -0.045049916
#> Empowerment - Participation       -0.428311418

# Scores of every respondent on all principal components; show the first rows.
pca_scores <- as.data.frame(pca_result[["x"]])
head(pca_scores)

#>          PC1        PC2        PC3         PC4       PC5        PC6       PC7
#> 1 -0.1091051 -0.2910924 -0.8412106 -1.94416367  1.134458 -0.5369504 0.2858168
#> 2 -1.2721534  0.8573109  1.3462310 -0.36286237 -1.619550  1.4143880 0.3393410
#> 3 -0.5674806 -0.6255719 -0.3753567 -0.09174717  1.449847 -0.4892551 0.3431468
#> 4  1.2642934  2.0249602  0.9330083  0.94865851  1.032393 -0.7646452 0.4638153
#> 5  1.5995090  2.5371863  0.6906972  0.51787256  1.614754 -1.0548358 0.5915523
#> 6  2.7531771  0.3181578  0.6761048 -1.76681533 -1.347488 -2.2200884 1.2993363

# Scree plot of the variance explained by each component, with value labels
# and the y-axis fixed to 0-50.
fviz_eig(
  pca_result,
  ylim = c(0, 50),
  addlabels = TRUE
)

PCA Results

PC1 explains 33.9% of the variance, while PC2 explains 18.0%, together covering 51.9% of total variance.
Adding PC3 (13.6%) and PC4 (11.0%), we reach 76.5% cumulative variance explained.
The scree plot shows a clear elbow at PC2 or PC3, indicating that two to three principal components capture most of the data structure; i.e., keeping two to three principal components would be a reasonable trade-off between simplification and variance retention.
Reducing from 7 Wellbeing Impacts to 3 or 4 key metrics may be feasible while maintaining interpretability.

# Variable plot of the PCA; arrows are colored by "cos2" on a blue-to-red
# gradient and labels are repelled to avoid overlap.
fviz_pca_var(
  pca_result,
  col.var = "cos2",
  gradient.cols = c("blue", "red"),
  title = "PCA Loadings (Biplot)",
  repel = TRUE
)

Biplot

PC1 (Dominant component, 33.9%): Empowerment - Individual Behavior / Empowerment - Participation / Social Cohesion → These three variables cluster together, meaning they likely capture similar aspects of community engagement and cooperation.
PC2 (Second strongest, 18.0%): Political Trust - Local / Social Trust → This suggests that PC2 represents trust-related aspects distinct from empowerment and participation.
PC3 (13.6%) is dominated by Social Equity and PC4 (11.0%) by Collective Efficacy; each explains a smaller share of the total variance than PC1 or PC2.

Reduced Models

How to Reduce from 7 to 3-4 Wellbeing Impacts for the Capacity for Collective Action Impact Factor?

From the Correlation matrix:

Empowerment - Individual Behavior / Empowerment - Participation (0.52 correlation) → Measures similar aspects of personal vs. collective action.
Political Trust - Local / Social Trust (0.40 correlation) → Both relate to trust but at different levels (institutions vs. community).

From PCA loadings:

PC1 (34% variance) → Empowerment & Social Cohesion
PC2 (18% variance) → Political & Social Trust
PC3 (14% variance) → Captures Social Equity
PC4 (11% variance) → Captures Collective Efficacy

To ensure diversity and avoid redundancy, the optimal selection would be:

Option 1: 3-Metric Selection

Empowerment - Participation (represents Empowerment & Social Cohesion)
Political Trust - Local (represents Political & Social Trust)
Collective Efficacy

Option 2: 4-Metric Selection

Empowerment - Participation (represents Empowerment & Social Cohesion)
Political Trust - Local (represents Political & Social Trust)
Collective Efficacy
Social Equity

# Full 7-metric model: every indicator under its Wellbeing Impact name.
full_model <- select(
  merged_hhs_fil,
  `Empowerment - Individual Behavior` = g12_agreement_individual_behavior,
  `Social Equity` = g8_fishery_benefit_equal,
  `Political Trust - Local` = g8_trust_local_decision,
  `Social Cohesion` = g12_agreement_help_neighbors,
  `Social Trust` = g8_trust_community,
  `Collective Efficacy` = g8_my_community_ability,
  `Empowerment - Participation` = g12_agreement_community_participation
)

# Reduced 3-metric model (simplest version): a column subset of the full
# model, in the order the metrics are listed under Option 1.
reduced_3_model <- full_model %>%
  select(
    `Empowerment - Participation`,
    `Political Trust - Local`,
    `Collective Efficacy`
  )

# Reduced 4-metric model (balanced version): the 3-metric set plus
# Social Equity.
reduced_4_model <- full_model %>%
  select(
    `Empowerment - Participation`,
    `Political Trust - Local`,
    `Collective Efficacy`,
    `Social Equity`
  )


# ---- Compare Correlations ----
# Pairwise plot matrix for each reduced model. The full-model version is
# kept for reference but not drawn:
# ggpairs(full_model) + ggtitle("Correlation Matrix - Full Model")

# 3-metric model
ggpairs(reduced_3_model) +
  ggtitle("Correlation Matrix - 3-Metric Model")

# 4-metric model
ggpairs(reduced_4_model) +
  ggtitle("Correlation Matrix - 4-Metric Model")

# ---- Print PCA summaries ----
# print("PCA Summary - Full Model:")
# print(summary(pca_full))
# 
# print("PCA Summary - Reduced 3 Model:")
# print(summary(pca_reduced_3))
# 
# print("PCA Summary - Reduced 4 Model:")
# print(summary(pca_reduced_4))

# ---- Print PCA Loadings (Variable Contributions) ----
# print("PCA Loadings - Full Model:")
# print(pca_full$rotation)
# 
# print("PCA Loadings - Reduced 3 Model:")
# print(pca_reduced_3$rotation)
# 
# print("PCA Loadings - Reduced 4 Model:")
# print(pca_reduced_4$rotation)

Correlation Results

3-Metric Model: Correlations between Empowerment - Participation, Political Trust - Local, and Collective Efficacy remain relatively low (~0.19-0.25), showing that each metric contributes unique information.
4-Metric Model: The addition of Social Equity does not significantly increase correlation with other variables. This suggests that including Social Equity adds a distinct fairness dimension without redundancy.

Key Takeaway: The selected reduced models avoid redundancy, ensuring that each metric contributes unique information. Including Social Equity in the 4-metric model may be a good choice because it remains largely independent.

# ---- Run PCA on all models ----
# Same settings for every model: centered, unit-variance columns.
pca_full <- prcomp(full_model, scale. = TRUE, center = TRUE)
pca_reduced_3 <- prcomp(reduced_3_model, scale. = TRUE, center = TRUE)
pca_reduced_4 <- prcomp(reduced_4_model, scale. = TRUE, center = TRUE)

# ---- Compare Explained Variance ----
# Scree plots for the reduced models on a shared 0-50 y-axis. The full-model
# plot is kept for reference but not drawn:
# fviz_eig(pca_full, addlabels = TRUE, ylim = c(0, 50)) + ggtitle("PCA: Full Model (7 Metrics)")
fviz_eig(pca_reduced_3, ylim = c(0, 50), addlabels = TRUE) +
  ggtitle("PCA: Reduced Model (3 Metrics)")

fviz_eig(pca_reduced_4, ylim = c(0, 50), addlabels = TRUE) +
  ggtitle("PCA: Reduced Model (4 Metrics)")

PCA Results

3-Metric Model: PC1 = 47.0%, PC2 = 28.2%, PC3 = 24.8%. The first two PCs capture 75.2% of the variance, which is comparable to what the first four PCs of the full model capture (76.5%), despite using fewer metrics. This model is highly efficient, retaining key information while simplifying the calculation.
4-Metric Model: PC1 = 36.8%, PC2 = 23.5%, PC3 = 21.1%, PC4 = 18.6%. The first three PCs capture 81.4% of the variance, slightly improving over the 3-metric model. This model balances simplicity and completeness, making it a strong candidate.

Key Takeaway: The reduced models still capture most of the variance. Keeping 3-4 metrics is justified, given that the first few principal components still capture the majority of variance.

Final Recommendation

3-Metric Model: Most efficient (its first two PCs capture 75% of the variance), reduces complexity, but lacks the social equity dimension.
4-Metric Model: Balances efficiency with fairness (its first three PCs capture 81% of the variance), but is slightly more complex.

The 4-metric model is the best choice, as it maintains high variance capture while ensuring social equity is included.

# #Calculate and Standardize "Capacity for Collective Action" Index
# 
# # Select the 4 metrics for the index
# data_index <- merged_hhs_fil %>%
#   select(
#     "Empowerment - Participation" = g12_agreement_community_participation,
#     "Political Trust - Local" = g8_trust_local_decision,
#     "Collective Efficacy" = g8_my_community_ability,
#     "Social Equity" = g8_fishery_benefit_equal
#   )
# 
# # Standardize the variables (Min-Max Scaling: 0 to 1)
# data_index_scaled <- as.data.frame(lapply(data_index, rescale, to = c(0,1)))
# 
# # Compute the Capacity for Collective Action Index as the mean of the 4 standardized variables
# merged_hhs_fil$Collective_Action_Index <- rowMeans(data_index_scaled, na.rm = TRUE)
# 
# # Print summary of the index
# summary(merged_hhs_fil$Collective_Action_Index)
# 
# # Histogram to visualize the distribution of the index
# ggplot(merged_hhs_fil, aes(x = Collective_Action_Index)) +
#   geom_histogram(binwidth = 0.1, fill = "blue", alpha = 0.7, color = "black") +
#   labs(title = "Distribution of Capacity for Collective Action Index", x = "Index Value", y = "Frequency") +
#   theme_minimal()

Exploring the Capacity for Collective Action Impact Factor: Correlation and PCA Analysis

Mariano Viz

2025-02-18