Introduction

This analysis examines the evolution of review and revise provisions in EU legislation from 1993 to 2023, focusing on both articles and rules across different types of rules and domains.

# Load required libraries
library(ggplot2)
library(dplyr)
library(tidyr)  
library(patchwork)
library(stringr)
library(scales)

Data Preparation

Analyze in General

#' Summarise review/revise provisions across all years
#'
#' Sums the yearly article and rule counts, prints a plain-text report
#' (totals and percentage shares) to the console, and returns the totals.
#'
#' @param data A data frame with numeric columns `total_articles`,
#'   `articles_with_revise`, `total_rules` and `rules_with_revise`.
#' @return The `summarise()` result holding the four sums under the same
#'   column names (a single row for an ungrouped data frame).
analyze_total_provisions <- function(data) {
  required_cols <- c(
    "total_articles", "articles_with_revise",
    "total_rules", "rules_with_revise"
  )
  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop(
      "`data` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # na.rm = TRUE keeps one missing yearly value from turning every total
  # into NA, matching the other summaries in this analysis.
  total_stats <- data %>%
    summarise(
      total_articles = sum(total_articles, na.rm = TRUE),
      articles_with_revise = sum(articles_with_revise, na.rm = TRUE),
      total_rules = sum(total_rules, na.rm = TRUE),
      rules_with_revise = sum(rules_with_revise, na.rm = TRUE)
    )

  # Percentage share rounded to two decimals (NaN when `whole` is 0)
  pct_share <- function(part, whole) round(part / whole * 100, 2)

  # Print results
  cat("\nTOTAL STATISTICS ACROSS ALL YEARS:\n")
  cat("----------------------------------------\n")
  cat("ARTICLES:\n")
  cat("Total number of articles:", total_stats$total_articles, "\n")
  cat("Articles with review/revise provisions:",
      total_stats$articles_with_revise, "\n")
  cat("Percentage of articles with provisions:",
      pct_share(total_stats$articles_with_revise, total_stats$total_articles),
      "%\n")

  cat("\nRULES:\n")
  cat("Total number of rules:", total_stats$total_rules, "\n")
  cat("Rules with review/revise provisions:",
      total_stats$rules_with_revise, "\n")
  cat("Percentage of rules with provisions:",
      pct_share(total_stats$rules_with_revise, total_stats$total_rules),
      "%\n")

  total_stats
}

# Print the overall report and keep the summed totals for later use.
# `df` is the yearly data frame; it is built outside the code shown here.
total_stats <- analyze_total_provisions(df)
## 
## TOTAL STATISTICS ACROSS ALL YEARS:
## ----------------------------------------
## ARTICLES:
## Total number of articles: 330459 
## Articles with review/revise provisions: 1271 
## Percentage of articles with provisions: 0.38 %
## 
## RULES:
## Total number of rules: 84776 
## Rules with review/revise provisions: 1097 
## Percentage of rules with provisions: 1.29 %

Visualization

# Base theme: theme_minimal() at base size 12 with the overrides below
create_base_theme <- function() {
  # Element overrides layered on top of the minimal theme
  overrides <- theme(
    # Typography
    text = element_text(family = "Times New Roman"),
    plot.title = element_text(size = 11, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Facet strips
    strip.text = element_text(face = "bold"),
    strip.background = element_blank(),
    # Grid lines
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_blank(),
    # Spacing
    panel.spacing = unit(2, "lines"),
    plot.margin = margin(1, 1, 1, 1, "cm")
  )

  theme_minimal(base_size = 12) + overrides
}

Plot 1: Annual Count of Articles Containing Review and Revise Provisions (1993-2023)

# Add the per-year share of articles and of rules carrying a review/revise
# provision as extra columns on `df`.
df <- df %>%
  mutate(
    articles_revise_ratio = articles_with_revise / total_articles,
    rules_revise_ratio = rules_with_revise / total_rules
  )

# Yearly count of articles with revise clauses, with a dashed linear trend
ggplot(df, aes(x = year, y = articles_with_revise)) +
  geom_area(alpha = 0.1, fill = "#2c3e50") +
  geom_line(linewidth = 0.5, color = "#2c3e50", alpha = 0.8) +
  geom_point(size = 1.5, color = "#2c3e50", alpha = 0.8) +
  geom_smooth(method = "lm", color = "#e74c3c",
              linewidth = 0.5, linetype = "dashed",
              alpha = 0.7, se = FALSE) +
  scale_x_continuous(
    breaks = seq(1995, 2020, by = 5),
    limits = c(1993, 2023),
    expand = c(0.02, 0)
  ) +
  # One y scale only: a separate ylim() call replaces any earlier
  # scale_y_continuous() and silently drops its breaks and expand settings.
  scale_y_continuous(
    limits = c(0, 80),
    breaks = seq(0, 80, by = 20),
    expand = c(0, 0)
  ) +
  theme_minimal(base_size = 14) +
  labs(
    title = "Review and Revise Provisions in EU Articles",
    subtitle = "Annual Count of Articles Containing Review and Revise Provisions (1993-2023)",
    x = "Year",
    y = "Number of Articles"
  )

Calculate summary statistics

# Total, mean, median, max and min of the yearly number of articles with
# review provisions; every figure is rounded to 2 decimal places.
overall_stats <- summarise(
  df,
  Total = sum(articles_with_revise, na.rm = TRUE),
  Mean = mean(articles_with_revise, na.rm = TRUE),
  Median = median(articles_with_revise, na.rm = TRUE),
  Max = max(articles_with_revise, na.rm = TRUE),
  Min = min(articles_with_revise, na.rm = TRUE)
) %>%
  mutate(across(everything(), function(value) round(value, digits = 2)))

# Heading followed by the one-row table
print("Overall Statistics of Articles with Review Provisions (1993-2023):")
## [1] "Overall Statistics of Articles with Review Provisions (1993-2023):"
print(overall_stats)
##   Total Mean Median Max Min
## 1  1271   41     38  79  20

Plot 2: Percentage of Articles Containing Review and Revise Provisions (1993-2023)

# Yearly share of articles with review provisions, with a dashed linear trend
ggplot(df, aes(x = year, y = articles_with_revise / total_articles)) +
  geom_point(size = 2, color = "#2c3e50") +
  geom_line(color = "#2c3e50", alpha = 0.7) +
  # `linewidth` (not `size`) sets line thickness; `size` is deprecated for
  # lines since ggplot2 3.4.0, and the other plots here already use it.
  geom_smooth(method = "lm", color = "#c0392b",
              linetype = "dashed", linewidth = 0.8, se = FALSE) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  # The y value is a proportion; scales::percent renders it as a percentage
  scale_y_continuous(labels = scales::percent) +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray90"),
    plot.title = element_text(size = 11, face = "bold")
  ) +
  labs(
    title = "Percentage of Articles with Review Provisions (1993-2023)",
    x = "Year",
    y = "Percentage of Articles with Review Provisions"
  )

Calculate summary statistics for yearly average

# Mean, median, max and min of the yearly percentage of articles with review
# provisions, rounded to 2 decimal places.
articles_ratio_stats <- summarise(
  # `ratio` is expressed in percent (share * 100)
  mutate(df, ratio = articles_with_revise/total_articles * 100),
  Mean = mean(ratio, na.rm = TRUE),
  Median = median(ratio, na.rm = TRUE),
  Max = max(ratio, na.rm = TRUE),
  Min = min(ratio, na.rm = TRUE)
) %>%
  mutate(across(everything(), function(value) round(value, digits = 2)))

# Heading followed by the one-row table
print("Statistics of Percentage of Articles with Review Provisions (1993-2023):")
## [1] "Statistics of Percentage of Articles with Review Provisions (1993-2023):"
print(articles_ratio_stats)
##   Mean Median  Max Min
## 1 0.39   0.35 0.76 0.2

Plot 3: Number of Rules with Review and Revise Provisions (1993-2023)

# Yearly count of rules with review/revise provisions, with a linear trend
ggplot(df, aes(x = year, y = rules_with_revise)) +
  # Add subtle area fill for visual weight
  geom_area(alpha = 0.1, fill = "#2c3e50") +

  # Main line with thinner width
  geom_line(linewidth = 0.5, color = "#2c3e50", alpha = 0.8) +

  # Smaller points
  geom_point(size = 1.5, color = "#2c3e50", alpha = 0.8) +

  # Subtle trend line
  geom_smooth(method = "lm", color = "#e74c3c",
              linewidth = 0.5, linetype = "dashed",
              alpha = 0.7, se = FALSE) +

  # Adjusted scales
  scale_x_continuous(
    breaks = seq(1995, 2020, by = 5),
    limits = c(1993, 2023),
    expand = c(0.02, 0)
  ) +

  # One y scale only: a separate ylim() call replaces any earlier
  # scale_y_continuous() and silently drops its expand setting.
  scale_y_continuous(
    limits = c(0, 70),
    expand = c(0, 0)
  ) +

  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray95"),
    plot.background = element_rect(fill = "white", color = NA),
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20)
  ) +
  labs(
    title = "Review and Revise Provisions in EU Rules",
    subtitle = "Number of Rules with Review and Revise Provisions (1993-2023)",
    x = "Year",
    y = "Number of Rules"
  )

Calculate summary statistics for yearly average

# Total, mean, median, max and min of the yearly number of rules with review
# provisions; every figure is rounded to 2 decimal places.
rules_stats <- summarise(
  df,
  Total = sum(rules_with_revise, na.rm = TRUE),
  Mean = mean(rules_with_revise, na.rm = TRUE),
  Median = median(rules_with_revise, na.rm = TRUE),
  Max = max(rules_with_revise, na.rm = TRUE),
  Min = min(rules_with_revise, na.rm = TRUE)
) %>%
  mutate(across(everything(), function(value) round(value, digits = 2)))

# Heading followed by the one-row table
print("Overall Statistics of Rules with Review Provisions (1993-2023):")
## [1] "Overall Statistics of Rules with Review Provisions (1993-2023):"
print(rules_stats)
##   Total  Mean Median Max Min
## 1  1097 35.39     33  67  17

Plot 4: Percentage of Rules with Review and Revise Provisions (1993-2023)

# Per-year percentage of rules that contain a review/revise provision
overall_rules_ratio <- mutate(
  select(df_clean, year, rules_with_revise, total_rules),
  ratio = (rules_with_revise / total_rules) * 100
)

# Area + line + points, with a dashed linear trend on top
ggplot(overall_rules_ratio, aes(x = year, y = ratio)) +
  geom_area(alpha = 0.1, fill = "#2c3e50") +
  geom_line(linewidth = 0.5, color = "#2c3e50", alpha = 0.8) +
  geom_point(size = 1.5, color = "#2c3e50", alpha = 0.8) +
  geom_smooth(
    method = "lm", se = FALSE,
    color = "#e74c3c", linetype = "dashed",
    linewidth = 0.5, alpha = 0.7
  ) +
  labs(
    title = "Review and Revise Provisions in EU Rules",
    subtitle = "Percentage of Rules Containing Review and Revise Provisions (1993-2023)",
    x = "Year",
    y = "Percentage of Rules"
  ) +
  # y axis: start at 0, tick every 0.5 up to just above the largest ratio,
  # and append a percent sign to each label
  scale_y_continuous(
    limits = c(0, NA),
    breaks = seq(0, max(overall_rules_ratio$ratio, na.rm = TRUE) + 1, by = 0.5),
    labels = function(value) paste0(value, "%")
  ) +
  scale_x_continuous(
    limits = c(1993, 2023),
    breaks = seq(1995, 2020, by = 5),
    expand = c(0.02, 0)
  ) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30"),
    plot.background = element_rect(fill = "white", color = NA),
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20),
    panel.grid.major = element_line(color = "gray95"),
    panel.grid.minor = element_blank()
  )

Calculate summary statistics

# Mean, median, max and min of the yearly percentage of rules with review
# provisions, rounded to 2 decimal places.
rules_ratio_stats <- summarise(
  # `ratio` is expressed in percent (share * 100)
  mutate(df_clean, ratio = (rules_with_revise / total_rules) * 100),
  Mean = mean(ratio, na.rm = TRUE),
  Median = median(ratio, na.rm = TRUE),
  Max = max(ratio, na.rm = TRUE),
  Min = min(ratio, na.rm = TRUE)
) %>%
  mutate(across(everything(), function(value) round(value, digits = 2)))

# Heading followed by the one-row table
print("Statistics of Percentage of Rules with Review Provisions (1993-2023):")
## [1] "Statistics of Percentage of Rules with Review Provisions (1993-2023):"
print(rules_ratio_stats)
##   Mean Median  Max  Min
## 1  1.4    1.2 3.65 0.55

Plot 5: Calculate count of articles with review and revise provisions by legislative and non-legislative acts

# Long table of yearly article counts, one row per (year, form).
# The column pattern also picks up non-form columns; their names are not
# among the factor levels, so they become NA and are dropped at the end.
articles_count_data <- df_clean %>%
  select(year, matches("_articles_with_revise_count$")) %>%
  pivot_longer(
    cols = matches("_articles_with_revise_count$"),
    names_pattern = "(.+)_articles_with_revise_count",
    names_to = "form",
    values_to = "count"
  ) %>%
  mutate(
    # Level order drives the 3-column facet layout, filled row by row:
    # basic acts, then implementing acts, then delegated acts
    form = factor(
      form,
      levels = c(
        "Directive", "Regulation", "Decision",
        "Implementing directive", "Implementing regulation", "Implementing decision",
        "Delegated directive", "Delegated regulation", "Delegated decision"
      )
    )
  ) %>%
  drop_na(form)

# One panel per form: yearly count as line + points
ggplot(articles_count_data, aes(x = year, y = count)) +
  geom_line(color = "#2c3e50", alpha = 0.7) +
  geom_point(size = 2, color = "#2c3e50") +
  facet_wrap(~ form, ncol = 3) +
  labs(
    title = "Number of Articles with Review and Revise Provisions by Legislative and Non-Legislative Acts",
    subtitle = "Absolute count of articles with review provisions (1993-2023)",
    x = "Year",
    y = "Number of Articles"
  ) +
  scale_y_continuous(
    breaks = seq(0, max(articles_count_data$count, na.rm = TRUE), by = 10)
  ) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30"),
    plot.margin = margin(1, 1, 1, 1, "cm"),
    strip.text = element_text(face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines"),
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_blank()
  )

Calculate summary statistics

# Per-form total, mean, max and min of the yearly article counts,
# largest total first.
articles_count_summary <- summarise(
  group_by(articles_count_data, form),
  Total = sum(count, na.rm = TRUE),
  Mean = mean(count, na.rm = TRUE),
  Max = max(count, na.rm = TRUE),
  Min = min(count, na.rm = TRUE)
) %>%
  arrange(desc(Total))

# Heading followed by the full table (n = Inf prints every row)
print("Summary Statistics of Article Counts by Form:")
## [1] "Summary Statistics of Article Counts by Form:"
print(articles_count_summary, n = Inf)
## # A tibble: 9 × 5
##   form                    Total    Mean   Max   Min
##   <fct>                   <dbl>   <dbl> <dbl> <dbl>
## 1 Regulation                524 16.9       37     8
## 2 Directive                 421 13.6       26     4
## 3 Decision                  129  4.16      17     0
## 4 Delegated regulation       12  0.387      8     0
## 5 Implementing regulation    10  0.323      2     0
## 6 Implementing decision       1  0.0323     1     0
## 7 Implementing directive      0  0          0     0
## 8 Delegated directive         0  0          0     0
## 9 Delegated decision          0  0          0     0

Plot 6: Percentage of Articles with Review and Revise Provisions by Legislative and Non-Legislative Acts

# Percentage of articles with review provisions per (year, form):
# revise counts are pivoted long, then matched with the pivoted totals.
articles_ratio_data <- df_clean %>%
  select(
    year,
    matches("_articles_with_revise_count$"),
    matches("_total_articles$")
  ) %>%
  # Long format for the revise counts
  pivot_longer(
    cols = matches("_articles_with_revise_count$"),
    names_pattern = "(.+)_articles_with_revise_count",
    names_to = "form",
    values_to = "revise_count"
  ) %>%
  # Attach the matching total per form and year
  left_join(
    pivot_longer(
      select(df_clean, year, matches("_total_articles$")),
      cols = -year,
      names_pattern = "(.+)_total_articles",
      names_to = "form",
      values_to = "total_count"
    ),
    by = c("form", "year")
  ) %>%
  # Missing counts are treated as 0
  mutate(
    revise_count = replace_na(revise_count, 0),
    total_count = replace_na(total_count, 0)
  ) %>%
  mutate(
    # A zero total yields a 0% ratio instead of a division by zero
    ratio = if_else(total_count == 0, 0, (revise_count / total_count) * 100),
    # Level order drives the 3-column facet layout, filled row by row:
    # basic acts, then implementing acts, then delegated acts
    form = factor(
      form,
      levels = c(
        "Directive", "Regulation", "Decision",
        "Implementing directive", "Implementing regulation", "Implementing decision",
        "Delegated directive", "Delegated regulation", "Delegated decision"
      )
    )
  ) %>%
  # Names outside the nine forms became NA above and are dropped here
  drop_na(form)

# One panel per form: yearly percentage as line + points
ggplot(articles_ratio_data, aes(x = year, y = ratio)) +
  geom_line(color = "#2c3e50", alpha = 0.7) +
  geom_point(size = 2, color = "#2c3e50") +
  facet_wrap(~ form, ncol = 3) +
  labs(
    title = "Percentage of Articles with Review and Revise Provisions by Legislative and Non-Legislative Acts",
    subtitle = "Ratio calculated against the number of articles of each type of rules (1993-2023)",
    x = "Year",
    y = "Percentage"
  ) +
  # Fixed 0-5% range with a percent sign appended to each label
  scale_y_continuous(
    limits = c(0, 5),
    labels = function(value) paste0(value, "%")
  ) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30"),
    plot.margin = margin(1, 1, 1, 1, "cm"),
    strip.text = element_text(face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines"),
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_blank()
  )

Calculate summary statistics

# Per-form mean, median, SD, min and max of the yearly article percentages,
# rounded to 2 decimals and sorted by mean, highest first.
articles_summary <- summarise(
  group_by(articles_ratio_data, form),
  Mean = mean(ratio, na.rm = TRUE),
  Median = median(ratio, na.rm = TRUE),
  SD = sd(ratio, na.rm = TRUE),
  Min = min(ratio, na.rm = TRUE),
  Max = max(ratio, na.rm = TRUE)
) %>%
  mutate(across(where(is.numeric), function(value) round(value, digits = 2))) %>%
  arrange(desc(Mean))

# Heading followed by the full table (n = Inf prints every row)
print("Summary Statistics of Percentage of Articles with Review Provisions by Form:")
## [1] "Summary Statistics of Percentage of Articles with Review Provisions by Form:"
print(articles_summary, n = Inf)
## # A tibble: 9 × 6
##   form                     Mean Median    SD   Min   Max
##   <fct>                   <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Directive                2      1.71  0.95  0.73  4.69
## 2 Regulation               0.47   0.31  0.39  0.11  1.48
## 3 Decision                 0.14   0.12  0.14  0     0.59
## 4 Delegated regulation     0.04   0     0.12  0     0.57
## 5 Implementing regulation  0.01   0     0.03  0     0.11
## 6 Implementing directive   0      0     0     0     0   
## 7 Implementing decision    0      0     0.02  0     0.12
## 8 Delegated directive      0      0     0     0     0   
## 9 Delegated decision       0      0     0     0     0

Plot 7: Number of Rules with Review Provisions by Legislative and Non-Legislative Acts (1993-2023)

# Long table of yearly rule counts, one row per (year, form).
# The column pattern also picks up non-form columns; their names are not
# among the factor levels, so they become NA and are dropped at the end.
rules_count_data <- df_clean %>%
  select(year, matches("_rules_with_revise_count$")) %>%
  pivot_longer(
    cols = matches("_rules_with_revise_count$"),
    names_pattern = "(.+)_rules_with_revise_count",
    names_to = "form",
    values_to = "count"
  ) %>%
  mutate(
    # Level order drives the 3-column facet layout, filled row by row:
    # basic acts, then implementing acts, then delegated acts
    form = factor(
      form,
      levels = c(
        "Directive", "Regulation", "Decision",
        "Implementing directive", "Implementing regulation", "Implementing decision",
        "Delegated directive", "Delegated regulation", "Delegated decision"
      )
    )
  ) %>%
  drop_na(form)

# One panel per form: yearly count as line + points
ggplot(rules_count_data, aes(x = year, y = count)) +
  geom_line(color = "#2c3e50", alpha = 0.7) +
  geom_point(size = 2, color = "#2c3e50") +
  facet_wrap(~ form, ncol = 3) +
  labs(
    title = "Number of Rules with Review and Revise Provisions by Legislative and Non-Legislative Acts",
    subtitle = "Absolute count of rules with review provisions (1993-2023)",
    x = "Year",
    y = "Number of Rules"
  ) +
  scale_y_continuous(
    breaks = seq(0, max(rules_count_data$count, na.rm = TRUE), by = 5)
  ) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30"),
    plot.margin = margin(1, 1, 1, 1, "cm"),
    strip.text = element_text(face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines"),
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_blank()
  )

Calculate summary statistics for counts

# Per-form total, mean, max and min of the yearly rule counts,
# largest total first.
count_summary <- summarise(
  group_by(rules_count_data, form),
  Total = sum(count, na.rm = TRUE),
  Mean = mean(count, na.rm = TRUE),
  Max = max(count, na.rm = TRUE),
  Min = min(count, na.rm = TRUE)
) %>%
  arrange(desc(Total))

# Heading followed by the full table (n = Inf prints every row)
print("Summary Statistics of Rule Counts by Form:")
## [1] "Summary Statistics of Rule Counts by Form:"
print(count_summary, n = Inf)
## # A tibble: 9 × 5
##   form                    Total    Mean   Max   Min
##   <fct>                   <dbl>   <dbl> <dbl> <dbl>
## 1 Regulation                524 16.9       37     8
## 2 Directive                 421 13.6       26     4
## 3 Decision                  129  4.16      17     0
## 4 Delegated regulation       12  0.387      8     0
## 5 Implementing regulation    10  0.323      2     0
## 6 Implementing decision       1  0.0323     1     0
## 7 Implementing directive      0  0          0     0
## 8 Delegated directive         0  0          0     0
## 9 Delegated decision          0  0          0     0

Plot 8: Percentage of Rules with Review Provisions by Legislative and Non-Legislative Acts (1993-2023)

# `rules_count_data` is already built by the Plot 7 chunk with exactly this
# pipeline, so it is reused here instead of being recomputed from `df_clean`.
# NOTE(review): the figure below is the same absolute-count plot as Plot 7,
# although this section is headed "Percentage of Rules"; the percentage plot
# itself is built further down from `rules_ratio_data`. Consider dropping
# this repeated figure.
ggplot(rules_count_data, aes(x = year, y = count)) +
  geom_line(color = "#2c3e50", alpha = 0.7) +
  geom_point(size = 2, color = "#2c3e50") +
  facet_wrap(~ form, ncol = 3) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  scale_y_continuous(
    breaks = seq(0, max(rules_count_data$count, na.rm = TRUE), by = 5)
  ) +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray90"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30"),
    strip.text = element_text(face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines"),
    plot.margin = margin(1, 1, 1, 1, "cm")
  ) +
  labs(
    title = "Number of Rules with Review and Revise Provisions by Legislative and Non-Legislative Acts",
    subtitle = "Absolute count of rules with review provisions (1993-2023)",
    x = "Year",
    y = "Number of Rules"
  )

Calculate rules ratio data

# Percentage of rules with review provisions per (year, form):
# revise counts are pivoted long, then matched with the pivoted totals.
rules_ratio_data <- df_clean %>%
  select(
    year,
    matches("_rules_with_revise_count$"),
    matches("_total_rules$")
  ) %>%
  # Long format for the revise counts
  pivot_longer(
    cols = matches("_rules_with_revise_count$"),
    names_pattern = "(.+)_rules_with_revise_count",
    names_to = "form",
    values_to = "revise_count"
  ) %>%
  # Attach the matching total per form and year
  left_join(
    pivot_longer(
      select(df_clean, year, matches("_total_rules$")),
      cols = -year,
      names_pattern = "(.+)_total_rules",
      names_to = "form",
      values_to = "total_count"
    ),
    by = c("form", "year")
  ) %>%
  # Missing counts are treated as 0
  mutate(
    revise_count = replace_na(revise_count, 0),
    total_count = replace_na(total_count, 0)
  ) %>%
  # A zero total yields a 0% ratio instead of a division by zero
  mutate(
    ratio = if_else(total_count == 0, 0, (revise_count / total_count) * 100)
  ) %>%
  # Keep only the nine forms of act
  filter(
    form %in% c(
      "Directive", "Regulation", "Decision",
      "Implementing directive", "Implementing regulation", "Implementing decision",
      "Delegated directive", "Delegated regulation", "Delegated decision"
    )
  ) %>%
  mutate(
    # Level order drives the 3-column facet layout, filled row by row:
    # basic acts, then implementing acts, then delegated acts
    form = factor(
      form,
      levels = c(
        "Directive", "Regulation", "Decision",
        "Implementing directive", "Implementing regulation", "Implementing decision",
        "Delegated directive", "Delegated regulation", "Delegated decision"
      )
    )
  )

# One panel per form: yearly percentage of rules with review provisions
p4 <- ggplot(rules_ratio_data, aes(x = year, y = ratio)) +
  geom_line(color = "#2c3e50", alpha = 0.7) +
  geom_point(size = 2, color = "#2c3e50") +
  facet_wrap(~ form, ncol = 3) +
  labs(
    title = "Percentage of Rules with Review and Revise Provisions by Legislative and Non-Legislative Acts (1993-2023)",
    subtitle = "Ratio calculated against total number of each rules",
    x = "Year",
    y = "Percentage"
  ) +
  # Fixed 0-55% range with a percent sign appended to each label
  scale_y_continuous(
    limits = c(0, 55),
    labels = function(value) paste0(value, "%")
  ) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30"),
    plot.margin = margin(1, 1, 1, 1, "cm"),
    strip.text = element_text(face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines"),
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_blank()
  )

# Render the stored plot
print(p4)

Calculate summary statistics for rules

# Per-form mean, median, SD, min and max of the yearly rule percentages,
# rounded to 2 decimals and sorted by mean, highest first.
rules_summary <- summarise(
  group_by(rules_ratio_data, form),
  Mean = mean(ratio, na.rm = TRUE),
  Median = median(ratio, na.rm = TRUE),
  SD = sd(ratio, na.rm = TRUE),
  Min = min(ratio, na.rm = TRUE),
  Max = max(ratio, na.rm = TRUE)
) %>%
  mutate(across(where(is.numeric), function(value) round(value, digits = 2))) %>%
  arrange(desc(Mean))

# Heading followed by the full table (n = Inf prints every row)
print("Summary Statistics of Percentage of Rules with Review Provisions by Form:")
## [1] "Summary Statistics of Percentage of Rules with Review Provisions by Form:"
print(rules_summary, n = Inf)
## # A tibble: 9 × 6
##   form                     Mean Median    SD   Min   Max
##   <fct>                   <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Directive               23.0   18.2  13.5   6.32 53.3 
## 2 Regulation               3.62   1.15  4.41  0.3  17.4 
## 3 Decision                 0.42   0.42  0.39  0     1.64
## 4 Delegated regulation     0.3    0     1.09  0     5.84
## 5 Implementing regulation  0.05   0     0.09  0     0.36
## 6 Implementing decision    0.01   0     0.07  0     0.38
## 7 Implementing directive   0      0     0     0     0   
## 8 Delegated directive      0      0     0     0     0   
## 9 Delegated decision       0      0     0     0     0

Articles by Domain

# Forms of act that are filtered out of the per-domain tables below, where
# the pivoted column-name prefixes are compared against this list.
EXCLUDED_FORMS <- c(
  "Decision",
  "Regulation",
  "Implementing regulation",
  "Directive",
  "Implementing decision",
  "Delegated regulation",
  "Implementing directive",
  "Delegated directive",
  "Delegated decision"
)

# Display labels keyed by domain name. The order of the names matters: it is
# used as the factor level order for `domain` further down.
domain_labels <- c(
  # Domains whose label is shortened with "&"
  "Agriculture Forestry And Fisheries" = "Agriculture, Forestry & Fisheries",
  "Business And Competition" = "Business & Competition",
  "Education And Communications" = "Education & Communications",
  "Production Technology And Research" = "Production Technology & Research",
  # Domains whose label equals the name
  "European Union" = "European Union",
  "Agri Foodstuffs" = "Agri Foodstuffs",
  "Trade" = "Trade",
  "Geography" = "Geography",
  "Social Questions" = "Social Questions",
  "International Relations" = "International Relations",
  "Economics" = "Economics",
  "Politics" = "Politics",
  "Finance" = "Finance",
  "Environment" = "Environment",
  "Transport" = "Transport",
  "Industry" = "Industry",
  "Employment And Working Conditions" = "Employment And Working Conditions",
  "Law" = "Law",
  "Energy" = "Energy",
  "International Organisations" = "International Organisations",
  "Science" = "Science"
)


# Long table of yearly article counts, one row per (year, domain).
# Whitespace runs in the names are collapsed to a single space, form-of-act
# names are removed, and `domain` becomes a factor ordered like domain_labels.
articles_domain_data <- pivot_longer(
  select(df_clean, year, matches("_articles_with_revise_count$")),
  cols = -year,
  names_pattern = "(.+)_articles_with_revise_count",
  names_to = "domain",
  values_to = "count"
) %>%
  mutate(domain = str_replace_all(domain, "\\s+", " ")) %>%
  filter(!domain %in% EXCLUDED_FORMS) %>%
  mutate(domain = factor(domain, levels = names(domain_labels)))

# Percentage of articles with review provisions per (year, domain)
articles_ratio_domain_data <- df_clean %>%
  select(
    year,
    matches("_articles_with_revise_count$"),
    matches("_total_articles$")
  ) %>%
  # Long format for the revise counts
  pivot_longer(
    cols = matches("_articles_with_revise_count$"),
    names_pattern = "(.+)_articles_with_revise_count",
    names_to = "domain",
    values_to = "revise_count"
  ) %>%
  # Collapse whitespace runs in the names; missing counts become 0
  mutate(
    domain = stringr::str_replace_all(domain, "\\s+", " "),
    revise_count = replace_na(revise_count, 0)
  ) %>%
  # Drop the form-of-act names
  filter(!domain %in% EXCLUDED_FORMS) %>%
  # Attach the totals, prepared the same way so the join keys line up
  left_join(
    pivot_longer(
      select(df_clean, year, matches("_total_articles$")),
      cols = -year,
      names_pattern = "(.+)_total_articles",
      names_to = "domain",
      values_to = "total_count"
    ) %>%
      mutate(
        domain = stringr::str_replace_all(domain, "\\s+", " "),
        total_count = replace_na(total_count, 0)
      ) %>%
      filter(!domain %in% EXCLUDED_FORMS),
    by = c("domain", "year")
  ) %>%
  # Rows without a matching total get 0 (revise_count is already NA-free)
  mutate(total_count = replace_na(total_count, 0)) %>%
  mutate(
    # A zero total yields a 0% ratio instead of a division by zero
    ratio = if_else(total_count == 0, 0, (revise_count / total_count) * 100),
    # Alphabetical level order for the remaining domains
    domain = factor(
      domain,
      levels = c(
        "Agri Foodstuffs", "Agriculture Forestry And Fisheries",
        "Business And Competition", "Economics",
        "Education And Communications", "Employment And Working Conditions",
        "Energy", "Environment", "European Union", "Finance", "Geography",
        "Industry", "International Organisations", "International Relations",
        "Law", "Politics", "Production Technology And Research", "Science",
        "Social Questions", "Trade", "Transport"
      )
    )
  )

# Domains sorted by their mean yearly count, highest first; this order
# decides the panel order of the plot below.
articles_domain_order <- articles_domain_data %>%
  group_by(domain) %>%
  summarise(avg_count = mean(count, na.rm = TRUE)) %>%
  arrange(desc(avg_count)) %>%
  pull(domain)

# Bar chart per domain, panels laid out in the order computed above
ggplot(
  mutate(articles_domain_data, domain = factor(domain, levels = articles_domain_order)),
  aes(x = year, y = count)
) +
  geom_col(fill = "#2c3e50", alpha = 0.7, width = 0.8) +
  # Panel titles use the display labels from domain_labels
  facet_wrap(~ domain, ncol = 3, labeller = labeller(domain = domain_labels)) +
  labs(
    title = "Number of Articles with Review Provisions by Domain",
    subtitle = "1993-2023 (Ordered by Average Count)",
    x = "Year",
    y = "Number of Articles"
  ) +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1)) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  create_base_theme()

Calculate summary statistics

# Per-domain total, mean, median, SD, min and max of the yearly article
# counts, rounded to 2 decimals and sorted by total, largest first.
# NOTE(review): this reassigns `articles_count_summary`, which the by-form
# section earlier in the file also assigns; confirm nothing downstream still
# needs the by-form table under that name.
articles_count_summary <- articles_domain_data %>%
  group_by(domain) %>%
  summarise(
    Total = sum(count, na.rm = TRUE),
    Mean = mean(count, na.rm = TRUE),
    Median = median(count, na.rm = TRUE),
    SD = sd(count, na.rm = TRUE),
    Min = min(count, na.rm = TRUE),
    Max = max(count, na.rm = TRUE)
  ) %>%
  # Round all numeric columns to 2 decimal places
  mutate(across(where(is.numeric), ~round(., 2))) %>%
  # Sort by total in descending order
  arrange(desc(Total)) %>%
  # Display labels come from the shared `domain_labels` lookup, so this table
  # and the plot facets cannot drift apart; names without an entry are kept
  # unchanged.
  mutate(
    domain = coalesce(
      unname(domain_labels[as.character(domain)]),
      as.character(domain)
    )
  )

# print() shows escape sequences literally, so the heading carries no "\n"
print("Summary Statistics of Article Counts by Domain:")
## [1] "Summary Statistics of Article Counts by Domain:"
print(articles_count_summary, n = Inf)
## # A tibble: 21 × 7
##    domain                             Total  Mean Median    SD   Min   Max
##    <chr>                              <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
##  1 European Union                    158.    5.09   4.6   2.4   1.32 10.5 
##  2 Trade                             133.    4.28   3.52  2.48  1.57 12.4 
##  3 Environment                        96.0   3.1    2.78  2     0.25  7.55
##  4 Finance                            89.3   2.88   2.08  2.53  0.5  13.0 
##  5 Social Questions                   73.4   2.37   2.08  1.12  0.53  5.33
##  6 Education & Communications         72.9   2.35   2.18  1     0.4   4.39
##  7 Law                                71.5   2.31   2.17  1.41  0     5.58
##  8 Production Technology & Research   65.6   2.12   1.83  1.15  0.9   6.08
##  9 Transport                          63.4   2.05   1.75  1.31  0.5   5.83
## 10 International Relations            62.4   2.01   1.98  1.09  0.45  4.22
## 11 Business & Competition             60.4   1.95   1.98  1.15  0.2   5.5 
## 12 Agriculture, Forestry & Fisheries  59.3   1.91   1.2   1.59  0     7.17
## 13 Economics                          55.5   1.79   1.45  1.25  0.25  4.9 
## 14 Employment And Working Conditions  36.8   1.19   1.08  0.83  0.2   3.12
## 15 Energy                             36.6   1.18   0.7   1.51  0     6.88
## 16 Geography                          35.2   1.14   0.98  0.85  0     4.53
## 17 Industry                           33.0   1.07   0.9   1.03  0     5.12
## 18 Politics                           31.4   1.01   0.75  0.81  0     3.12
## 19 Agri Foodstuffs                    30.3   0.98   0.73  0.92  0     3.45
## 20 International Organisations         4.79  0.15   0     0.23  0     0.87
## 21 Science                             2.68  0.09   0     0.15  0     0.5

Ratio for Articles with review and revise provisions

# Rank the domains from highest to lowest mean yearly percentage; the
# resulting vector is used below as the facet order.
articles_ratio_domain_order <- articles_ratio_domain_data %>%
  group_by(domain) %>%
  summarize(avg_ratio = mean(ratio, na.rm = TRUE)) %>%
  arrange(desc(avg_ratio)) %>%
  pull(domain)

# Faceted bar chart of the yearly percentage of articles with review
# provisions, one panel per domain in the order computed above.
ggplot(articles_ratio_domain_data %>%
         mutate(domain = factor(domain, levels = articles_ratio_domain_order)),
       aes(x = year, y = ratio)) +
  geom_col(fill = "#2c3e50", alpha = 0.7, width = 0.8) +  # Bar plot
  facet_wrap(~ domain, ncol = 3, labeller = labeller(domain = domain_labels)) +
  scale_x_continuous(breaks = seq(1995, 2020, by = 10)) +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  # Zoom with the coordinate system instead of scale limits. Scale limits
  # censor out-of-range values, and a bar of width 0.8 centred on the first
  # or last year (1993 / 2023) spans x - 0.4 .. x + 0.4, i.e. beyond
  # limits = c(1993, 2023), so those bars were dropped with a
  # "removed rows" warning. coord_cartesian() keeps all data and only sets
  # the visible window (also protecting any ratio above 7 from removal).
  coord_cartesian(xlim = c(1993, 2023), ylim = c(0, 7)) +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(color = "gray90"),
    panel.grid.major.x = element_line(color = "gray90"),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    strip.text = element_text(face = "bold", size = 10),
    strip.background = element_blank(),
    panel.spacing = unit(1.5, "lines"),
    plot.margin = margin(1, 1, 1, 1, "cm"),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    axis.title = element_text(size = 12)
  ) +
  labs(
    title = "Percentage of Articles with Review Provisions by Domain",
    x = "Year",
    y = "Percentage of Articles"
  )

Calculate summary statistics

# Descriptive statistics of the yearly percentage of articles with review
# provisions, one row per domain.
articles_ratio_summary <- articles_ratio_domain_data %>%
  group_by(domain) %>%
  summarise(
    # Every statistic is computed on `ratio` with missing values ignored;
    # `.names = "{.fn}"` names each output column after its list entry.
    across(
      ratio,
      list(
        Mean = ~ mean(.x, na.rm = TRUE),
        Median = ~ median(.x, na.rm = TRUE),
        SD = ~ sd(.x, na.rm = TRUE),
        Min = ~ min(.x, na.rm = TRUE),
        Max = ~ max(.x, na.rm = TRUE)
      ),
      .names = "{.fn}"
    )
  ) %>%
  # Two decimals for every numeric column
  mutate(across(where(is.numeric), function(v) round(v, digits = 2))) %>%
  # Highest mean percentage first
  arrange(desc(Mean)) %>%
  # Swap the four raw domain names that have a prettier display form;
  # every other domain keeps its raw name (as plain character).
  mutate(
    domain = as.character(domain),
    domain = case_when(
      domain == "Agriculture Forestry And Fisheries" ~
        "Agriculture, Forestry & Fisheries",
      domain == "Business And Competition" ~
        "Business & Competition",
      domain == "Education And Communications" ~
        "Education & Communications",
      domain == "Production Technology And Research" ~
        "Production Technology & Research",
      TRUE ~ domain
    )
  )

# Show a caption followed by the full table (n = Inf prints every row)
print("Summary Statistics of Percentage of Articles with Review Provisions by Domain:")
## [1] "Summary Statistics of Percentage of Articles with Review Provisions by Domain:"
print(articles_ratio_summary, n = Inf)
## # A tibble: 21 × 6
##    domain                             Mean Median    SD   Min   Max
##    <chr>                             <dbl>  <dbl> <dbl> <dbl> <dbl>
##  1 Energy                             1.16   0.97  1.04  0     4.46
##  2 Environment                        1      0.89  0.56  0.19  2.66
##  3 Employment And Working Conditions  0.92   0.78  0.68  0.23  3.83
##  4 Transport                          0.76   0.69  0.53  0.14  2.79
##  5 Law                                0.71   0.72  0.35  0     1.31
##  6 Education & Communications         0.7    0.72  0.34  0.13  1.7 
##  7 Business & Competition             0.68   0.65  0.35  0.11  1.25
##  8 Production Technology & Research   0.62   0.57  0.29  0.18  1.2 
##  9 Social Questions                   0.57   0.57  0.19  0.15  1.04
## 10 Economics                          0.56   0.53  0.3   0.07  1.39
## 11 Finance                            0.53   0.44  0.37  0.1   1.58
## 12 Politics                           0.5    0.43  0.31  0     1.22
## 13 European Union                     0.48   0.44  0.19  0.19  0.86
## 14 Industry                           0.42   0.33  0.38  0     1.97
## 15 International Organisations        0.41   0     0.68  0     3.03
## 16 International Relations            0.33   0.33  0.17  0.09  0.71
## 17 Trade                              0.29   0.23  0.19  0.1   0.89
## 18 Science                            0.22   0     0.4   0     1.65
## 19 Agriculture, Forestry & Fisheries  0.18   0.15  0.13  0     0.57
## 20 Geography                          0.12   0.1   0.08  0     0.36
## 21 Agri Foodstuffs                    0.08   0.06  0.07  0     0.29

Rules by domain

# Shared ggplot2 theme for the faceted plots.
# Takes no arguments and returns theme_minimal(base_size = 12) combined with
# the overrides below (serif font, light major grid, bold facet strips,
# rotated x-axis labels).
create_base_theme <- function() {
  light_grid <- element_line(color = "gray90")

  overrides <- theme(
    text = element_text(family = "Times New Roman"),
    # Grid: no minor lines, light grey major lines
    panel.grid.minor = element_blank(),
    panel.grid.major = light_grid,
    # Titles and facet strips
    plot.title = element_text(size = 11, face = "bold"),
    strip.text = element_text(face = "bold"),
    strip.background = element_blank(),
    # Spacing between panels and around the plot
    panel.spacing = unit(2, "lines"),
    plot.margin = margin(1, 1, 1, 1, "cm"),
    # Slanted year labels
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

  theme_minimal(base_size = 12) + overrides
}

# Names that are dropped from the `domain` column in the domain analysis
# below (they are matched with %in% after the column prefix is extracted).
EXCLUDED_FORMS <- c(
  "Decision",
  "Regulation",
  "Implementing regulation",
  "Directive",
  "Implementing decision",
  "Delegated regulation",
  "Implementing directive",
  "Delegated directive",
  "Delegated decision"
)

# Display labels keyed by raw domain name (named character vector).
# The order of the names matters: it is reused as the factor level order
# when rules_domain_data is built.
domain_labels <- local({
  raw_names <- c(
    "Agriculture Forestry And Fisheries",
    "Business And Competition",
    "Education And Communications",
    "Production Technology And Research",
    "European Union",
    "Agri Foodstuffs",
    "Trade",
    "Geography",
    "Social Questions",
    "International Relations",
    "Economics",
    "Politics",
    "Finance",
    "Environment",
    "Transport",
    "Industry",
    "Employment And Working Conditions",
    "Law",
    "Energy",
    "International Organisations",
    "Science"
  )

  # Start from the identity mapping (label == raw name) ...
  labels <- setNames(raw_names, raw_names)

  # ... then override the four domains that get a prettier label
  labels[c(
    "Agriculture Forestry And Fisheries",
    "Business And Competition",
    "Education And Communications",
    "Production Technology And Research"
  )] <- c(
    "Agriculture, Forestry & Fisheries",
    "Business & Competition",
    "Education & Communications",
    "Production Technology & Research"
  )

  labels
})

Data Preparation

# Long-format table (year, domain, count) built from the wide
# "<name>_rules_with_revise_count" columns of df_clean.
rules_domain_data <- df_clean %>%
  select(year, ends_with("_rules_with_revise_count")) %>%
  pivot_longer(
    cols = !year,
    # The text in front of the suffix becomes the domain name
    names_pattern = "(.+)_rules_with_revise_count",
    names_to = "domain",
    values_to = "count"
  ) %>%
  # Collapse any run of whitespace in the name to a single space
  mutate(domain = str_replace_all(domain, "\\s+", " ")) %>%
  # Drop the names listed in EXCLUDED_FORMS
  filter(!domain %in% EXCLUDED_FORMS) %>%
  # Factor levels follow the order of domain_labels
  mutate(domain = factor(domain, levels = names(domain_labels)))

# Long-format table with one row per (year, domain):
#   revise_count - rules with review/revise provisions
#   total_count  - all rules
#   ratio        - revise_count as a percentage of total_count
#
# Only the revise-count columns are selected for the first reshape. The
# previous version also selected every wide "*_total_rules" column here;
# those were not pivoted, so they were repeated on every long row and left
# in the result unused. Totals now enter solely through the join below.
rules_ratio_domain_data <- df_clean %>%
  select(year, matches("_rules_with_revise_count$")) %>%
  pivot_longer(
    cols = -year,
    names_to = "domain",
    values_to = "revise_count",
    names_pattern = "(.+)_rules_with_revise_count"
  ) %>%
  # Clean domain names and handle possible double spaces
  mutate(domain = stringr::str_replace_all(domain, "\\s+", " ")) %>%
  # Exclude forms
  filter(!(domain %in% EXCLUDED_FORMS)) %>%
  # Join with total counts, reshaped and cleaned the same way so that the
  # (domain, year) keys line up
  left_join(
    df_clean %>%
      select(year, matches("_total_rules$")) %>%
      pivot_longer(
        cols = -year,
        names_to = "domain",
        values_to = "total_count",
        names_pattern = "(.+)_total_rules"
      ) %>%
      mutate(domain = stringr::str_replace_all(domain, "\\s+", " ")) %>%
      filter(!(domain %in% EXCLUDED_FORMS)),  # Also exclude forms here
    by = c("domain", "year")
  ) %>%
  mutate(
    # Missing counts (including totals with no join match) are treated as 0;
    # done once, after the join, which covers both sources of NA
    revise_count = replace_na(revise_count, 0),
    total_count = replace_na(total_count, 0),
    # A year with no rules in a domain is reported as 0% (avoids 0 / 0)
    ratio = case_when(
      total_count == 0 ~ 0,
      TRUE ~ (revise_count / total_count) * 100
    ),
    # Set factor levels for remaining domains
    domain = factor(domain, levels = c(
      'Agri Foodstuffs', 'Agriculture Forestry And Fisheries', 'Business And Competition',
      'Economics', 'Education And Communications', 'Employment And Working Conditions',
      'Energy', 'Environment', 'European Union', 'Finance', 'Geography', 'Industry',
      'International Organisations', 'International Relations', 'Law', 'Politics',
      'Production Technology And Research', 'Science', 'Social Questions', 'Trade', 'Transport'
    ))
  )

Rules by domain

# Rank the domains from highest to lowest mean yearly rule count; the
# resulting vector is used below as the facet order.
domain_order <- rules_domain_data %>%
  group_by(domain) %>%
  summarise(avg_count = mean(count, na.rm = TRUE)) %>%
  arrange(desc(avg_count)) %>%
  pull(domain)

# One bar panel per domain (year on x, count on y), panels laid out in the
# ranking computed above and titled via the domain_labels lookup.
rules_domain_data %>%
  mutate(domain = factor(domain, levels = domain_order)) %>%
  ggplot(aes(x = year, y = count)) +
  geom_col(width = 0.8, alpha = 0.7, fill = "#2c3e50") +
  facet_wrap(
    vars(domain),
    ncol = 3,
    labeller = labeller(domain = domain_labels)
  ) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  # Counts are shown with one decimal place on the y axis
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1)) +
  create_base_theme() +
  labs(
    x = "Year",
    y = "Number of Rules",
    title = "Number of Rules with Review Provisions by Domain"
  )

Calculate summary statistics

# Descriptive statistics of the yearly rule counts, one row per domain.
rules_count_summary <- rules_domain_data %>%
  group_by(domain) %>%
  summarise(
    Total = sum(count, na.rm = TRUE),
    Mean = mean(count, na.rm = TRUE),
    Median = median(count, na.rm = TRUE),
    SD = sd(count, na.rm = TRUE),
    Min = min(count, na.rm = TRUE),
    Max = max(count, na.rm = TRUE)
  ) %>%
  # Round all numeric columns to 2 decimal places
  mutate(across(where(is.numeric), ~round(., 2))) %>%
  # Sort by total in descending order
  arrange(desc(Total)) %>%
  # Relabel through the shared domain_labels lookup (the one the plots use)
  # instead of a second hard-coded copy of the mapping, so table and facet
  # labels cannot drift apart. A domain missing from the lookup keeps its
  # raw name, as before.
  mutate(
    domain = as.character(domain),
    domain = coalesce(unname(domain_labels[domain]), domain)
  )

# Print the results
print("Summary Statistics of Number of Rules with Review Provisions by Domain:")
## [1] "Summary Statistics of Number of Rules with Review Provisions by Domain:"
# n = Inf prints every row instead of the tibble default
print(rules_count_summary, n = Inf)
## # A tibble: 21 × 7
##    domain                             Total  Mean Median    SD   Min   Max
##    <chr>                              <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
##  1 European Union                    143.    4.6    3.87  2.08  1.32  8.58
##  2 Trade                             107.    3.44   3.23  1.58  1.42  7.86
##  3 Environment                        78.2   2.52   2     1.6   0.25  6.57
##  4 Education & Communications         68.6   2.21   2.12  0.9   0.4   3.95
##  5 Social Questions                   66.9   2.16   2.08  0.91  0.37  3.92
##  6 Finance                            65.4   2.11   1.75  1.37  0.5   5.66
##  7 Law                                64.9   2.09   1.87  1.33  0     5.08
##  8 Production Technology & Research   58.8   1.9    1.7   0.87  0.7   4.07
##  9 International Relations            57.5   1.86   1.82  1.02  0.45  4.22
## 10 Transport                          57.1   1.84   1.58  1.08  0.5   4.83
## 11 Business & Competition             49.4   1.59   1.33  0.93  0.2   4.5 
## 12 Agriculture, Forestry & Fisheries  49.2   1.59   1.2   1.19  0     4.83
## 13 Economics                          47.3   1.52   1.33  1     0.25  4.22
## 14 Geography                          31.8   1.03   0.92  0.67  0     3.28
## 15 Energy                             30.1   0.97   0.58  1.25  0     6.02
## 16 Politics                           29.9   0.96   0.75  0.76  0     3.12
## 17 Employment And Working Conditions  29.2   0.94   0.92  0.58  0.2   2.29
## 18 Industry                           28.7   0.93   0.75  0.92  0     5.12
## 19 Agri Foodstuffs                    27.7   0.89   0.73  0.85  0     3.03
## 20 International Organisations         4.34  0.14   0     0.22  0     0.87
## 21 Science                             2.68  0.09   0     0.15  0     0.5
# Rank the domains from highest to lowest mean yearly percentage; the
# resulting vector is used below as the facet order.
ratio_domain_order <- rules_ratio_domain_data %>%
  group_by(domain) %>%
  summarise(avg_ratio = mean(ratio, na.rm = TRUE)) %>%
  arrange(desc(avg_ratio)) %>%
  pull(domain)

# One bar panel per domain (year on x, percentage on y), panels laid out in
# the ranking computed above and titled via the domain_labels lookup.
rules_ratio_domain_data %>%
  mutate(domain = factor(domain, levels = ratio_domain_order)) %>%
  ggplot(aes(x = year, y = ratio)) +
  geom_col(width = 0.8, alpha = 0.7, fill = "#2c3e50") +
  facet_wrap(
    vars(domain),
    ncol = 3,
    labeller = labeller(domain = domain_labels)
  ) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  # Append a percent sign to each y-axis tick label
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  create_base_theme() +
  labs(
    x = "Year",
    y = "Percentage of Rules",
    title = "Percentage of Rules with Review Provisions by Domain",
    subtitle = "1993-2023 (Ordered by Average Percentage)"
  )

Calculate summary

# Mean yearly percentage of rules with review provisions per domain,
# highest first, rounded to two decimals after sorting.
ratio_summary <- rules_ratio_domain_data %>%
  group_by(domain) %>%
  summarise(avg_ratio = mean(ratio, na.rm = TRUE)) %>%
  arrange(desc(avg_ratio)) %>%
  mutate(across(avg_ratio, function(v) round(v, digits = 2)))

# print() shows the "\n" escape literally (see the recorded output below)
print("\nAverage ratios by domain:")
## [1] "\nAverage ratios by domain:"
# NOTE(review): without n = Inf only the first 10 of the 21 rows are shown,
# and domains keep their raw (unrelabelled) factor names, unlike the other
# summary tables; confirm whether that is intended.
print(ratio_summary)
## # A tibble: 21 × 2
##    domain                             avg_ratio
##    <fct>                                  <dbl>
##  1 Energy                                  6   
##  2 Environment                             4.77
##  3 Law                                     4.47
##  4 Education And Communications            4.3 
##  5 Employment And Working Conditions       3.69
##  6 Transport                               3.4 
##  7 Production Technology And Research      3.13
##  8 Politics                                2.75
##  9 Social Questions                        2.66
## 10 Business And Competition                2.55
## # ℹ 11 more rows