Introduction
This analysis examines the evolution of review and revise provisions in EU legislation from 1993 to 2023, focusing on both articles and rules across different types of rules and domains.
#load required libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(patchwork)
library(stringr)
library(scales)
Data Preparation
Analyze in General
#' Summarise review/revise provisions across all years
#'
#' Adds up the article and rule counts over every row of `data`, prints a short
#' plain-text report with `cat()`, and returns the totals.
#'
#' @param data A data frame with numeric columns `total_articles`,
#'   `articles_with_revise`, `total_rules` and `rules_with_revise`.
#' @return The result of `summarise()`: the four summed columns (a single row
#'   for ungrouped input).
analyze_total_provisions <- function(data) {
  # na.rm = TRUE matches every other summary in this file and keeps one
  # missing year from turning a grand total into NA.
  total_stats <- data %>%
    summarise(
      total_articles = sum(total_articles, na.rm = TRUE),
      articles_with_revise = sum(articles_with_revise, na.rm = TRUE),
      total_rules = sum(total_rules, na.rm = TRUE),
      rules_with_revise = sum(rules_with_revise, na.rm = TRUE)
    )

  # Share of `part` in `whole`, as a percentage rounded to two decimals.
  percent_of <- function(part, whole) {
    round(part / whole * 100, 2)
  }

  # Print results
  cat("\nTOTAL STATISTICS ACROSS ALL YEARS:\n")
  cat("----------------------------------------\n")
  cat("ARTICLES:\n")
  cat("Total number of articles:", total_stats$total_articles, "\n")
  cat("Articles with review/revise provisions:", total_stats$articles_with_revise, "\n")
  cat(
    "Percentage of articles with provisions:",
    percent_of(total_stats$articles_with_revise, total_stats$total_articles),
    "%\n"
  )
  cat("\nRULES:\n")
  cat("Total number of rules:", total_stats$total_rules, "\n")
  cat("Rules with review/revise provisions:", total_stats$rules_with_revise, "\n")
  cat(
    "Percentage of rules with provisions:",
    percent_of(total_stats$rules_with_revise, total_stats$total_rules),
    "%\n"
  )

  total_stats
}
# Run the overall summary on `df` and keep the returned totals in `total_stats`.
total_stats <- analyze_total_provisions(df)
##
## TOTAL STATISTICS ACROSS ALL YEARS:
## ----------------------------------------
## ARTICLES:
## Total number of articles: 330459
## Articles with review/revise provisions: 1271
## Percentage of articles with provisions: 0.38 %
##
## RULES:
## Total number of rules: 84776
## Rules with review/revise provisions: 1097
## Percentage of rules with provisions: 1.29 %
Visualization
# Base Theme
#' Shared ggplot2 theme for the faceted charts
#'
#' Starts from `theme_minimal()` and removes minor grid lines, lightens the
#' major grid, bolds facet strip labels, widens panel spacing and plot margins,
#' and rotates x-axis labels by 45 degrees.
#'
#' @param base_size Base font size passed to `theme_minimal()`.
#' @param base_family Font family applied to all text elements.
#' @return A ggplot2 theme object that can be added to a plot with `+`.
create_base_theme <- function(base_size = 12, base_family = "Times New Roman") {
  theme_minimal(base_size = base_size) +
    theme(
      text = element_text(family = base_family),
      panel.grid.minor = element_blank(),
      panel.grid.major = element_line(color = "gray90"),
      plot.title = element_text(size = 11, face = "bold"),
      strip.text = element_text(face = "bold"),
      strip.background = element_blank(),
      panel.spacing = unit(2, "lines"),
      plot.margin = margin(1, 1, 1, 1, "cm"),
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
}
Plot 1: Annual Count of Articles Containing Review and Revise Provisions (1993-2023)
# Add the yearly shares of articles and rules carrying a review/revise
# provision as new columns on `df`.
df <- df %>%
  mutate(
    articles_revise_ratio = articles_with_revise / total_articles,
    rules_revise_ratio = rules_with_revise / total_rules
  )

# Yearly number of articles with review/revise provisions: light area fill,
# line and points, plus a dashed linear trend.
ggplot(df, aes(x = year, y = articles_with_revise)) +
  geom_area(alpha = 0.1, fill = "#2c3e50") +
  geom_line(linewidth = 0.5, color = "#2c3e50", alpha = 0.8) +
  geom_point(size = 1.5, color = "#2c3e50", alpha = 0.8) +
  geom_smooth(
    method = "lm", color = "#e74c3c",
    linewidth = 0.5, linetype = "dashed",
    alpha = 0.7, se = FALSE
  ) +
  scale_x_continuous(
    breaks = seq(1995, 2020, by = 5),
    limits = c(1993, 2023),
    expand = c(0.02, 0)
  ) +
  # Single y scale: a separate ylim() call would replace scale_y_continuous()
  # and silently discard its breaks and expand settings.
  scale_y_continuous(
    limits = c(0, 80),
    breaks = seq(0, 80, by = 20),
    expand = c(0, 0)
  ) +
  theme_minimal(base_size = 14) +
  labs(
    title = "Review and Revise Provisions in EU Articles",
    subtitle = "Annual Count of Articles Containing Review and Revise Provisions (1993-2023)",
    x = "Year",
    y = "Number of Articles"
  )
Calculate summary statistics
# Total, mean, median, maximum and minimum of the yearly number of articles
# with review provisions, rounded to two decimals.
overall_stats <- mutate(
  summarise(
    df,
    Total = sum(articles_with_revise, na.rm = TRUE),
    Mean = mean(articles_with_revise, na.rm = TRUE),
    Median = median(articles_with_revise, na.rm = TRUE),
    Max = max(articles_with_revise, na.rm = TRUE),
    Min = min(articles_with_revise, na.rm = TRUE)
  ),
  across(everything(), function(value) round(value, 2))
)

# Caption first, then the one-row table
print("Overall Statistics of Articles with Review Provisions (1993-2023):")
## [1] "Overall Statistics of Articles with Review Provisions (1993-2023):"
print(overall_stats)
## Total Mean Median Max Min
## 1 1271 41 38 79 20
Plot 2: Percentage of Articles Containing Review and Revise Provisions (1993-2023)
# Yearly share of articles with review/revise provisions, shown as a percent,
# with a dashed linear trend line.
ggplot(df, aes(x = year, y = articles_with_revise / total_articles)) +
  geom_point(size = 2, color = "#2c3e50") +
  geom_line(color = "#2c3e50", alpha = 0.7) +
  # `linewidth` (not `size`) sets line thickness, as in the other plots here;
  # `size` for lines is deprecated in ggplot2 >= 3.4.
  geom_smooth(
    method = "lm", color = "#c0392b",
    linetype = "dashed", linewidth = 0.8, se = FALSE
  ) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  # The y values are proportions; scales::percent renders them as percentages.
  scale_y_continuous(labels = scales::percent) +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray90"),
    plot.title = element_text(size = 11, face = "bold")
  ) +
  labs(
    title = "Percentage of Articles with Review Provisions (1993-2023)",
    x = "Year",
    y = "Percentage of Articles with Review Provisions"
  )
Calculate summary statistics for yearly average
# Mean, median, maximum and minimum of the yearly percentage of articles with
# review provisions, rounded to two decimals.
articles_ratio_stats <- mutate(
  summarise(
    # express the yearly share in percent before summarising
    mutate(df, ratio = articles_with_revise / total_articles * 100),
    Mean = mean(ratio, na.rm = TRUE),
    Median = median(ratio, na.rm = TRUE),
    Max = max(ratio, na.rm = TRUE),
    Min = min(ratio, na.rm = TRUE)
  ),
  across(everything(), function(value) round(value, 2))
)

# Caption first, then the one-row table
print("Statistics of Percentage of Articles with Review Provisions (1993-2023):")
## [1] "Statistics of Percentage of Articles with Review Provisions (1993-2023):"
print(articles_ratio_stats)
## Mean Median Max Min
## 1 0.39 0.35 0.76 0.2
Plot 3: Number of Rules with Review and Revise Provisions (1993-2023)
# Yearly number of rules with review/revise provisions: light area fill, thin
# line, small points and a dashed linear trend.
ggplot(df, aes(x = year, y = rules_with_revise)) +
  # Add subtle area fill for visual weight
  geom_area(alpha = 0.1, fill = "#2c3e50") +
  # Main line with thinner width
  geom_line(linewidth = 0.5, color = "#2c3e50", alpha = 0.8) +
  # Smaller points
  geom_point(size = 1.5, color = "#2c3e50", alpha = 0.8) +
  # Subtle trend line
  geom_smooth(
    method = "lm", color = "#e74c3c",
    linewidth = 0.5, linetype = "dashed",
    alpha = 0.7, se = FALSE
  ) +
  # Adjusted scales
  scale_x_continuous(
    breaks = seq(1995, 2020, by = 5),
    limits = c(1993, 2023),
    expand = c(0.02, 0)
  ) +
  # Single y scale: a separate ylim() call would replace scale_y_continuous()
  # and silently discard its expand setting.
  scale_y_continuous(
    limits = c(0, 70),
    expand = c(0, 0)
  ) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray95"),
    plot.background = element_rect(fill = "white", color = NA),
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20)
  ) +
  labs(
    title = "Review and Revise Provisions in EU Rules",
    subtitle = "Number of Rules with Review and Revise Provisions (1993-2023)",
    x = "Year",
    y = "Number of Rules"
  )
Calculate summary statistics for yearly average
# Total, mean, median, maximum and minimum of the yearly number of rules with
# review provisions, rounded to two decimals.
rules_stats <- mutate(
  summarise(
    df,
    Total = sum(rules_with_revise, na.rm = TRUE),
    Mean = mean(rules_with_revise, na.rm = TRUE),
    Median = median(rules_with_revise, na.rm = TRUE),
    Max = max(rules_with_revise, na.rm = TRUE),
    Min = min(rules_with_revise, na.rm = TRUE)
  ),
  across(everything(), function(value) round(value, 2))
)

# Caption first, then the one-row table
print("Overall Statistics of Rules with Review Provisions (1993-2023):")
## [1] "Overall Statistics of Rules with Review Provisions (1993-2023):"
print(rules_stats)
## Total Mean Median Max Min
## 1 1097 35.39 33 67 17
Plot 4: Percentage of Rules Containing Review and Revise Provisions (1993-2023)
# Yearly percentage of rules that carry a review/revise provision, kept
# alongside the two counts it is derived from.
overall_rules_ratio <- df_clean %>%
  mutate(ratio = (rules_with_revise / total_rules) * 100) %>%
  select(year, rules_with_revise, total_rules, ratio)

# Area + line + points with a dashed linear trend; y axis labelled in percent.
ggplot(data = overall_rules_ratio, mapping = aes(x = year, y = ratio)) +
  geom_area(fill = "#2c3e50", alpha = 0.1) +
  geom_line(color = "#2c3e50", alpha = 0.8, linewidth = 0.5) +
  geom_point(color = "#2c3e50", alpha = 0.8, size = 1.5) +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "#e74c3c",
    linetype = "dashed",
    linewidth = 0.5,
    alpha = 0.7
  ) +
  scale_x_continuous(
    limits = c(1993, 2023),
    breaks = seq(1995, 2020, by = 5),
    expand = c(0.02, 0)
  ) +
  scale_y_continuous(
    # axis starts at zero, upper end follows the data
    limits = c(0, NA),
    # a break every half percentage point, up to one point above the maximum
    breaks = seq(0, max(overall_rules_ratio$ratio, na.rm = TRUE) + 1, by = 0.5),
    labels = function(value) paste0(value, "%")
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.background = element_rect(fill = "white", color = NA),
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20),
    panel.grid.major = element_line(color = "gray95"),
    panel.grid.minor = element_blank(),
    text = element_text(family = "Helvetica"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30")
  ) +
  labs(
    x = "Year",
    y = "Percentage of Rules",
    title = "Review and Revise Provisions in EU Rules",
    subtitle = "Percentage of Rules Containing Review and Revise Provisions (1993-2023)"
  )
Calculate summary statistics
# Mean, median, maximum and minimum of the yearly percentage of rules with
# review provisions, rounded to two decimals.
rules_ratio_stats <- mutate(
  summarise(
    # express the yearly share in percent before summarising
    mutate(df_clean, ratio = (rules_with_revise / total_rules) * 100),
    Mean = mean(ratio, na.rm = TRUE),
    Median = median(ratio, na.rm = TRUE),
    Max = max(ratio, na.rm = TRUE),
    Min = min(ratio, na.rm = TRUE)
  ),
  across(everything(), function(value) round(value, 2))
)

# Caption first, then the one-row table
print("Statistics of Percentage of Rules with Review Provisions (1993-2023):")
## [1] "Statistics of Percentage of Rules with Review Provisions (1993-2023):"
print(rules_ratio_stats)
## Mean Median Max Min
## 1 1.4 1.2 3.65 0.55
Plot 5: Count of Articles with Review and Revise Provisions by Legislative and Non-Legislative Acts
# Long table of yearly article counts, one row per year and act form. Every
# `*_articles_with_revise_count` column is pivoted; rows whose prefix is not
# one of the nine act forms below become NA in the factor and are dropped.
articles_count_data <- df_clean %>%
  select(year, matches("_articles_with_revise_count$")) %>%
  pivot_longer(
    cols = -year,
    names_pattern = "(.+)_articles_with_revise_count",
    names_to = "form",
    values_to = "count"
  ) %>%
  mutate(
    # level order fills the 3-column facet grid row by row
    form = factor(
      form,
      levels = c(
        "Directive", "Regulation", "Decision",
        "Implementing directive", "Implementing regulation", "Implementing decision",
        "Delegated directive", "Delegated regulation", "Delegated decision"
      )
    )
  ) %>%
  drop_na(form)

# One panel per act form; y breaks every 10 up to the largest observed count.
ggplot(data = articles_count_data, mapping = aes(x = year, y = count)) +
  geom_line(alpha = 0.7, color = "#2c3e50") +
  geom_point(color = "#2c3e50", size = 2) +
  facet_wrap(vars(form), ncol = 3) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  scale_y_continuous(
    breaks = seq(0, max(articles_count_data$count, na.rm = TRUE), by = 10)
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.margin = margin(1, 1, 1, 1, "cm"),
    panel.spacing = unit(2, "lines"),
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_blank(),
    strip.background = element_blank(),
    strip.text = element_text(face = "bold"),
    text = element_text(family = "Times New Roman"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30")
  ) +
  labs(
    x = "Year",
    y = "Number of Articles",
    title = "Number of Articles with Review and Revise Provisions by Legislative and Non-Legislative Acts",
    subtitle = "Absolute count of articles with review provisions (1993-2023)"
  )
Calculate summary statistics
# Per-form total, mean, maximum and minimum of the yearly article counts,
# largest total first.
# NOTE(review): the per-form totals printed for this table sum to 1097, which
# is the overall *rules* total reported earlier rather than the articles total
# (1271), and they match the rules-by-form table row for row. Confirm that the
# `*_articles_with_revise_count` columns in `df_clean` hold article counts.
articles_count_summary <- arrange(
  summarise(
    group_by(articles_count_data, form),
    Total = sum(count, na.rm = TRUE),
    Mean = mean(count, na.rm = TRUE),
    Max = max(count, na.rm = TRUE),
    Min = min(count, na.rm = TRUE)
  ),
  desc(Total)
)

# Caption first, then every row of the table
print("Summary Statistics of Article Counts by Form:")
## [1] "Summary Statistics of Article Counts by Form:"
print(articles_count_summary, n = Inf)
## # A tibble: 9 × 5
## form Total Mean Max Min
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Regulation 524 16.9 37 8
## 2 Directive 421 13.6 26 4
## 3 Decision 129 4.16 17 0
## 4 Delegated regulation 12 0.387 8 0
## 5 Implementing regulation 10 0.323 2 0
## 6 Implementing decision 1 0.0323 1 0
## 7 Implementing directive 0 0 0 0
## 8 Delegated directive 0 0 0 0
## 9 Delegated decision 0 0 0 0
Plot 6: Percentage of Articles with Review and Revise Provisions by Legislative and Non-Legislative Acts
# Yearly percentage of articles with review provisions per act form.
# The revise counts are pivoted to long form and matched by (form, year) with
# the long-form totals; the wide `*_total_articles` columns stay in the table.
articles_ratio_data <- df_clean %>%
  select(
    year,
    matches("_articles_with_revise_count$"),
    matches("_total_articles$")
  ) %>%
  pivot_longer(
    cols = matches("_articles_with_revise_count$"),
    names_pattern = "(.+)_articles_with_revise_count",
    names_to = "form",
    values_to = "revise_count"
  ) %>%
  left_join(
    df_clean %>%
      select(year, matches("_total_articles$")) %>%
      pivot_longer(
        cols = matches("_total_articles$"),
        names_pattern = "(.+)_total_articles",
        names_to = "form",
        values_to = "total_count"
      ),
    by = c("form", "year")
  ) %>%
  mutate(
    # missing counts are treated as zero
    revise_count = replace_na(revise_count, 0),
    total_count = replace_na(total_count, 0),
    # a form with no articles in a year gets 0 % instead of a division by zero
    ratio = if_else(total_count == 0, 0, (revise_count / total_count) * 100),
    # level order fills the 3-column facet grid row by row
    form = factor(
      form,
      levels = c(
        "Directive", "Regulation", "Decision",
        "Implementing directive", "Implementing regulation", "Implementing decision",
        "Delegated directive", "Delegated regulation", "Delegated decision"
      )
    )
  ) %>%
  # anything that is not one of the nine act forms became NA above
  drop_na(form)

# One panel per act form, y axis fixed to 0-5 %.
ggplot(data = articles_ratio_data, mapping = aes(x = year, y = ratio)) +
  geom_line(alpha = 0.7, color = "#2c3e50") +
  geom_point(color = "#2c3e50", size = 2) +
  facet_wrap(vars(form), ncol = 3) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  scale_y_continuous(
    limits = c(0, 5),
    labels = function(value) paste0(value, "%")
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.margin = margin(1, 1, 1, 1, "cm"),
    panel.spacing = unit(2, "lines"),
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_blank(),
    strip.background = element_blank(),
    strip.text = element_text(face = "bold"),
    text = element_text(family = "Times New Roman"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30")
  ) +
  labs(
    x = "Year",
    y = "Percentage",
    title = "Percentage of Articles with Review and Revise Provisions by Legislative and Non-Legislative Acts",
    subtitle = "Ratio calculated against the number of articles of each type of rules (1993-2023)"
  )
Calculate summary statistics
# Per-form mean, median, standard deviation and range of the yearly article
# percentages, rounded to two decimals and sorted by mean, highest first.
articles_summary <- arrange(
  mutate(
    summarise(
      group_by(articles_ratio_data, form),
      Mean = mean(ratio, na.rm = TRUE),
      Median = median(ratio, na.rm = TRUE),
      SD = sd(ratio, na.rm = TRUE),
      Min = min(ratio, na.rm = TRUE),
      Max = max(ratio, na.rm = TRUE)
    ),
    across(where(is.numeric), function(value) round(value, 2))
  ),
  desc(Mean)
)

# Caption first, then every row of the table
print("Summary Statistics of Percentage of Articles with Review Provisions by Form:")
## [1] "Summary Statistics of Percentage of Articles with Review Provisions by Form:"
print(articles_summary, n = Inf)
## # A tibble: 9 × 6
## form Mean Median SD Min Max
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Directive 2 1.71 0.95 0.73 4.69
## 2 Regulation 0.47 0.31 0.39 0.11 1.48
## 3 Decision 0.14 0.12 0.14 0 0.59
## 4 Delegated regulation 0.04 0 0.12 0 0.57
## 5 Implementing regulation 0.01 0 0.03 0 0.11
## 6 Implementing directive 0 0 0 0 0
## 7 Implementing decision 0 0 0.02 0 0.12
## 8 Delegated directive 0 0 0 0 0
## 9 Delegated decision 0 0 0 0 0
Plot 7: Number of Rules with Review Provisions by Legislative and Non-Legislative Acts (1993-2023)
# Long table of yearly rule counts, one row per year and act form. Every
# `*_rules_with_revise_count` column is pivoted; rows whose prefix is not one
# of the nine act forms below become NA in the factor and are dropped.
rules_count_data <- df_clean %>%
  select(year, matches("_rules_with_revise_count$")) %>%
  pivot_longer(
    cols = -year,
    names_pattern = "(.+)_rules_with_revise_count",
    names_to = "form",
    values_to = "count"
  ) %>%
  mutate(
    # level order fills the 3-column facet grid row by row
    form = factor(
      form,
      levels = c(
        "Directive", "Regulation", "Decision",
        "Implementing directive", "Implementing regulation", "Implementing decision",
        "Delegated directive", "Delegated regulation", "Delegated decision"
      )
    )
  ) %>%
  drop_na(form)

# One panel per act form; y breaks every 5 up to the largest observed count.
ggplot(data = rules_count_data, mapping = aes(x = year, y = count)) +
  geom_line(alpha = 0.7, color = "#2c3e50") +
  geom_point(color = "#2c3e50", size = 2) +
  facet_wrap(vars(form), ncol = 3) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  scale_y_continuous(
    breaks = seq(0, max(rules_count_data$count, na.rm = TRUE), by = 5)
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.margin = margin(1, 1, 1, 1, "cm"),
    panel.spacing = unit(2, "lines"),
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_blank(),
    strip.background = element_blank(),
    strip.text = element_text(face = "bold"),
    text = element_text(family = "Times New Roman"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30")
  ) +
  labs(
    x = "Year",
    y = "Number of Rules",
    title = "Number of Rules with Review and Revise Provisions by Legislative and Non-Legislative Acts",
    subtitle = "Absolute count of rules with review provisions (1993-2023)"
  )
Calculate summary statistics for counts
# Per-form total, mean, maximum and minimum of the yearly rule counts,
# largest total first.
count_summary <- arrange(
  summarise(
    group_by(rules_count_data, form),
    Total = sum(count, na.rm = TRUE),
    Mean = mean(count, na.rm = TRUE),
    Max = max(count, na.rm = TRUE),
    Min = min(count, na.rm = TRUE)
  ),
  desc(Total)
)

# Caption first, then every row of the table
print("Summary Statistics of Rule Counts by Form:")
## [1] "Summary Statistics of Rule Counts by Form:"
print(count_summary, n = Inf)
## # A tibble: 9 × 5
## form Total Mean Max Min
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Regulation 524 16.9 37 8
## 2 Directive 421 13.6 26 4
## 3 Decision 129 4.16 17 0
## 4 Delegated regulation 12 0.387 8 0
## 5 Implementing regulation 10 0.323 2 0
## 6 Implementing decision 1 0.0323 1 0
## 7 Implementing directive 0 0 0 0
## 8 Delegated directive 0 0 0 0
## 9 Delegated decision 0 0 0 0
Plot 8: Percentage of Rules with Review Provisions by Legislative and Non-Legislative Acts (1993-2023)
# Intentionally no code in this chunk: `rules_count_data` and its faceted count
# chart are already produced by the Plot 7 chunk, and repeating that code here
# only re-drew the count chart under the "Percentage" heading. The percentage
# chart for Plot 8 is built from `rules_ratio_data` in the next chunk.
Calculate rules ratio data
# Yearly percentage of rules with review provisions per act form.
# The revise counts are pivoted to long form and matched by (form, year) with
# the long-form totals; the wide `*_total_rules` columns stay in the table.
rules_ratio_data <- df_clean %>%
  select(
    year,
    matches("_rules_with_revise_count$"),
    matches("_total_rules$")
  ) %>%
  pivot_longer(
    cols = matches("_rules_with_revise_count$"),
    names_pattern = "(.+)_rules_with_revise_count",
    names_to = "form",
    values_to = "revise_count"
  ) %>%
  left_join(
    df_clean %>%
      select(year, matches("_total_rules$")) %>%
      pivot_longer(
        cols = matches("_total_rules$"),
        names_pattern = "(.+)_total_rules",
        names_to = "form",
        values_to = "total_count"
      ),
    by = c("form", "year")
  ) %>%
  mutate(
    # missing counts are treated as zero
    revise_count = replace_na(revise_count, 0),
    total_count = replace_na(total_count, 0),
    # a form with no rules in a year gets 0 % instead of a division by zero
    ratio = if_else(total_count == 0, 0, (revise_count / total_count) * 100),
    # level order fills the 3-column facet grid row by row
    form = factor(
      form,
      levels = c(
        "Directive", "Regulation", "Decision",
        "Implementing directive", "Implementing regulation", "Implementing decision",
        "Delegated directive", "Delegated regulation", "Delegated decision"
      )
    )
  ) %>%
  # anything that is not one of the nine act forms became NA above
  drop_na(form)

# One panel per act form, y axis fixed to 0-55 %.
p4 <- ggplot(data = rules_ratio_data, mapping = aes(x = year, y = ratio)) +
  geom_line(alpha = 0.7, color = "#2c3e50") +
  geom_point(color = "#2c3e50", size = 2) +
  facet_wrap(vars(form), ncol = 3) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  scale_y_continuous(
    limits = c(0, 55),
    labels = function(value) paste0(value, "%")
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.margin = margin(1, 1, 1, 1, "cm"),
    panel.spacing = unit(2, "lines"),
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_blank(),
    strip.background = element_blank(),
    strip.text = element_text(face = "bold"),
    text = element_text(family = "Times New Roman"),
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 16, color = "gray30")
  ) +
  labs(
    x = "Year",
    y = "Percentage",
    title = "Percentage of Rules with Review and Revise Provisions by Legislative and Non-Legislative Acts (1993-2023)",
    subtitle = "Ratio calculated against total number of each rules"
  )
print(p4)
Calculate summary statistics for rules
# Per-form mean, median, standard deviation and range of the yearly rule
# percentages, rounded to two decimals and sorted by mean, highest first.
rules_summary <- arrange(
  mutate(
    summarise(
      group_by(rules_ratio_data, form),
      Mean = mean(ratio, na.rm = TRUE),
      Median = median(ratio, na.rm = TRUE),
      SD = sd(ratio, na.rm = TRUE),
      Min = min(ratio, na.rm = TRUE),
      Max = max(ratio, na.rm = TRUE)
    ),
    across(where(is.numeric), function(value) round(value, 2))
  ),
  desc(Mean)
)

# Caption first, then every row of the table
print("Summary Statistics of Percentage of Rules with Review Provisions by Form:")
## [1] "Summary Statistics of Percentage of Rules with Review Provisions by Form:"
print(rules_summary, n = Inf)
## # A tibble: 9 × 6
## form Mean Median SD Min Max
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Directive 23.0 18.2 13.5 6.32 53.3
## 2 Regulation 3.62 1.15 4.41 0.3 17.4
## 3 Decision 0.42 0.42 0.39 0 1.64
## 4 Delegated regulation 0.3 0 1.09 0 5.84
## 5 Implementing regulation 0.05 0 0.09 0 0.36
## 6 Implementing decision 0.01 0 0.07 0 0.38
## 7 Implementing directive 0 0 0 0 0
## 8 Delegated directive 0 0 0 0 0
## 9 Delegated decision 0 0 0 0 0
Articles by Domain
# Act-form prefixes. The domain chunks drop these so that only policy-domain
# columns remain after pivoting.
EXCLUDED_FORMS <- c(
  "Decision",
  "Regulation",
  "Implementing regulation",
  "Directive",
  "Implementing decision",
  "Delegated regulation",
  "Implementing directive",
  "Delegated directive",
  "Delegated decision"
)

# Named lookup from domain name (as used in the column prefixes) to its display
# label. Four domains get a shortened label; the rest map to themselves.
domain_labels <- c(
  c(
    "Agriculture Forestry And Fisheries" = "Agriculture, Forestry & Fisheries",
    "Business And Competition" = "Business & Competition",
    "Education And Communications" = "Education & Communications",
    "Production Technology And Research" = "Production Technology & Research"
  ),
  # identity entries: name and label are the same string
  local({
    unchanged <- c(
      "European Union", "Agri Foodstuffs", "Trade", "Geography",
      "Social Questions", "International Relations", "Economics", "Politics",
      "Finance", "Environment", "Transport", "Industry",
      "Employment And Working Conditions", "Law", "Energy",
      "International Organisations", "Science"
    )
    setNames(unchanged, unchanged)
  })
)
# Long table of yearly article counts per domain: pivot every
# `*_articles_with_revise_count` column, collapse runs of whitespace in the
# extracted name, drop the act-form prefixes, and make `domain` a factor whose
# levels follow the order of `domain_labels`.
articles_domain_data <- df_clean %>%
  select(year, matches("_articles_with_revise_count$")) %>%
  pivot_longer(
    cols = matches("_articles_with_revise_count$"),
    names_pattern = "(.+)_articles_with_revise_count",
    names_to = "domain",
    values_to = "count"
  ) %>%
  mutate(domain = str_replace_all(domain, "\\s+", " ")) %>%
  filter(!domain %in% EXCLUDED_FORMS) %>%
  mutate(domain = factor(domain, levels = names(domain_labels)))
# Yearly percentage of articles with review provisions per domain.
# Revise counts and totals are each pivoted to long form, cleaned the same way
# (whitespace collapsed, act forms removed) and matched by (domain, year); the
# wide `*_total_articles` columns stay in the table.
articles_ratio_domain_data <- df_clean %>%
  select(
    year,
    matches("_articles_with_revise_count$"),
    matches("_total_articles$")
  ) %>%
  pivot_longer(
    cols = matches("_articles_with_revise_count$"),
    names_pattern = "(.+)_articles_with_revise_count",
    names_to = "domain",
    values_to = "revise_count"
  ) %>%
  mutate(
    domain = str_replace_all(domain, "\\s+", " "),
    revise_count = replace_na(revise_count, 0)
  ) %>%
  filter(!domain %in% EXCLUDED_FORMS) %>%
  left_join(
    df_clean %>%
      select(year, matches("_total_articles$")) %>%
      pivot_longer(
        cols = matches("_total_articles$"),
        names_pattern = "(.+)_total_articles",
        names_to = "domain",
        values_to = "total_count"
      ) %>%
      mutate(
        domain = str_replace_all(domain, "\\s+", " "),
        total_count = replace_na(total_count, 0)
      ) %>%
      filter(!domain %in% EXCLUDED_FORMS),
    by = c("domain", "year")
  ) %>%
  mutate(
    # domains without a matching total come back from the join as NA
    revise_count = replace_na(revise_count, 0),
    total_count = replace_na(total_count, 0),
    # a domain with no articles in a year gets 0 % instead of a division by zero
    ratio = if_else(total_count == 0, 0, (revise_count / total_count) * 100),
    # alphabetical levels; the plotting chunk re-levels by average ratio
    domain = factor(
      domain,
      levels = c(
        "Agri Foodstuffs", "Agriculture Forestry And Fisheries", "Business And Competition",
        "Economics", "Education And Communications", "Employment And Working Conditions",
        "Energy", "Environment", "European Union", "Finance", "Geography", "Industry",
        "International Organisations", "International Relations", "Law", "Politics",
        "Production Technology And Research", "Science", "Social Questions", "Trade", "Transport"
      )
    )
  )
# Domains sorted by their mean yearly count, highest first; used as the facet
# order below.
articles_domain_order <- pull(
  arrange(
    summarize(
      group_by(articles_domain_data, domain),
      avg_count = mean(count, na.rm = TRUE)
    ),
    desc(avg_count)
  ),
  domain
)

# Bar chart per domain, panels in the order computed above and titled with the
# display labels from `domain_labels`.
articles_domain_data %>%
  mutate(domain = factor(domain, levels = articles_domain_order)) %>%
  ggplot(mapping = aes(x = year, y = count)) +
  geom_col(width = 0.8, alpha = 0.7, fill = "#2c3e50") +
  facet_wrap(vars(domain), ncol = 3, labeller = labeller(domain = domain_labels)) +
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1)) +
  create_base_theme() +
  labs(
    x = "Year",
    y = "Number of Articles",
    title = "Number of Articles with Review Provisions by Domain",
    subtitle = "1993-2023 (Ordered by Average Count)"
  )
Calculate summary statistics
# Per-domain total, mean, median, standard deviation and range of the yearly
# article counts, rounded to two decimals and sorted by total, highest first.
# Note: this reassigns `articles_count_summary`, which the by-form chunk also
# uses.
articles_count_summary <- articles_domain_data %>%
  group_by(domain) %>%
  summarise(
    Total = sum(count, na.rm = TRUE),
    Mean = mean(count, na.rm = TRUE),
    Median = median(count, na.rm = TRUE),
    SD = sd(count, na.rm = TRUE),
    Min = min(count, na.rm = TRUE),
    Max = max(count, na.rm = TRUE)
  ) %>%
  # Round all numeric columns to 2 decimal places
  mutate(across(where(is.numeric), ~ round(., 2))) %>%
  # Sort by total in descending order
  arrange(desc(Total)) %>%
  # Display names come from the shared `domain_labels` lookup instead of a
  # hand-copied case_when(), so table and facet labels cannot drift apart.
  # The factor levels of `domain` are exactly names(domain_labels).
  mutate(domain = unname(domain_labels[as.character(domain)]))

# print() shows escape sequences literally, so the caption carries no "\n".
print("Summary Statistics of Article Counts by Domain:")
## [1] "Summary Statistics of Article Counts by Domain:"
print(articles_count_summary, n = Inf)
## # A tibble: 21 × 7
## domain Total Mean Median SD Min Max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 European Union 158. 5.09 4.6 2.4 1.32 10.5
## 2 Trade 133. 4.28 3.52 2.48 1.57 12.4
## 3 Environment 96.0 3.1 2.78 2 0.25 7.55
## 4 Finance 89.3 2.88 2.08 2.53 0.5 13.0
## 5 Social Questions 73.4 2.37 2.08 1.12 0.53 5.33
## 6 Education & Communications 72.9 2.35 2.18 1 0.4 4.39
## 7 Law 71.5 2.31 2.17 1.41 0 5.58
## 8 Production Technology & Research 65.6 2.12 1.83 1.15 0.9 6.08
## 9 Transport 63.4 2.05 1.75 1.31 0.5 5.83
## 10 International Relations 62.4 2.01 1.98 1.09 0.45 4.22
## 11 Business & Competition 60.4 1.95 1.98 1.15 0.2 5.5
## 12 Agriculture, Forestry & Fisheries 59.3 1.91 1.2 1.59 0 7.17
## 13 Economics 55.5 1.79 1.45 1.25 0.25 4.9
## 14 Employment And Working Conditions 36.8 1.19 1.08 0.83 0.2 3.12
## 15 Energy 36.6 1.18 0.7 1.51 0 6.88
## 16 Geography 35.2 1.14 0.98 0.85 0 4.53
## 17 Industry 33.0 1.07 0.9 1.03 0 5.12
## 18 Politics 31.4 1.01 0.75 0.81 0 3.12
## 19 Agri Foodstuffs 30.3 0.98 0.73 0.92 0 3.45
## 20 International Organisations 4.79 0.15 0 0.23 0 0.87
## 21 Science 2.68 0.09 0 0.15 0 0.5
Ratio for Articles with review and revise provisions
# Domains sorted by their mean yearly percentage, highest first; used as the
# facet order below.
articles_ratio_domain_order <- articles_ratio_domain_data %>%
  group_by(domain) %>%
  summarize(avg_ratio = mean(ratio, na.rm = TRUE)) %>%
  arrange(desc(avg_ratio)) %>%
  pull(domain)

# Bar chart per domain, panels in the order computed above and titled with the
# display labels from `domain_labels`.
ggplot(
  articles_ratio_domain_data %>%
    mutate(domain = factor(domain, levels = articles_ratio_domain_order)),
  aes(x = year, y = ratio)
) +
  geom_col(fill = "#2c3e50", alpha = 0.7, width = 0.8) + # Bar plot
  facet_wrap(~ domain, ncol = 3, labeller = labeller(domain = domain_labels)) +
  scale_x_continuous(
    breaks = seq(1995, 2020, by = 10),
    # Bars are 0.8 wide, so the 1993 and 2023 bars extend 0.4 beyond those
    # years. Limits of exactly c(1993, 2023) put their edges out of bounds and
    # ggplot2 drops both bars in every facet; half a year of slack keeps them.
    limits = c(1992.5, 2023.5)
  ) +
  scale_y_continuous(
    labels = function(x) paste0(x, "%"),
    limits = c(0, 7)
  ) +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(color = "gray90"),
    panel.grid.major.x = element_line(color = "gray90"),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    strip.text = element_text(face = "bold", size = 10),
    strip.background = element_blank(),
    panel.spacing = unit(1.5, "lines"),
    plot.margin = margin(1, 1, 1, 1, "cm"),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    axis.title = element_text(size = 12)
  ) +
  labs(
    title = "Percentage of Articles with Review Provisions by Domain",
    x = "Year",
    y = "Percentage of Articles"
  )
Calculate summary statistics
# Per-domain mean, median, standard deviation and range of the yearly article
# percentages, rounded to two decimals and sorted by mean, highest first.
articles_ratio_summary <- articles_ratio_domain_data %>%
  group_by(domain) %>%
  summarise(
    Mean = mean(ratio, na.rm = TRUE),
    Median = median(ratio, na.rm = TRUE),
    SD = sd(ratio, na.rm = TRUE),
    Min = min(ratio, na.rm = TRUE),
    Max = max(ratio, na.rm = TRUE)
  ) %>%
  # Round all numeric columns to 2 decimal places
  mutate(across(where(is.numeric), ~ round(., 2))) %>%
  # Sort by mean in descending order
  arrange(desc(Mean)) %>%
  # Display names come from the shared `domain_labels` lookup instead of a
  # hand-copied case_when(), so table and facet labels cannot drift apart.
  # Every factor level of `domain` is a name in `domain_labels`.
  mutate(domain = unname(domain_labels[as.character(domain)]))

# Print the results with percentages
print("Summary Statistics of Percentage of Articles with Review Provisions by Domain:")
## [1] "Summary Statistics of Percentage of Articles with Review Provisions by Domain:"
print(articles_ratio_summary, n = Inf)
## # A tibble: 21 × 6
## domain Mean Median SD Min Max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Energy 1.16 0.97 1.04 0 4.46
## 2 Environment 1 0.89 0.56 0.19 2.66
## 3 Employment And Working Conditions 0.92 0.78 0.68 0.23 3.83
## 4 Transport 0.76 0.69 0.53 0.14 2.79
## 5 Law 0.71 0.72 0.35 0 1.31
## 6 Education & Communications 0.7 0.72 0.34 0.13 1.7
## 7 Business & Competition 0.68 0.65 0.35 0.11 1.25
## 8 Production Technology & Research 0.62 0.57 0.29 0.18 1.2
## 9 Social Questions 0.57 0.57 0.19 0.15 1.04
## 10 Economics 0.56 0.53 0.3 0.07 1.39
## 11 Finance 0.53 0.44 0.37 0.1 1.58
## 12 Politics 0.5 0.43 0.31 0 1.22
## 13 European Union 0.48 0.44 0.19 0.19 0.86
## 14 Industry 0.42 0.33 0.38 0 1.97
## 15 International Organisations 0.41 0 0.68 0 3.03
## 16 International Relations 0.33 0.33 0.17 0.09 0.71
## 17 Trade 0.29 0.23 0.19 0.1 0.89
## 18 Science 0.22 0 0.4 0 1.65
## 19 Agriculture, Forestry & Fisheries 0.18 0.15 0.13 0 0.57
## 20 Geography 0.12 0.1 0.08 0 0.36
## 21 Agri Foodstuffs 0.08 0.06 0.07 0 0.29
Rules by domain
# Shared ggplot2 theme for the domain plots: theme_minimal() at base size 12
# with the overrides listed below. Takes no arguments and returns the
# combined theme so it can be added to a plot with `+`.
create_base_theme <- function() {
  base_theme <- theme_minimal(base_size = 12)
  overrides <- theme(
    text = element_text(family = "Times New Roman"),
    # Keep only light major grid lines
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray90"),
    plot.title = element_text(size = 11, face = "bold"),
    # Facet strips: bold label, no background box
    strip.text = element_text(face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines"),
    plot.margin = margin(1, 1, 1, 1, "cm"),
    # Slanted x-axis labels
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
  base_theme + overrides
}
# Forms to leave out of the domain analysis. The data-preparation steps
# filter out every row whose extracted name is one of these values.
EXCLUDED_FORMS <- c(
  "Decision",
  "Regulation",
  "Implementing regulation",
  "Directive",
  "Implementing decision",
  "Delegated regulation",
  "Implementing directive",
  "Delegated directive",
  "Delegated decision"
)
# Lookup from raw domain name (vector names) to display label (values).
# The order of the names matters: it is reused as factor levels when the
# domain data is prepared.
domain_labels <- local({
  # Domains whose display label differs from the raw name
  relabelled <- c(
    "Agriculture Forestry And Fisheries" = "Agriculture, Forestry & Fisheries",
    "Business And Competition" = "Business & Competition",
    "Education And Communications" = "Education & Communications",
    "Production Technology And Research" = "Production Technology & Research"
  )
  # Domains that are displayed under their raw name
  unchanged <- c(
    "European Union", "Agri Foodstuffs", "Trade", "Geography",
    "Social Questions", "International Relations", "Economics", "Politics",
    "Finance", "Environment", "Transport", "Industry",
    "Employment And Working Conditions", "Law", "Energy",
    "International Organisations", "Science"
  )
  c(relabelled, setNames(unchanged, unchanged))
})
Data Preparation
# Long-format counts of rules with review provisions: one row per year and
# "<name>_rules_with_revise_count" column, with <name> stored in `domain`.
rules_domain_data <- df_clean %>%
  select(year, matches("_rules_with_revise_count$")) %>%
  pivot_longer(
    cols = -year,
    names_to = "domain",
    names_pattern = "(.+)_rules_with_revise_count",
    values_to = "count"
  ) %>%
  # Collapse runs of whitespace in the extracted names to a single space
  mutate(domain = str_replace_all(domain, "\\s+", " ")) %>%
  # Drop the rows that belong to excluded forms
  filter(!domain %in% EXCLUDED_FORMS) %>%
  # Factor levels follow the order of the label lookup
  mutate(domain = factor(domain, levels = names(domain_labels)))
# Yearly percentage of rules with review provisions per domain.
# Built in three steps inside local() so the intermediate tables do not
# leak into the global environment:
#   1. long table of review-provision counts (the wide *_total_rules columns
#      are carried along untouched, as before),
#   2. long table of total rule counts,
#   3. join of the two on domain/year and computation of the ratio.
rules_ratio_domain_data <- local({
  revise_long <- df_clean %>%
    select(
      year,
      matches("_rules_with_revise_count$"),
      matches("_total_rules$")
    ) %>%
    pivot_longer(
      cols = matches("_rules_with_revise_count$"),
      names_to = "domain",
      names_pattern = "(.+)_rules_with_revise_count",
      values_to = "revise_count"
    ) %>%
    mutate(
      # Collapse possible double spaces in the extracted names
      domain = stringr::str_replace_all(domain, "\\s+", " "),
      revise_count = replace_na(revise_count, 0)
    ) %>%
    filter(!domain %in% EXCLUDED_FORMS)

  totals_long <- df_clean %>%
    select(year, matches("_total_rules$")) %>%
    pivot_longer(
      cols = -year,
      names_to = "domain",
      names_pattern = "(.+)_total_rules",
      values_to = "total_count"
    ) %>%
    mutate(domain = stringr::str_replace_all(domain, "\\s+", " ")) %>%
    # Forms are excluded on this side of the join as well
    filter(!domain %in% EXCLUDED_FORMS)

  revise_long %>%
    left_join(totals_long, by = c("domain", "year")) %>%
    mutate(
      # Covers both missing totals in the data and rows without a join match
      total_count = replace_na(total_count, 0),
      # A domain/year without any rules gets a ratio of 0 rather than NaN
      ratio = case_when(
        total_count != 0 ~ (revise_count / total_count) * 100,
        TRUE ~ 0
      ),
      # Fixed factor levels for the remaining domains
      domain = factor(domain, levels = c(
        "Agri Foodstuffs",
        "Agriculture Forestry And Fisheries",
        "Business And Competition",
        "Economics",
        "Education And Communications",
        "Employment And Working Conditions",
        "Energy",
        "Environment",
        "European Union",
        "Finance",
        "Geography",
        "Industry",
        "International Organisations",
        "International Relations",
        "Law",
        "Politics",
        "Production Technology And Research",
        "Science",
        "Social Questions",
        "Trade",
        "Transport"
      ))
    )
})
Rules by domain
# Rank the domains by their mean yearly count (highest first). The resulting
# vector of domains supplies the factor levels, and therefore the facet
# order, for the count plot of rules.
domain_order <- rules_domain_data %>%
  group_by(domain) %>%
  summarise(mean_count = mean(count, na.rm = TRUE)) %>%
  arrange(desc(mean_count)) %>%
  pull(domain)
# Faceted bar chart: yearly number of rules with review provisions, one
# panel per domain, panels ordered by descending average count.
rules_domain_data %>%
  mutate(domain = factor(domain, levels = domain_order)) %>%
  ggplot(aes(x = year, y = count)) +
  geom_col(fill = "#2c3e50", alpha = 0.7, width = 0.8) +
  facet_wrap(~ domain, ncol = 3, labeller = labeller(domain = domain_labels)) +
  # Tick every five years; y labels with one decimal place
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1)) +
  create_base_theme() +
  labs(
    title = "Number of Rules with Review Provisions by Domain",
    x = "Year",
    y = "Number of Rules"
  )
Calculate summary statistics
# Per-domain summary statistics of the yearly number of rules with review
# provisions: total, mean, median, standard deviation, minimum and maximum.
rules_count_summary <- rules_domain_data %>%
  group_by(domain) %>%
  summarise(
    Total = sum(count, na.rm = TRUE),
    Mean = mean(count, na.rm = TRUE),
    Median = median(count, na.rm = TRUE),
    SD = sd(count, na.rm = TRUE),
    Min = min(count, na.rm = TRUE),
    Max = max(count, na.rm = TRUE)
  ) %>%
  # Rank on the exact totals before rounding: sorting rounded values turns
  # near-equal domains into ties whose order is then decided by group order.
  arrange(desc(Total)) %>%
  # Round all numeric columns to 2 decimal places for display
  mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
  # Apply the display labels from domain_labels (the same lookup the plots
  # use as labeller) instead of repeating the mapping by hand; domains
  # without an entry keep their original name.
  mutate(domain = case_when(
    domain %in% names(domain_labels) ~
      unname(domain_labels[as.character(domain)]),
    TRUE ~ as.character(domain)
  ))
# Print the results
print("Summary Statistics of Number of Rules with Review Provisions by Domain:")
## [1] "Summary Statistics of Number of Rules with Review Provisions by Domain:"
print(rules_count_summary, n = Inf)
## # A tibble: 21 × 7
## domain Total Mean Median SD Min Max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 European Union 143. 4.6 3.87 2.08 1.32 8.58
## 2 Trade 107. 3.44 3.23 1.58 1.42 7.86
## 3 Environment 78.2 2.52 2 1.6 0.25 6.57
## 4 Education & Communications 68.6 2.21 2.12 0.9 0.4 3.95
## 5 Social Questions 66.9 2.16 2.08 0.91 0.37 3.92
## 6 Finance 65.4 2.11 1.75 1.37 0.5 5.66
## 7 Law 64.9 2.09 1.87 1.33 0 5.08
## 8 Production Technology & Research 58.8 1.9 1.7 0.87 0.7 4.07
## 9 International Relations 57.5 1.86 1.82 1.02 0.45 4.22
## 10 Transport 57.1 1.84 1.58 1.08 0.5 4.83
## 11 Business & Competition 49.4 1.59 1.33 0.93 0.2 4.5
## 12 Agriculture, Forestry & Fisheries 49.2 1.59 1.2 1.19 0 4.83
## 13 Economics 47.3 1.52 1.33 1 0.25 4.22
## 14 Geography 31.8 1.03 0.92 0.67 0 3.28
## 15 Energy 30.1 0.97 0.58 1.25 0 6.02
## 16 Politics 29.9 0.96 0.75 0.76 0 3.12
## 17 Employment And Working Conditions 29.2 0.94 0.92 0.58 0.2 2.29
## 18 Industry 28.7 0.93 0.75 0.92 0 5.12
## 19 Agri Foodstuffs 27.7 0.89 0.73 0.85 0 3.03
## 20 International Organisations 4.34 0.14 0 0.22 0 0.87
## 21 Science 2.68 0.09 0 0.15 0 0.5
# Rank the domains by their mean yearly ratio (highest first). The resulting
# vector of domains supplies the factor levels, and therefore the facet
# order, for the ratio plot of rules.
ratio_domain_order <- rules_ratio_domain_data %>%
  group_by(domain) %>%
  summarise(mean_ratio = mean(ratio, na.rm = TRUE)) %>%
  arrange(desc(mean_ratio)) %>%
  pull(domain)
# Faceted bar chart: yearly percentage of rules with review provisions, one
# panel per domain, panels ordered by descending average ratio.
rules_ratio_domain_data %>%
  mutate(domain = factor(domain, levels = ratio_domain_order)) %>%
  ggplot(aes(x = year, y = ratio)) +
  geom_col(fill = "#2c3e50", alpha = 0.7, width = 0.8) +
  facet_wrap(~ domain, ncol = 3, labeller = labeller(domain = domain_labels)) +
  # Tick every five years; y labels get a percent sign appended
  scale_x_continuous(breaks = seq(1993, 2023, by = 5)) +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  create_base_theme() +
  labs(
    title = "Percentage of Rules with Review Provisions by Domain",
    subtitle = "1993-2023 (Ordered by Average Percentage)",
    x = "Year",
    y = "Percentage of Rules"
  )
Calculate summary
# Mean yearly percentage of rules with review provisions per domain,
# sorted from highest to lowest and rounded to 2 decimal places.
ratio_summary <- rules_ratio_domain_data %>%
  group_by(domain) %>%
  summarise(avg_ratio = mean(ratio, na.rm = TRUE)) %>%
  arrange(desc(avg_ratio)) %>%
  mutate(avg_ratio = round(avg_ratio, 2))
# Print the results.
# The heading goes through cat(): print() shows a string with its escapes,
# so the leading "\n" appeared literally instead of as a line break.
cat("\nAverage ratios by domain:\n")
##
## Average ratios by domain:
# n = Inf prints every domain; the default tibble print stops after 10 rows
# and would hide the remaining domains.
print(ratio_summary, n = Inf)
## # A tibble: 21 × 2
## domain avg_ratio
## <fct> <dbl>
## 1 Energy 6
## 2 Environment 4.77
## 3 Law 4.47
## 4 Education And Communications 4.3
## 5 Employment And Working Conditions 3.69
## 6 Transport 3.4
## 7 Production Technology And Research 3.13
## 8 Politics 2.75
## 9 Social Questions 2.66
## 10 Business And Competition 2.55
## # ℹ 11 more rows