1 Plot function

#' Plot Density Comparison with Normative and Extremism Points
#'
#' Creates a density plot comparing two score vectors with vertical lines marking
#' the Normative Point (median) and Extremism Point (85th percentile) for each dataset.
#' Also displays the Extremism Level (percentage of scores >= EP) on the plot.
#'
#' @param scores1 Numeric vector. The first set of scores to compare.
#' @param scores2 Numeric vector. The second set of scores to compare.
#' @param title Character string. Plot title (default: "Density Comparison: Scores 1 vs Scores 2").
#'
#' @return A ggplot2 object showing the density comparison plot.
#'
#' @examples
#' scores1 <- rnorm(1000, mean = 50, sd = 15)
#' scores2 <- scores1 * 1.15
#' af_plot_density_comparison(scores1, scores2)
#'
#' @export
af_plot_density_comparison <- function(scores1, scores2, 
                                     title = "Density Comparison: Scores 1 vs Scores 2") {
  
  # Input validation
  if (!is.numeric(scores1) || !is.vector(scores1)) {
    stop("scores1 must be a numeric vector")
  }
  if (!is.numeric(scores2) || !is.vector(scores2)) {
    stop("scores2 must be a numeric vector")
  }
  if (length(scores1) == 0 || length(scores2) == 0) {
    stop("Both score vectors must have at least one element")
  }
  if (any(is.na(scores1)) || any(is.na(scores2))) {
    warning("NA values detected in score vectors")
  }
  if (!is.character(title) || length(title) != 1) {
    stop("title must be a single character string")
  }
  
  # Load required libraries
  if (!require(ggplot2, quietly = TRUE)) {
    stop("ggplot2 package is required but not installed")
  }
  
  # Calculate metrics for scores1
  NP1 <- median(scores1, na.rm = TRUE)
  EP1 <- NP1 + 1.5 * mad(scores1, constant = 1)
  EL1 <- mean(scores1 >= EP1, na.rm = TRUE) * 100
  EIN1 <- mean(scores1[scores1 >= EP1], na.rm = TRUE)
  
  # Calculate metrics for scores2
  NP2 <- median(scores2, na.rm = TRUE)
  EP2 <- NP2 + 1.5 * mad(scores2, constant = 1)
  EL2 <- mean(scores2 >= EP2, na.rm = TRUE) * 100
  EIN2 <- mean(scores2[scores2 >= EP2], na.rm = TRUE)
  
  # Create subtitle with summary statistics
  subtitle_text <- paste0(
    "Dashed lines: Scores 1 (NP=", round(NP1,2), ", EP=", round(EP1,2), ", EL=", round(EL1,1), "% , EIN=", round(EIN1,1), " | ",
    "Dotted lines: Scores 2 (NP=", round(NP2,2), ", EP=", round(EP2,2), ", EL=", round(EL2,1), "% , EIN=", round(EIN2,1), ")"
  )
  
  # Create data frame for plotting
  density_data <- data.frame(
    values = c(scores1, scores2),
    group = rep(c("Scores 1", "Scores 2"), c(length(scores1), length(scores2)))
  )
  
  # Create the plot
  p <- ggplot(density_data, aes(x = values, color = group)) +
    geom_density(size = 1.2, alpha = 0.7) +
    
    # Vertical lines for Scores 1 (bottom)
    geom_vline(xintercept = NP1, color = "#2E86C1", linetype = "dashed", size = 0.8) +
    geom_vline(xintercept = EP1, color = "#E74C3C", linetype = "dashed", size = 0.8) +
    
    # Vertical lines for Scores 2 (top)
    geom_vline(xintercept = NP2, color = "#2E86C1", linetype = "dotted", size = 0.8) +
    geom_vline(xintercept = EP2, color = "#E74C3C", linetype = "dotted", size = 0.8) +
    
    # Annotations for Scores 1 (positioned lower)
    annotate("text", x = NP1, y = 0.005, 
             label = paste0("NP1: ", round(NP1, 2)), 
             angle = 90, vjust = -0.5, hjust = 0, color = "#2E86C1", size = 3.5) +
    annotate("text", x = EP1, y = 0.005, 
             label = paste0("EP1: ", round(EP1, 2), "\nEL1: ", round(EL1, 1), "%"), 
             angle = 90, vjust = -0.5, hjust = 0, color = "#E74C3C", size = 3.5) +
    
    # Annotations for Scores 2 (positioned higher)
    annotate("text", x = NP2, y = 0.025, 
             label = paste0("NP2: ", round(NP2, 2)), 
             angle = 90, vjust = 1.5, hjust = 1, color = "#2E86C1", size = 3.5) +
    annotate("text", x = EP2, y = 0.025, 
             label = paste0("EP2: ", round(EP2, 2), "\nEL2: ", round(EL2, 1), "%"), 
             angle = 90, vjust = 1.5, hjust = 1, color = "#E74C3C", size = 3.5) +
    
    # Styling
    scale_color_manual(values = c("Scores 1" = "#34495E", "Scores 2" = "#E67E22")) +
    labs(
      title = title,
      subtitle = subtitle_text,
      x = "Score Values",
      y = "Density",
      color = "Dataset"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 14, face = "bold"),
      plot.subtitle = element_text(size = 10, color = "gray60"),
      legend.position = "top",
      panel.grid.minor = element_blank()
    )
  
  return(p)
}

2 Create right-side skewed scores1 with values between 1 and 7

set.seed(123)  # For reproducibility

# Method: Use beta distribution and scale to 1-7 range
# Beta with alpha < beta creates right skew
n <- 1000
beta_scores <- rbeta(n, shape1 = 1.4, shape2 = 50)  # Right-skewed beta distribution
scores1 <- 1 + beta_scores * 6  # Scale from [0,1] to [1,7]

3 Scenario 1: All population shift.

# Create scores2: same as scores1 but all values increased by 15%
scores2 <- scores1 * 1.15

# Create and display the plot
plot_result1 <- af_plot_density_comparison(scores1, scores2, 
                                           title = "All population shift")
print(plot_result1)

4 Scenario 2: Extremists become more extreme

# Create scores3: same as scores1 but values in the extremism tail increased by 15%
q_threshold <- quantile(scores1, 0.85)
scores3 <- scores1
scores3[scores1 > q_threshold] <- scores3[scores1 > q_threshold] * 1.15

# Call the function with scores1 and scores3
plot_result2 <- af_plot_density_comparison(scores1, scores3, 
                                          title = "Extremists become more extreme")
print(plot_result2)

5 Scenario 3: Extremists become less extreme

# Create scores3: same as scores1 but values in the extremism taildecreased by 15%
q_threshold <- quantile(scores1, 0.85)
scores4 <- scores1
scores4[scores1 > q_threshold] <- scores4[scores1 > q_threshold] * 0.85

# Call the function with scores1 and scores3
plot_result3 <- af_plot_density_comparison(scores1, scores4, 
                                          title = "Extremists become more extreme")
print(plot_result3)