Show code
# Read the three CSV inputs for this analysis; the paths are resolved
# relative to the current working directory
data <- read_csv(file = "metrics_history.csv")
convergence_data <- read_csv(file = "convergence_summary.csv")
step_stats <- read_csv(file = "step_statistics.csv")
# Display data structure
glimpse(data)
Rows: 3,024
Columns: 8
$ clusters <dbl> 30, 10, 6, 8, 10, 8, 7, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,…
$ switch_rate <dbl> 0.5040783, 0.3921016, 0.3554572, 0.3007299, 0.2660819, 0…
$ distance <dbl> 1.386667, 1.610000, 1.916667, 2.090000, 2.340000, 2.6966…
$ mix_deviation <dbl> 0.2048734, 0.2590437, 0.2909444, 0.3238770, 0.3448532, 0…
$ share <dbl> 0.6342857, 0.7167868, 0.7529412, 0.7989474, 0.8215417, 0…
$ ghetto_rate <dbl> 32, 63, 96, 134, 148, 164, 178, 183, 192, 192, 192, 192,…
$ step <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16…
$ run_id <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
Show code
# Summary of runs and steps
# NOTE: n_steps counts the distinct `step` values observed across ALL runs.
# It is not the length of every individual run: total rows need not equal
# n_runs * n_steps when some runs contain fewer steps.
n_runs <- length(unique(data$run_id))
n_steps <- length(unique(data$step))
metrics <- c(
  "clusters", "switch_rate", "distance",
  "mix_deviation", "share", "ghetto_rate"
)

# Calculate convergence statistics
# `%in% TRUE` maps a missing convergence flag to FALSE, so an NA in
# `converged` can neither turn the count into NA (which would make the
# `if` condition below fail) nor select NA entries when subsetting.
is_converged <- convergence_data$converged %in% TRUE
n_converged <- sum(is_converged)
avg_convergence <- if (n_converged > 0) {
  round(
    mean(convergence_data$convergence_step[is_converged], na.rm = TRUE),
    1
  )
} else {
  # No converged runs: there is no average to report
  NA_real_
}

# Create summary tibble
# Each value is converted to text on its own. Combining them with c() first
# would coerce the integer counts to double, and as.character() renders
# large doubles in scientific notation (e.g. 100000 becomes "1e+05").
summary_info <- tibble(
  Characteristic = c(
    "Number of runs",
    "Number of steps per run",
    "Total observations",
    "Runs that converged",
    "Average convergence step"
  ),
  Value = c(
    as.character(n_runs),
    as.character(n_steps),
    as.character(nrow(data)),
    as.character(n_converged),
    as.character(avg_convergence)
  )
)
summary_info |>
kable(caption = "Dataset Characteristics") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

| Characteristic | Value |
|---|---|
| Number of runs | 100 |
| Number of steps per run | 41 |
| Total observations | 3024 |
| Runs that converged | 100 |
| Average convergence step | 29.2 |