Show code
# Load all data files
# Each CSV is read by relative path, so the working directory must contain
# all three files.
#   data             - metrics history; carries `run_id` and `step` columns
#                      alongside the metric columns summarised below.
#   convergence_data - convergence summary; the summary code reads its
#                      `converged` and `convergence_step` columns.
#   step_stats       - step-level statistics (not used in the chunks shown
#                      here; presumably consumed further down).
data <- read_csv(file = "metrics_history.csv")
convergence_data <- read_csv(file = "convergence_summary.csv")
step_stats <- read_csv(file = "step_statistics.csv")
# Display data structure
glimpse(data)

Rows: 3,196
Columns: 8
$ clusters <dbl> 41, 11, 10, 11, 9, 9, 11, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, …
$ switch_rate <dbl> 0.5215232, 0.4308824, 0.3363914, 0.2912773, 0.2892691, 0…
$ distance <dbl> 1.360000, 1.570000, 2.143333, 2.283333, 2.816667, 2.8200…
$ mix_deviation <dbl> 0.1907738, 0.2501032, 0.3110556, 0.3332302, 0.3464563, 0…
$ share <dbl> 0.6209677, 0.7085106, 0.7804348, 0.8072687, 0.8220245, 0…
$ ghetto_rate <dbl> 31, 57, 116, 142, 156, 165, 171, 182, 184, 185, 188, 188…
$ step <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16…
$ run_id <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
Show code
# Summary of runs and steps
# Builds `summary_info`, a two-column tibble (Characteristic, Value) with the
# headline numbers of the dataset. Every value is rendered as text so that
# counts and the (possibly missing) average can share one column.
n_runs <- length(unique(data$run_id))
# NOTE(review): this counts the distinct step values across ALL runs. It only
# equals "steps per run" if every run has the same length; when
# nrow(data) != n_runs * n_steps the runs differ in length and this is
# effectively the longest observed horizon. Confirm the label is intended.
n_steps <- length(unique(data$step))
metrics <- c(
  "clusters", "switch_rate", "distance",
  "mix_deviation", "share", "ghetto_rate"
)

# Calculate convergence statistics
# Treat a missing `converged` flag as "not converged". Without this guard a
# single NA makes sum() return NA and the `if` below aborts with
# "missing value where TRUE/FALSE needed".
is_converged <- !is.na(convergence_data$converged) &
  convergence_data$converged
n_converged <- sum(is_converged)
avg_convergence <- if (n_converged > 0) {
  round(mean(convergence_data$convergence_step[is_converged], na.rm = TRUE), 1)
} else {
  # No converged run: keep the value numeric so both branches agree in type.
  NA_real_
}

# Create summary tibble
summary_info <- tibble(
  Characteristic = c(
    "Number of runs", "Number of steps per run", "Total observations",
    "Runs that converged", "Average convergence step"
  ),
  Value = as.character(
    c(n_runs, n_steps, nrow(data), n_converged, avg_convergence)
  )
)
summary_info |>
kable(caption = "Dataset Characteristics") |>
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

| Characteristic | Value |
|---|---|
| Number of runs | 100 |
| Number of steps per run | 49 |
| Total observations | 3196 |
| Runs that converged | 100 |
| Average convergence step | 31 |