Introduction for 4.1

# Count cases per case type and express each count as a percentage of all
# cases (rounded to one decimal place). `percentage` stays numeric here.
case_counts <- data %>%
  group_by(case_type) %>%
  summarize(count = n(), .groups = "drop") %>%
  mutate(percentage = round(count / sum(count) * 100, 1))

# Add a total row.
# adorn_totals() sums the numeric columns, so the summed percentage can drift
# from 100 because of rounding; the total row is therefore set to "100.0".
# Every row is formatted with sprintf() so the column is uniformly a
# one-decimal string (mixing the string "100.0" with raw numerics would coerce
# the column to character and print e.g. 24 instead of 24.0).
case_counts_with_total <- case_counts %>%
  adorn_totals("row") %>%
  mutate(percentage = ifelse(case_type == "Total",
                             "100.0",
                             sprintf("%.1f", percentage)))

# Print formatted table with total
case_table <- case_counts_with_total %>%
  kable(format = "html", caption = "Number of cases and percentage distribution by case type") %>%
  kable_styling()

case_table

Number of cases and percentage distribution by case type
case_type	count	percentage
ANTI-CORRUPTION	418	0.5
CIVIL	31148	33.7
CONSTITUTIONAL	7036	7.6
CRIMINAL	31707	34.3
FAMILY	22096	23.9
Total	92405	100.0

# Exploratory Data Analysis (Objective 1)
###############################################################

# 4.2.1
# Testing for normality
# Histogram of time variables
# Create a histogram of time_in_months
# Descriptive statistics of time_in_months, rounded to 2 decimal places.
# These rounded values are used for the plot labels and reference lines.
mean_time <- round(mean(data$time_in_months, na.rm = TRUE), 2)
median_time <- round(median(data$time_in_months, na.rm = TRUE), 2)
sd_time <- round(sd(data$time_in_months, na.rm = TRUE), 2)

# Data frame for the reference normal curve, evaluated on 200 points spanning
# the observed range. The density uses the unrounded mean and SD so that the
# display rounding above does not leak into the fitted curve.
x_vals <- seq(min(data$time_in_months, na.rm = TRUE),
              max(data$time_in_months, na.rm = TRUE),
              length.out = 200)
normal_curve <- data.frame(
  x = x_vals,
  density = dnorm(x_vals,
                  mean = mean(data$time_in_months, na.rm = TRUE),
                  sd = sd(data$time_in_months, na.rm = TRUE))
)

# Density-scaled histogram with the normal curve and labelled statistics
ggplot(data, aes(x = time_in_months)) +
  geom_histogram(aes(y = after_stat(density)),
                 bins = 30, fill = "skyblue", color = "black", alpha = 0.7) +
  labs(title = "Overall Distribution of Time in Months",
       x = "Time in Months",
       y = "Density") +
  theme_minimal() +

  # Normal curve with the sample mean and SD
  geom_line(data = normal_curve, aes(x = x, y = density),
            color = "darkgreen", linewidth = 1) +

  # Vertical line and rotated label for the mean
  geom_vline(xintercept = mean_time, color = "red", linetype = "dashed", linewidth = 1) +
  annotate("text", x = mean_time, y = Inf, label = paste("Mean =", mean_time),
           vjust = 1.5, hjust = 1.1, color = "red", angle = 90, size = 4) +

  # Vertical line and rotated label for the median
  geom_vline(xintercept = median_time, color = "blue", linetype = "dotted", linewidth = 1) +
  annotate("text", x = median_time, y = Inf, label = paste("Median =", median_time),
           vjust = 1.5, hjust = 1.1, color = "blue", angle = 90, size = 4) +

  # Summary statistics in the top-right corner (x = Inf, y = Inf)
  annotate("text",
           x = Inf, y = Inf,
           label = paste("Mean =", mean_time, "\nMedian =", median_time, "\nSD =", sd_time),
           hjust = 1.1, vjust = 1.1, size = 4, color = "black", fontface = "bold")

# Exploratory Data Analysis (Objective 1)
###############################################################

# 4.2.1
# Descriptive statistics of time_in_months for each case type.
# across() applies every summary function to the single duration column and
# names the results "<fn>_time" (mean_time, median_time, min_time, max_time,
# sd_time); missing durations are ignored.
summary_stats <- data %>%
  group_by(case_type) %>%
  summarise(
    across(
      time_in_months,
      list(
        mean = function(x) mean(x, na.rm = TRUE),
        median = function(x) median(x, na.rm = TRUE),
        min = function(x) min(x, na.rm = TRUE),
        max = function(x) max(x, na.rm = TRUE),
        sd = function(x) sd(x, na.rm = TRUE)
      ),
      .names = "{.fn}_time"
    )
  )

# Render the statistics as a styled table
kable_styling(
  kable(summary_stats,
        caption = "Summary statistics of time in months grouped by case type")
)

Summary statistics of time in months grouped by case type
case_type	mean_time	median_time	max_time	sd_time
ANTI-CORRUPTION	14.21770	8	72	16.01312
CIVIL	30.88686	23	130	27.10895
CONSTITUTIONAL	21.53766	14	129	22.23865
CRIMINAL	15.90816	8	126	20.81024
FAMILY	40.19234	35	129	30.14336

# 4.2.2


# 4.2.3
# Distribution by case type

# Mean, median and standard deviation of time_in_months per case_type
# (NAs ignored). The result is returned ungrouped.
summary_stats <- data %>%
  group_by(case_type) %>%
  summarise(
    Mean = mean(time_in_months, na.rm = TRUE),
    Median = median(time_in_months, na.rm = TRUE),
    SD = sd(time_in_months, na.rm = TRUE),
    .groups = "drop"
  )

# Generate normal curve data: for every row of summary_stats, evaluate a
# normal density with that row's Mean and SD on 200 points spanning
# Mean +/- 4 SD, and stack the per-case-type curves into one data frame.
#
# Uses lapply() + bind_rows() rather than the superseded rowwise() %>% do(),
# and avoids local variables named `mean`/`sd` that shadow the base functions.
# Rows whose SD is missing or not positive (e.g. a case type with a single
# observation) are skipped, since no curve can be drawn for them.
normal_curves <- bind_rows(lapply(
  seq_len(nrow(summary_stats)),
  function(i) {
    centre <- summary_stats$Mean[i]
    spread <- summary_stats$SD[i]
    if (!is.finite(centre) || !is.finite(spread) || spread <= 0) {
      return(NULL)  # bind_rows() drops NULL elements
    }
    x_vals <- seq(centre - 4 * spread, centre + 4 * spread, length.out = 200)
    data.frame(
      case_type = summary_stats$case_type[i],
      x = x_vals,
      density = dnorm(x_vals, mean = centre, sd = spread)
    )
  }
))

# Label anchors for the SD text: one SD to the right of each mean, at the
# height of the normal density there.
sd_labels <- mutate(
  summary_stats,
  x = Mean + SD,
  y = dnorm(x, mean = Mean, sd = SD),  # x was just defined as Mean + SD
  label = paste0("SD = ", round(SD, 2))
)

# Faceted density histogram of time_in_months, one panel per case type, with
# mean/median reference lines, their text labels, the matching normal curve
# and an SD label. Geom layers are kept in their drawing order; only the
# non-layer components (facets, labels, scales, themes) are grouped together.
ggplot(data, aes(x = time_in_months)) +
  facet_wrap(~ case_type, scales = "free") +

  # Layer 1: histogram on the density scale
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 5, fill = "skyblue", color = "black", alpha = 0.7
  ) +

  # Layers 2-3: mean and median lines (colour mapped so they get a legend)
  geom_vline(
    data = summary_stats,
    aes(xintercept = Mean, color = "Mean"),
    linetype = "dashed", linewidth = 1
  ) +
  geom_vline(
    data = summary_stats,
    aes(xintercept = Median, color = "Median"),
    linetype = "dotted", linewidth = 1
  ) +

  # Layers 4-5: text labels for mean and median, pinned to the panel top
  geom_text(
    data = summary_stats,
    aes(x = Mean, y = Inf, label = paste("Mean =", round(Mean, 2))),
    vjust = 1.5, hjust = -0.3, color = "red", size = 3
  ) +
  geom_text(
    data = summary_stats,
    aes(x = Median, y = Inf, label = paste("Median =", round(Median, 2))),
    vjust = 3, hjust = -0.5, color = "blue", size = 3
  ) +

  # Layer 6: per-case-type normal curve
  geom_line(
    data = normal_curves,
    aes(x = x, y = density),
    color = "darkgreen", linewidth = 1
  ) +

  # Layer 7: SD labels
  geom_text(
    data = sd_labels,
    aes(x = x, y = y, label = label),
    color = "darkgreen", vjust = -1, hjust = -0.1, size = 3
  ) +

  # Legend colours for the mean and median lines
  scale_color_manual(
    name = "Statistics",
    values = c("Mean" = "red", "Median" = "blue")
  ) +
  labs(
    title = "Distribution of Time in Months by Case Type",
    x = "Time in Months",
    y = "Density"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# 4.2.4
# Descriptive statistics of time_in_months for each Appeals group
# (missing durations are ignored).
summary_stats <- summarize(
  group_by(data, Appeals),
  mean_time = mean(time_in_months, na.rm = TRUE),
  median_time = median(time_in_months, na.rm = TRUE),
  min_time = min(time_in_months, na.rm = TRUE),
  max_time = max(time_in_months, na.rm = TRUE),
  sd_time = sd(time_in_months, na.rm = TRUE)
)

# Render the statistics as a styled table
kable_styling(
  kable(summary_stats,
        caption = "Summary statistics of time in months grouped by Appeals")
)

Summary statistics of time in months grouped by Appeals
Appeals	mean_time	median_time	min_time	max_time	sd_time
0	26.22322	15	0	130	28.47277
1	30.08051	24	0	129	23.09141

# Two-sample t-test comparing mean time_in_months between the Appeals groups.
# No var.equal argument is passed, and the printed result below is headed
# "Welch Two Sample t-test", i.e. equal variances are not assumed.
t_test_results <- t.test(time_in_months ~ Appeals, data = data)

# Print a heading, then the full test result
cat("T-test comparing time by Appeals groups:\n")

## T-test comparing time by Appeals groups:

print(t_test_results)

## 
##  Welch Two Sample t-test
## 
## data:  time_in_months by Appeals
## t = -20.669, df = 48116, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -4.223075 -3.491502
## sample estimates:
## mean in group 0 mean in group 1 
##        26.22322        30.08051

# Boxplot of case duration for each Appeals group. Appeals is converted to a
# factor so it is treated as a discrete axis and fill variable.
boxplot <- ggplot(
  data,
  aes(x = factor(Appeals), y = time_in_months, fill = factor(Appeals))
) +
  geom_boxplot() +
  # Custom fill colours keyed by the Appeals value, with readable legend labels
  scale_fill_manual(
    name = "Appeals",
    labels = c("No Appeal", "Appealed"),
    values = c("0" = "#0073C2", "1" = "#EFC000")
  ) +
  labs(
    title = "Boxplot of time in months by Appeals groups",
    x = "Appeals",
    y = "Time in Months"
  ) +
  theme_minimal()

# Draw the plot
print(boxplot)

# 4.2.5 
# Mean, median, SD and group size of case duration for every combination of
# case type and appeal status. Appeals is converted to a factor before
# grouping; the result is returned ungrouped.
summary_stats <- data %>%
  mutate(Appeals = factor(Appeals)) %>%
  group_by(case_type, Appeals) %>%
  summarise(
    Mean = mean(time_in_months, na.rm = TRUE),
    Median = median(time_in_months, na.rm = TRUE),
    SD = sd(time_in_months, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )

# Render as a compact striped table
kable_styling(
  kable(summary_stats,
        caption = "Summary Statistics of Case Duration by Case Type and Appeal Status"),
  full_width = FALSE,
  bootstrap_options = c("striped", "condensed")
)

Summary Statistics of Case Duration by Case Type and Appeal Status
case_type	Appeals	Mean	Median	SD	n
ANTI-CORRUPTION	0	12.38110	6	14.96820	328
ANTI-CORRUPTION	1	20.91111	14	17.90276	90
CIVIL	0	27.82354	16	28.41501	18310
CIVIL	1	35.25588	31	24.47546	12838
CONSTITUTIONAL	0	21.53702	14	22.23388	7009
CONSTITUTIONAL	1	21.70370	5	23.89048	27
CRIMINAL	0	12.90239	2	20.91305	22211
CRIMINAL	1	22.93861	17	18.77528	9496
FAMILY	0	40.35727	35	30.26458	21505
FAMILY	1	34.19120	31	24.62111	591

# Grouped boxplot of case duration by case type, filled by appeal status.
ggplot(data, aes(x = case_type, y = time_in_months, fill = factor(Appeals))) +
  geom_boxplot() +
  labs(
    title = "Case Duration by Case Type and Appeal Status",
    x = "Case Type",
    y = "Time in Months",
    fill = "Appeals"
  ) +
  scale_fill_discrete(labels = c("No Appeal", "Appealed")) +
  # theme_minimal() is a complete theme: it must come BEFORE the theme()
  # tweaks, otherwise it replaces them and the rotated axis labels and the
  # top legend position are silently lost.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "top")

# Survival objects (duration + event indicator): one built from `data`, one
# from `clean_data`.
surv_object1 <- with(data, Surv(time_in_months, Event))
surv_object <- with(clean_data, Surv(time_in_months, Event))

#Objective 2

# Kaplan Meier Survival Curve

# Overall Kaplan-Meier fit (intercept-only formula) on clean_data
km_overall <- survfit(Surv(time_in_months, Event) ~ 1, data = clean_data)

# Reporting times: every 12 months from 12 to 132
time_points <- seq(12, 132, by = 12)

# Survival estimates at the reporting times
summary_overall <- summary(km_overall, times = time_points)

# Collect the estimates, standard errors and confidence limits (3 dp)
km_overall_df <- with(summary_overall, data.frame(
  Time = time,
  Survival_Probability = round(surv, 3),
  Standard_Error = round(std.err, 3),
  CI_Lower = round(lower, 3),
  CI_Upper = round(upper, 3)
))

print(km_overall_df)

##    Time Survival_Probability Standard_Error CI_Lower CI_Upper
## 1    12                0.674          0.002    0.671    0.677
## 2    24                0.472          0.002    0.469    0.476
## 3    36                0.343          0.002    0.339    0.346
## 4    48                0.238          0.001    0.235    0.241
## 5    60                0.163          0.001    0.160    0.165
## 6    72                0.098          0.001    0.096    0.100
## 7    84                0.051          0.001    0.049    0.052
## 8    96                0.025          0.001    0.024    0.026
## 9   108                0.010          0.000    0.010    0.011
## 10  120                0.002          0.000    0.002    0.002

# Overall Kaplan-Meier curve with confidence band; x axis limited to 0-130
# months with a tick every 10 months.
ggsurvplot(
  km_overall,
  data = clean_data,
  title = "Overall Kaplan-Meier survival curve",
  xlab = "Time in Months",
  ylab = "Survival Probability",
  conf.int = TRUE,
  xlim = c(0, 130),
  break.x.by = 10
)

# Kaplan-Meier fit stratified by appeal status
km_appeal <- survfit(Surv(time_in_months, Event) ~ Appeals, data = clean_data)

# Survival estimates per stratum at the reporting times in time_points
summary_appeal <- summary(km_appeal, times = time_points)

# Collect stratum, time, estimate, standard error and confidence limits (3 dp)
km_appeal_df <- with(summary_appeal, data.frame(
  Appeal_Status = strata,
  Time = time,
  Survival_Probability = round(surv, 3),
  Standard_Error = round(std.err, 3),
  CI_Lower = round(lower, 3),
  CI_Upper = round(upper, 3)
))

print(km_appeal_df)

##    Appeal_Status Time Survival_Probability Standard_Error CI_Lower CI_Upper
## 1      Appeals=0   12                0.644          0.002    0.640    0.648
## 2      Appeals=0   24                0.462          0.002    0.458    0.466
## 3      Appeals=0   36                0.348          0.002    0.344    0.352
## 4      Appeals=0   48                0.251          0.002    0.247    0.254
## 5      Appeals=0   60                0.178          0.002    0.175    0.181
## 6      Appeals=0   72                0.112          0.001    0.109    0.114
## 7      Appeals=0   84                0.059          0.001    0.057    0.061
## 8      Appeals=0   96                0.031          0.001    0.030    0.033
## 9      Appeals=0  108                0.013          0.000    0.013    0.014
## 10     Appeals=0  120                0.002          0.000    0.002    0.003
## 11     Appeals=1   12                0.750          0.003    0.745    0.756
## 12     Appeals=1   24                0.499          0.003    0.492    0.505
## 13     Appeals=1   36                0.330          0.003    0.324    0.336
## 14     Appeals=1   48                0.206          0.003    0.201    0.211
## 15     Appeals=1   60                0.124          0.002    0.119    0.128
## 16     Appeals=1   72                0.064          0.002    0.061    0.067
## 17     Appeals=1   84                0.030          0.001    0.027    0.032
## 18     Appeals=1   96                0.010          0.001    0.009    0.012
## 19     Appeals=1  108                0.003          0.000    0.002    0.004
## 20     Appeals=1  120                0.001          0.000    0.000    0.001

# Kaplan-Meier curves by appeal status with confidence bands and the p-value.
# `data` is passed explicitly (as in the other ggsurvplot() calls in this
# file) so that the p-value computation does not depend on ggsurvplot()
# recovering the data set from the stored model call.
ggsurvplot(km_appeal,
           data = clean_data,
           xlab = "Time in Months",
           ylab = "Survival Probability",
           title = "Kaplan-Meier Survival Curves by Appeal Status",
           pval = TRUE,
           conf.int = TRUE,
           legend = "right",
           legend.labs = c("No Appeal", "Appealed"),
           legend.title = "Appeal Status",
           palette = c("steelblue", "gold"))

# Split clean_data by appeal status
data_no_appeal <- clean_data %>% filter(Appeals == 0)
data_appeal <- clean_data %>% filter(Appeals == 1)

# Kaplan-Meier fits by case type, one per subset.
# NOTE: assigning to `km_appeal` replaces any object already stored under
# that name.
km_no_appeal <- survfit(
  Surv(time_in_months, Event) ~ case_type,
  data = data_no_appeal
)
km_appeal <- survfit(
  Surv(time_in_months, Event) ~ case_type,
  data = data_appeal
)

# One Kaplan-Meier plot per appeal subset (curves by case type, p-value
# shown, no confidence bands), both on a 0-130 month axis with ticks every
# 10 months.
plot_no_appeal <- ggsurvplot(
  km_no_appeal,
  data = data_no_appeal,
  title = "KM Survival Curves by Case Type (No Appeal)",
  xlab = "Time in Months",
  ylab = "Survival Probability",
  legend.title = "Case Type",
  legend = "right",
  palette = "Dark2",
  pval = TRUE,
  conf.int = FALSE,
  xlim = c(0, 130),
  break.x.by = 10
)

plot_appeal <- ggsurvplot(
  km_appeal,
  data = data_appeal,
  title = "KM Survival Curves by Case Type (With Appeal)",
  xlab = "Time in Months",
  ylab = "Survival Probability",
  legend.title = "Case Type",
  legend = "right",
  palette = "Dark2",
  pval = TRUE,
  conf.int = FALSE,
  xlim = c(0, 130),
  break.x.by = 10
)

# Stack the two plots vertically on one page
arrange_ggsurvplots(
  list(plot_no_appeal, plot_appeal),
  ncol = 1,
  nrow = 2
)

# Kaplan-Meier fit by case type on clean_data
km <- survfit(Surv(time_in_months, Event) ~ case_type, data = clean_data)

# Plot survival curves.
# The legend labels are taken from clean_data, the data set the model was
# fitted on, so they match the fitted strata in number and order. `data` and
# `clean_data` need not share the same case_type labels. factor() drops
# unused levels and also copes with a character column.
ggsurvplot(km,
           xlab = "Time in Months",
           ylab = "Survival Probability",
           pval = TRUE,
           conf.int = TRUE,
           break.x.by = 10,
           xlim = c(0, 130),
           data = clean_data,
           legend = "right",
           legend.title = "Case type",
           legend.labs = levels(factor(clean_data$case_type)),
           title = "Kaplan-Meier Survival curves by case type")

###################################################################
# Cox proportional hazards model with case type and appeal status as
# covariates. The response is the pre-built survival object `surv_object1`
# and the covariates are looked up in `data`.
overall_coxph <- coxph(
  formula = surv_object1 ~ case_type + Appeals,
  data = data
)
summary(overall_coxph)

## Call:
## coxph(formula = surv_object1 ~ case_type + Appeals, data = data)
## 
##   n= 92405, number of events= 92404 
## 
##                              coef exp(coef)  se(coef)       z Pr(>|z|)    
## case_typeCIVIL          -0.689615  0.501769  0.049301 -13.988   <2e-16 ***
## case_typeCONSTITUTIONAL -0.469718  0.625179  0.050395  -9.321   <2e-16 ***
## case_typeCRIMINAL       -0.073072  0.929534  0.049252  -1.484    0.138    
## case_typeFAMILY         -1.105360  0.331092  0.049488 -22.336   <2e-16 ***
## Appeals                 -0.312469  0.731638  0.008231 -37.963   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                         exp(coef) exp(-coef) lower .95 upper .95
## case_typeCIVIL             0.5018      1.993    0.4556    0.5527
## case_typeCONSTITUTIONAL    0.6252      1.600    0.5664    0.6901
## case_typeCRIMINAL          0.9295      1.076    0.8440    1.0237
## case_typeFAMILY            0.3311      3.020    0.3005    0.3648
## Appeals                    0.7316      1.367    0.7199    0.7435
## 
## Concordance= 0.647  (se = 0.001 )
## Likelihood ratio test= 13211  on 5 df,   p=<2e-16
## Wald test            = 13712  on 5 df,   p=<2e-16
## Score (logrank) test = 14296  on 5 df,   p=<2e-16

# Cox model with appeal status as the only covariate, fitted on clean_data
cox_appeals <- coxph(
  formula = Surv(time_in_months, Event) ~ Appeals,
  data = clean_data
)
summary(cox_appeals)

## Call:
## coxph(formula = Surv(time_in_months, Event) ~ Appeals, data = clean_data)
## 
##   n= 81266, number of events= 81265 
## 
##             coef exp(coef) se(coef)     z Pr(>|z|)    
## Appeals 0.060336  1.062194 0.007879 7.658 1.89e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## Appeals     1.062     0.9414     1.046     1.079
## 
## Concordance= 0.487  (se = 0.001 )
## Likelihood ratio test= 58.14  on 1 df,   p=2e-14
## Wald test            = 58.65  on 1 df,   p=2e-14
## Score (logrank) test = 58.66  on 1 df,   p=2e-14

# Cox model with case type as the only covariate, fitted on clean_data
cox_case_type <- coxph(
  formula = Surv(time_in_months, Event) ~ case_type,
  data = clean_data
)
summary(cox_case_type)

## Call:
## coxph(formula = Surv(time_in_months, Event) ~ case_type, data = clean_data)
## 
##   n= 81266, number of events= 81265 
## 
##                                            coef exp(coef) se(coef)       z
## case_typeCIVIL                         -0.77922   0.45876  0.05194 -15.003
## case_typeCONSTITUTIONAL & HUMAN RIGHTS -0.44440   0.64121  0.05307  -8.374
## case_typeCRIMINAL                      -0.36310   0.69552  0.05200  -6.982
## case_typeFAMILY                        -1.10681   0.33061  0.05212 -21.238
##                                        Pr(>|z|)    
## case_typeCIVIL                          < 2e-16 ***
## case_typeCONSTITUTIONAL & HUMAN RIGHTS  < 2e-16 ***
## case_typeCRIMINAL                      2.91e-12 ***
## case_typeFAMILY                         < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                        exp(coef) exp(-coef) lower .95 upper .95
## case_typeCIVIL                            0.4588      2.180    0.4144    0.5079
## case_typeCONSTITUTIONAL & HUMAN RIGHTS    0.6412      1.560    0.5779    0.7115
## case_typeCRIMINAL                         0.6955      1.438    0.6281    0.7701
## case_typeFAMILY                           0.3306      3.025    0.2985    0.3662
## 
## Concordance= 0.594  (se = 0.001 )
## Likelihood ratio test= 6538  on 4 df,   p=<2e-16
## Wald test            = 6586  on 4 df,   p=<2e-16
## Score (logrank) test = 6784  on 4 df,   p=<2e-16

# Weibull model. It only accepts non-zero times, so we use the subset of
# non-zero data (clean_data).
weibull <- survreg(
  formula = Surv(time_in_months, Event) ~ Appeals + case_type,
  data = clean_data,
  dist = "weibull"
)
summary(weibull)

## 
## Call:
## survreg(formula = Surv(time_in_months, Event) ~ Appeals + case_type, 
##     data = clean_data, dist = "weibull")
##                                           Value Std. Error      z       p
## (Intercept)                             2.76711    0.04666  59.31 < 2e-16
## Appeals                                 0.12984    0.00782  16.60 < 2e-16
## case_typeCIVIL                          0.67352    0.04688  14.37 < 2e-16
## case_typeCONSTITUTIONAL & HUMAN RIGHTS  0.43235    0.04797   9.01 < 2e-16
## case_typeCRIMINAL                       0.30997    0.04696   6.60 4.1e-11
## case_typeFAMILY                         0.99159    0.04704  21.08 < 2e-16
## Log(scale)                             -0.10188    0.00280 -36.36 < 2e-16
## 
## Scale= 0.903 
## 
## Weibull distribution
## Loglik(model)= -356684.5   Loglik(intercept only)= -359795.2
##  Chisq= 6221.5 on 5 degrees of freedom, p= 0 
## Number of Newton-Raphson Iterations: 7 
## n= 81266

# Exponential model with the same covariates, fitted on clean_data
exponential <- survreg(
  formula = Surv(time_in_months, Event) ~ Appeals + case_type,
  data = clean_data,
  dist = "exponential"
)
summary(exponential)

## 
## Call:
## survreg(formula = Surv(time_in_months, Event) ~ Appeals + case_type, 
##     data = clean_data, dist = "exponential")
##                                          Value Std. Error     z       p
## (Intercept)                            2.71436    0.05163 52.57 < 2e-16
## Appeals                                0.15319    0.00863 17.75 < 2e-16
## case_typeCIVIL                         0.67858    0.05191 13.07 < 2e-16
## case_typeCONSTITUTIONAL & HUMAN RIGHTS 0.44341    0.05311  8.35 < 2e-16
## case_typeCRIMINAL                      0.30735    0.05200  5.91 3.4e-09
## case_typeFAMILY                        1.01705    0.05208 19.53 < 2e-16
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -357315.9   Loglik(intercept only)= -360096.1
##  Chisq= 5560.28 on 5 degrees of freedom, p= 0 
## Number of Newton-Raphson Iterations: 5 
## n= 81266

# Log-normal model with the same covariates, fitted on clean_data
lognormal <- survreg(
  formula = Surv(time_in_months, Event) ~ Appeals + case_type,
  data = clean_data,
  dist = "lognormal"
)
summary(lognormal)

## 
## Call:
## survreg(formula = Surv(time_in_months, Event) ~ Appeals + case_type, 
##     data = clean_data, dist = "lognormal")
##                                          Value Std. Error     z      p
## (Intercept)                            2.11022    0.05790 36.45 <2e-16
## Appeals                                0.50621    0.00964 52.53 <2e-16
## case_typeCIVIL                         0.62736    0.05824 10.77 <2e-16
## case_typeCONSTITUTIONAL & HUMAN RIGHTS 0.53741    0.05956  9.02 <2e-16
## case_typeCRIMINAL                      0.18145    0.05835  3.11 0.0019
## case_typeFAMILY                        1.25950    0.05840 21.57 <2e-16
## Log(scale)                             0.11502    0.00248 46.37 <2e-16
## 
## Scale= 1.12 
## 
## Log Normal distribution
## Loglik(model)= -360923.4   Loglik(intercept only)= -365726.5
##  Chisq= 9606.11 on 5 degrees of freedom, p= 0 
## Number of Newton-Raphson Iterations: 3 
## n= 81266

# Log-logistic model with the same covariates, fitted on clean_data
loglogistic <- survreg(
  formula = Surv(time_in_months, Event) ~ Appeals + case_type,
  data = clean_data,
  dist = "loglogistic"
)
summary(loglogistic)

## 
## Call:
## survreg(formula = Surv(time_in_months, Event) ~ Appeals + case_type, 
##     data = clean_data, dist = "loglogistic")
##                                           Value Std. Error       z       p
## (Intercept)                             2.12486    0.05827   36.46 < 2e-16
## Appeals                                 0.48368    0.00969   49.91 < 2e-16
## case_typeCIVIL                          0.70377    0.05864   12.00 < 2e-16
## case_typeCONSTITUTIONAL & HUMAN RIGHTS  0.58755    0.05994    9.80 < 2e-16
## case_typeCRIMINAL                       0.23650    0.05877    4.02 5.7e-05
## case_typeFAMILY                         1.32960    0.05873   22.64 < 2e-16
## Log(scale)                             -0.44182    0.00291 -152.07 < 2e-16
## 
## Scale= 0.643 
## 
## Log logistic distribution
## Loglik(model)= -361847.7   Loglik(intercept only)= -366605.4
##  Chisq= 9515.4 on 5 degrees of freedom, p= 0 
## Number of Newton-Raphson Iterations: 3 
## n= 81266

# Gamma model with the same covariates, fitted on clean_data via flexsurvreg
gamma_model <- flexsurvreg(Surv(time_in_months, Event) ~ Appeals + case_type,
                           dist = "gamma", data = clean_data)

# Print the fitted model (parameter estimates and fit statistics).
# summary() on a flexsurvreg object instead returns the fitted survival
# curve at every time point, which is not the coefficient table reported
# for the other models.
print(gamma_model)

## Appeals=0.279329608938547,case_typeCIVIL=0.371299190313292,case_typeCONSTITUTIONAL & HUMAN RIGHTS=0.0792582383776733,case_typeCRIMINAL=0.28383333743509,case_typeFAMILY=0.26098245268624 
##     time         est         lcl         ucl
## 1      1 0.979006919 0.978371425 0.979593066
## 2      2 0.954061755 0.952999000 0.955073893
## 3      3 0.927981222 0.926576318 0.929338098
## 4      4 0.901496309 0.899809108 0.903123467
## 5      5 0.874971510 0.873061215 0.876824917
## 6      6 0.848623305 0.846519900 0.850645965
## 7      7 0.822591746 0.820393231 0.824729670
## 8      8 0.796971762 0.794630439 0.799230048
## 9      9 0.771829267 0.769388030 0.774176965
## 10    10 0.747210389 0.744739929 0.749638188
## 11    11 0.723147182 0.720629719 0.725665123
## 12    12 0.699661369 0.697099741 0.702203365
## 13    13 0.676766907 0.674183512 0.679374003
## 14    14 0.654471806 0.651905990 0.657132675
## 15    15 0.632779463 0.630256727 0.635483740
## 16    16 0.611689659 0.609157125 0.614437259
## 17    17 0.591199320 0.588682686 0.593951020
## 18    18 0.571303108 0.568760454 0.574024721
## 19    19 0.551993888 0.549459424 0.554710175
## 20    20 0.533263101 0.530720184 0.535982655
## 21    21 0.515101060 0.512559136 0.517818120
## 22    22 0.497497195 0.494948172 0.500207056
## 23    23 0.480440254 0.477918120 0.483138155
## 24    24 0.463918461 0.461374834 0.466600304
## 25    25 0.447919659 0.445379255 0.450581620
## 26    26 0.432431418 0.429902875 0.435070439
## 27    27 0.417441128 0.414912588 0.420071472
## 28    28 0.402936081 0.400415463 0.405539288
## 29    29 0.388903536 0.386415889 0.391482055
## 30    30 0.375330769 0.372863514 0.377920754
## 31    31 0.362205127 0.359760996 0.364760588
## 32    32 0.349514059 0.347094871 0.352089886
## 33    33 0.337245154 0.334822717 0.339811121
## 34    34 0.325386160 0.322994172 0.327913161
## 35    35 0.313925014 0.311575180 0.316389549
## 36    36 0.302849856 0.300519313 0.305305427
## 37    37 0.292149041 0.289845663 0.294590046
## 38    38 0.281811158 0.279543496 0.284199707
## 39    39 0.271825030 0.269576640 0.274198754
## 40    40 0.262179729 0.259942959 0.264514572
## 41    41 0.252864574 0.250641373 0.255174941
## 42    42 0.243869142 0.241661828 0.246162715
## 43    43 0.235183264 0.232995572 0.237455129
## 44    44 0.226797029 0.224624874 0.229054190
## 45    45 0.218700783 0.216548842 0.220940816
## 46    46 0.210885130 0.208769131 0.213110642
## 47    47 0.203340925 0.201264551 0.205540554
## 48    48 0.196059280 0.194008662 0.198241602
## 49    49 0.189031553 0.187004707 0.191197748
## 50    50 0.182249351 0.180246934 0.184398164
## 51    51 0.175704523 0.173726734 0.177842486
## 52    52 0.169389155 0.167437258 0.171514551
## 53    53 0.163295571 0.161370307 0.165406775
## 54    54 0.157416321 0.155518341 0.159511828
## 55    55 0.151744183 0.149872014 0.153822564
## 56    56 0.146272156 0.144419961 0.148306345
## 57    57 0.140993453 0.139174430 0.142987974
## 58    58 0.135901498 0.134115962 0.137873491
## 59    59 0.130989923 0.129234238 0.132930059
## 60    60 0.126252558 0.124526262 0.128165454
## 61    61 0.121683430 0.119982404 0.123564326
## 62    62 0.117276757 0.115617544 0.119131276
## 63    63 0.113026942 0.111391774 0.114837988
## 64    64 0.108928570 0.107319708 0.110713233
## 65    65 0.104976399 0.103391523 0.106734296
## 66    66 0.101165362 0.099611831 0.102896162
## 67    67 0.097490554 0.095968075 0.099193980
## 68    68 0.093947234 0.092455384 0.095611706
## 69    69 0.090530817 0.089069647 0.092155849
## 70    70 0.087236869 0.085806194 0.088832368
## 71    71 0.084061107 0.082659896 0.085627106
## 72    72 0.080999387 0.079626969 0.082535952
## 73    73 0.078047706 0.076701691 0.079564870
## 74    74 0.075202197 0.073881389 0.076699020
## 75    75 0.072459120 0.071163490 0.073922745
## 76    76 0.069814866 0.068544380 0.071245554
## 77    77 0.067265944 0.066020595 0.068664142
## 78    78 0.064808985 0.063592465 0.066175150
## 79    79 0.062440735 0.061258185 0.063775332
## 80    80 0.060158048 0.059005959 0.061464658
## 81    81 0.057957891 0.056831914 0.059236808
## 82    82 0.055837330 0.054737102 0.057088849
## 83    83 0.053793536 0.052718686 0.055017965
## 84    84 0.051823775 0.050773926 0.053021355
## 85    85 0.049925410 0.048899822 0.051096448
## 86    86 0.048095893 0.047090038 0.049240740
## 87    87 0.046332766 0.045346705 0.047451779
## 88    88 0.044633656 0.043670448 0.045727202
## 89    89 0.042996273 0.042057152 0.044064724
## 90    90 0.041418407 0.040502139 0.042462141
## 91    91 0.039897924 0.039004103 0.040917389
## 92    92 0.038432766 0.037561072 0.039429819
## 93    93 0.037020947 0.036168429 0.037993574
## 94    94 0.035660551 0.034826302 0.036609674
## 95    95 0.034349728 0.033533546 0.035276187
## 96    96 0.033086694 0.032291480 0.033990881
## 97    97 0.031869730 0.031095661 0.032752035
## 98    98 0.030697173 0.029942576 0.031557990
## 99    99 0.029567424 0.028830620 0.030408869
## 100  100 0.028478937 0.027759635 0.029302003
## 101  101 0.027430221 0.026728129 0.028235042
## 102  102 0.026419840 0.025734662 0.027204922
## 103  103 0.025446407 0.024777906 0.026214952
## 104  104 0.024508586 0.023856237 0.025260521
## 105  105 0.023605087 0.022968276 0.024339932
## 106  106 0.022734667 0.022113130 0.023452675
## 107  107 0.021896127 0.021289600 0.022597533
## 108  108 0.021088310 0.020496528 0.021773336
## 109  109 0.020310102 0.019734223 0.020977789
## 110  110 0.019560427 0.019000187 0.020209845
## 111  111 0.018838249 0.018292262 0.019469155
## 112  112 0.018142569 0.017610546 0.018755437
## 113  113 0.017472422 0.016954076 0.018067746
## 114  114 0.016826880 0.016321926 0.017407193
## 115  115 0.016205048 0.015713203 0.016769008
## 116  116 0.015606061 0.015127046 0.016154081
## 117  117 0.015029087 0.014562625 0.015561572
## 118  118 0.014473324 0.014019142 0.014990671
## 119  119 0.013937998 0.013495826 0.014440578
## 120  120 0.013422363 0.012991934 0.013910556
## 121  121 0.012925702 0.012506752 0.013399882
## 122  122 0.012447320 0.012039589 0.012907838
## 123  123 0.011986550 0.011589783 0.012433759
## 124  124 0.011542749 0.011156697 0.011977001
## 125  125 0.011115296 0.010739686 0.011536935
## 126  126 0.010703592 0.010337558 0.011112955
## 127  127 0.010307063 0.009950408 0.010704637
## 128  128 0.009925151 0.009577710 0.010311641
## 129  129 0.009557323 0.009219438 0.009933004
## 130  130 0.009203062 0.008875717 0.009568206

# Objective 3

# Compare models using AIC.
# AIC values are only comparable between models fitted to the same
# observations. `overall_coxph` was fitted on `data`, whereas every
# parametric model was fitted on `clean_data`, so the Cox model is refitted
# on `clean_data` for this comparison.
# NOTE(review): the Cox AIC is based on the partial likelihood while the
# parametric models use the full likelihood, so the Cox row is not on the
# same scale as the others; confirm how it should be reported.
overall_coxph_clean <- coxph(Surv(time_in_months, Event) ~ case_type + Appeals,
                             data = clean_data)
AIC(overall_coxph_clean, weibull, exponential, lognormal, loglogistic, gamma_model)

## Warning in AIC.default(overall_coxph, weibull, exponential, lognormal,
## loglogistic, : models are not all fitted to the same number of observations

##               df       AIC
## overall_coxph  5 1915086.7
## weibull        7  713383.0
## exponential    6  714643.8
## lognormal      7  721860.8
## loglogistic    7  723709.4
## gamma_model    7  713569.9

Appendix: Counties

# Count cases for every County_No / court_name / County combination.
# `.groups = "drop"` returns an ungrouped result directly and avoids the
# "summarise() has grouped output by ..." message; the columns come out as
# County_No, court_name, County, count.
court_case_counts <- data %>%
  group_by(County_No, court_name, County) %>%
  summarize(count = n(), .groups = "drop")

## `summarise()` has grouped output by 'County_No', 'court_name'. You can override
## using the `.groups` argument.

# Append a totals row to the per-court counts
court_counts_with_total <- adorn_totals(court_case_counts, "row")

# Build the styled HTML table and display it
court_name_tables <- kable_styling(
  kable(court_counts_with_total,
        format = "html",
        caption = "Number of cases per court")
)

court_name_tables

Number of cases per court
County_No	court_name	County	count
1	Mombasa High Court	Mombasa County	4555
3	Malindi High Court	Kilifi County	1740
4	Garsen High Court	Tana River County	557
6	Voi High Court	Taita Taveta County	966
7	Garissa High Court	Garissa County	182
10	Marsabit High Court	Marsabit County	391
12	Meru High Court	Meru County	4321
13	Chuka High Court	Tharaka-Nithi County	950
14	Embu High Court	Embu County	1439
15	Kitui High Court	Kitui County	1603
16	Machakos High Court	Machakos County	3799
17	Makueni High Court	Makueni County	1640
18	Nyandarua High Court	Nyandarua County	640
19	Nyeri High Court	Nyeri County	1607
20	Kerugoya High Court	Kirinyaga County	1211
21	Muranga High Court	Murang’a County	1787
22	Kiambu High Court	Kiambu County	3462
23	Lodwar High Court	Turkana County	326
24	Kapenguria High Court	West Pokot County	270
26	Kitale High Court	Trans-Nzoia County	2410
27	Eldoret High Court	Uasin Gishu County	4139
30	Kabarnet High Court	Baringo County	861
31	Nanyuki High Court	Laikipia County	706
32	Naivasha High Court	Nakuru County	1615
32	Nakuru High Court	Nakuru County	6692
33	Narok High Court	Narok County	670
34	Kajiado High Court	Kajiado County	1258
35	Kericho High Court	Kericho County	1202
36	Bomet High Court	Bomet County	681
37	Kakamega High Court	Kakamega County	2549
38	Vihiga High Court	Vihiga County	71
39	Bungoma High Court	Bungoma County	2092
40	Busia High Court	Busia County	1170
41	Siaya High Court	Siaya County	2279
42	Kisumu High Court	Kisumu County	3160
43	Homabay High Court	Homa Bay County	1881
44	Migori High Court	Migori County	1844
45	Kisii High Court	Kisii County	3184
46	Nyamira High Court	Nyamira County	1184
47	Milimani AntiCorruption & Economic Crimes Division	Nairobi County	418
47	Milimani Civil Division	Nairobi County	6194
47	Milimani Commercial & Tax Division	Nairobi County	20
47	Milimani Constitutional Law & Human Rights Division	Nairobi County	1939
47	Milimani Criminal Division	Nairobi County	2032
47	Milimani Family Division	Nairobi County	9116
47	Milimani Judicial Review Division	Nairobi County	1592
Total			92405

ANALYSIS OF TIME TAKEN FOR A CASE TO BE DETERMINED IN HIGH COURTS OF KENYA

Kandie Alex

2025-08-16

Introduction for 4.1