# Load required packages
# install.packages("sass")
library(sass)
library(survminer)
library(dplyr)
library(ggplot2)
library(viridis)
library(knitr)
library(kableExtra)
library(flexsurv)
library(kableExtra)
library(janitor)
# Import the data
data <- read.csv("E:/5-FLASH/Project/high court cases - Final  dataset.csv") 

# Check for any non-positive survival times
summary(data$time_in_months <= 0)
##    Mode   FALSE    TRUE 
## logical   81266   11139
# Removing records with non-positive survival times for non-parametric tests
clean_data <- data[data$time_in_months > 0, ]
# Rename level names
data$case_type <- factor(data$case_type, 
                         levels = c("ANTI-CORRUPTION AND ECONOMIC CRIMES", 
                                    "CIVIL",
                                    "CONSTITUTIONAL & HUMAN RIGHTS",
                                    "CRIMINAL",
                                    "FAMILY"),
                         labels = c("ANTI-CORRUPTION", "CIVIL", "CONSTITUTIONAL", 
                                    "CRIMINAL", "FAMILY"))

Introduction for 4.1

# Count cases and compute percentages
case_counts <- data %>%
  group_by(case_type) %>%
  summarize(count = n()) %>%
  ungroup() %>%
  mutate(total = sum(count)) %>%
  mutate(percentage = round(count / total * 100, 1)) %>%
  select(case_type, count, percentage)

# Add a total row
case_counts_with_total <- case_counts %>%
  adorn_totals("row") %>%
  mutate(percentage = ifelse(case_type == "Total", 
                             "100.0", 
                             percentage))

# Print formatted table with total
case_table <- case_counts_with_total %>%
  kable(format = "html", caption = "Number of cases and percentage distribution by case type") %>%
  kable_styling()

case_table
Number of cases and percentage distribution by case type
case_type count percentage
ANTI-CORRUPTION 418 0.5
CIVIL 31148 33.7
CONSTITUTIONAL 7036 7.6
CRIMINAL 31707 34.3
FAMILY 22096 23.9
Total 92405 100.0
# Exploratory Data Analysis (Objective 1)
###############################################################

# 4.2.1
# Testing for normality
# Histogram of time variables
# Create a histogram of time_in_months
# Compute descriptive statistics
mean_time <- round(mean(data$time_in_months, na.rm = TRUE), 2)
median_time <- round(median(data$time_in_months, na.rm = TRUE), 2)
sd_time <- round(sd(data$time_in_months, na.rm = TRUE), 2)

# Create a data frame for the normal curve
x_vals <- seq(min(data$time_in_months, na.rm = TRUE),
              max(data$time_in_months, na.rm = TRUE), 
              length.out = 200)
normal_curve <- data.frame(
    x = x_vals,
    density = dnorm(x_vals, mean = mean_time, sd = sd_time)
)

# Create histogram with normal curve and labeled stats
ggplot(data, aes(x = time_in_months)) +
    geom_histogram(aes(y = after_stat(density)), 
                   bins = 30, fill = "skyblue", color = "black", alpha = 0.7) +
    labs(title = "Overall Distribution of Time in Months",
         x = "Time in Months",
         y = "Density") +
    theme_minimal() +
    
    # Add normal curve
    geom_line(data = normal_curve, aes(x = x, y = density),
              color = "darkgreen", linewidth = 1) +
    
    # Add vertical line for mean
    geom_vline(xintercept = mean_time, color = "red", linetype = "dashed", linewidth = 1) +
    annotate("text", x = mean_time, y = Inf, label = paste("Mean =", mean_time),
             vjust = 1.5, hjust = 1.1, color = "red", angle = 90, size = 4) +
    
    # Add vertical line for median
    geom_vline(xintercept = median_time, color = "blue", linetype = "dotted", linewidth = 1) +
    annotate("text", x = median_time, y = Inf, label = paste("Median =", median_time),
             vjust = 1.5, hjust = 1.1, color = "blue", angle = 90, size = 4) +
    
    # Annotate summary statistics in top-right corner
    annotate("text", 
             x = Inf, y = Inf, 
             label = paste("Mean =", mean_time, "\nMedian =", median_time, "\nSD =", sd_time),
             hjust = 1.1, vjust = 1.1, size = 4, color = "black", fontface = "bold")

# Exploratory Data Analysis (Objective 1)
###############################################################

# 4.2.1
# Correct one-> Group by case_type and summarize time_in_months
summary_stats <- data %>%
  group_by(case_type) %>%
  summarize(
    mean_time = mean(time_in_months, na.rm = TRUE),
    median_time = median(time_in_months, na.rm = TRUE),
    min_time = min(time_in_months, na.rm = TRUE),
    max_time = max(time_in_months, na.rm = TRUE),
    sd_time = sd(time_in_months, na.rm = TRUE)
  )
# Print formatted table
kable(summary_stats, caption = "Summary statistics of time in months grouped by case type") %>%
  kable_styling()
Summary statistics of time in months grouped by case type
case_type mean_time median_time min_time max_time sd_time
ANTI-CORRUPTION 14.21770 8 0 72 16.01312
CIVIL 30.88686 23 0 130 27.10895
CONSTITUTIONAL 21.53766 14 0 129 22.23865
CRIMINAL 15.90816 8 0 126 20.81024
FAMILY 40.19234 35 0 129 30.14336
# 4.2.2


# 4.2.3
# Distribution by case type

# Compute mean, median, and standard deviation for each case_type
summary_stats <- data %>%
    group_by(case_type) %>%
    summarise(
        Mean = mean(time_in_months, na.rm = TRUE),
        Median = median(time_in_months, na.rm = TRUE),
        SD = sd(time_in_months, na.rm = TRUE)
    ) %>%
    ungroup()

# Generate normal curve data
normal_curves <- summary_stats %>%
    rowwise() %>%
    do({
        case = .$case_type
        mean = .$Mean
        sd = .$SD
        x_vals = seq(mean - 4 * sd, mean + 4 * sd, length.out = 200)
        data.frame(
            case_type = case,
            x = x_vals,
            density = dnorm(x_vals, mean = mean, sd = sd)
        )
    }) %>%
    ungroup()

# Create label positions for SD text
sd_labels <- summary_stats %>%
    mutate(
        x = Mean + SD,  # position label at 1 SD right of mean
        y = dnorm(Mean + SD, mean = Mean, sd = SD),
        label = paste0("SD = ", round(SD, 2))
    )

# Final plot
ggplot(data, aes(x = time_in_months)) +
    geom_histogram(aes(y = after_stat(density)), 
                   binwidth = 5, fill = "skyblue", color = "black", alpha = 0.7) +
    labs(
        title = "Distribution of Time in Months by Case Type",
        x = "Time in Months",
        y = "Density"
    ) +
    facet_wrap(~ case_type, scales = "free") +
    
    # Add vertical lines for mean and median
    geom_vline(data = summary_stats,
               aes(xintercept = Mean, color = "Mean"),
               linetype = "dashed", linewidth = 1) +
    geom_vline(data = summary_stats,
               aes(xintercept = Median, color = "Median"),
               linetype = "dotted", linewidth = 1) +
    
    # Add text labels for mean and median
    geom_text(data = summary_stats,
              aes(x = Mean, y = Inf, label = paste("Mean =", round(Mean, 2))),
              vjust = 1.5, hjust = -0.3, color = "red", size = 3) +
    geom_text(data = summary_stats,
              aes(x = Median, y = Inf, label = paste("Median =", round(Median, 2))),
              vjust = 3, hjust = -0.5, color = "blue", size = 3) +
    
    # Add normal curve
    geom_line(data = normal_curves, 
              aes(x = x, y = density),
              color = "darkgreen", linewidth = 1) +
    
    # Add SD labels
    geom_text(data = sd_labels,
              aes(x = x, y = y, label = label),
              color = "darkgreen", vjust = -1, hjust = -0.1, size = 3) +
    
    # Define colors for mean and median
    scale_color_manual(name = "Statistics",
                       values = c("Mean" = "red", "Median" = "blue")) +
    
    theme_minimal() +
    theme(legend.position = "top")

# 4.2.4
# Group by Appeals and summarize time_in_months
summary_stats <- data %>%
    group_by(Appeals) %>%
    summarize(
        mean_time = mean(time_in_months, na.rm = TRUE),
        median_time = median(time_in_months, na.rm = TRUE),
        min_time = min(time_in_months, na.rm = TRUE),
        max_time = max(time_in_months, na.rm = TRUE),
        sd_time = sd(time_in_months, na.rm = TRUE)
    )

# Print formatted table
kable(summary_stats, caption = "Summary statistics of time in months grouped by Appeals") %>%
    kable_styling()
Summary statistics of time in months grouped by Appeals
Appeals mean_time median_time min_time max_time sd_time
0 26.22322 15 0 130 28.47277
1 30.08051 24 0 129 23.09141
# T-test to compare time by Appeals groups
t_test_results <- t.test(time_in_months ~ Appeals, data = data)

# Print the t-test results
cat("T-test comparing time by Appeals groups:\n")
## T-test comparing time by Appeals groups:
print(t_test_results)
## 
##  Welch Two Sample t-test
## 
## data:  time_in_months by Appeals
## t = -20.669, df = 48116, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -4.223075 -3.491502
## sample estimates:
## mean in group 0 mean in group 1 
##        26.22322        30.08051
# Creating the boxplot by appeals
boxplot <- ggplot(data, aes(x = factor(Appeals), y = time_in_months, fill = factor(Appeals))) +
  geom_boxplot() +
  labs(
    title = "Boxplot of time in months by Appeals groups",
    x = "Appeals",
    y = "Time in Months"
  ) +
  scale_fill_manual(
    values = c("0" = "#0073C2", "1" = "#EFC000"),  # Define your custom colors here
    name = "Appeals",
    labels = c("No Appeal", "Appealed")
  ) +
  theme_minimal()

# Print the boxplot
print(boxplot)

# 4.2.5 
# Computing summary statistics grouped by case_type and Appeals
# Compute summary statistics
summary_stats <- data %>%
  group_by(case_type, Appeals = factor(Appeals)) %>%
  summarise(
    Mean = mean(time_in_months, na.rm = TRUE),
    Median = median(time_in_months, na.rm = TRUE),
    SD = sd(time_in_months, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )

# Print styled table
kable(summary_stats, 
      caption = "Summary Statistics of Case Duration by Case Type and Appeal Status") %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "condensed"))
Summary Statistics of Case Duration by Case Type and Appeal Status
case_type Appeals Mean Median SD n
ANTI-CORRUPTION 0 12.38110 6 14.96820 328
ANTI-CORRUPTION 1 20.91111 14 17.90276 90
CIVIL 0 27.82354 16 28.41501 18310
CIVIL 1 35.25588 31 24.47546 12838
CONSTITUTIONAL 0 21.53702 14 22.23388 7009
CONSTITUTIONAL 1 21.70370 5 23.89048 27
CRIMINAL 0 12.90239 2 20.91305 22211
CRIMINAL 1 22.93861 17 18.77528 9496
FAMILY 0 40.35727 35 30.26458 21505
FAMILY 1 34.19120 31 24.62111 591
# Create grouped boxplot
ggplot(data, aes(x = case_type, y = time_in_months, fill = factor(Appeals))) +
  geom_boxplot() +
  labs(
    title = "Case Duration by Case Type and Appeal Status",
    x = "Case Type",
    y = "Time in Months",
    fill = "Appeals"
  ) +
  scale_fill_discrete(labels = c("No Appeal", "Appealed")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "top") +
  theme_minimal()

# Create survival object
surv_object1 <- Surv(data$time_in_months, data$Event)
surv_object <- Surv(clean_data$time_in_months, clean_data$Event)

#Objective 2

# Kaplan Meier Survival Curve

# Overall KM model
km_overall <- survfit(Surv(time_in_months, Event) ~ 1, data = clean_data)

# Time points
time_points <- c(12, 24, 36, 48, 60, 72, 84, 96, 108, 120, 132)

# Summary
summary_overall <- summary(km_overall, times = time_points)

# Format
km_overall_df <- data.frame(
  Time = summary_overall$time,
  Survival_Probability = round(summary_overall$surv, 3),
  Standard_Error = round(summary_overall$std.err, 3),
  CI_Lower = round(summary_overall$lower, 3),
  CI_Upper = round(summary_overall$upper, 3)
)

print(km_overall_df)
##    Time Survival_Probability Standard_Error CI_Lower CI_Upper
## 1    12                0.674          0.002    0.671    0.677
## 2    24                0.472          0.002    0.469    0.476
## 3    36                0.343          0.002    0.339    0.346
## 4    48                0.238          0.001    0.235    0.241
## 5    60                0.163          0.001    0.160    0.165
## 6    72                0.098          0.001    0.096    0.100
## 7    84                0.051          0.001    0.049    0.052
## 8    96                0.025          0.001    0.024    0.026
## 9   108                0.010          0.000    0.010    0.011
## 10  120                0.002          0.000    0.002    0.002
ggsurvplot(km_overall, 
           data = clean_data,   
           xlab = "Time in Months", 
           ylab = "Survival Probability", 
           title = "Overall Kaplan-Meier survival curve",
           conf.int = TRUE,
           break.x.by = 10,
           xlim = c(0, 130))

# Survival curve by appeal status
km_appeal <- survfit(Surv(time_in_months, Event) ~ Appeals, data = clean_data)

# Summary
summary_appeal <- summary(km_appeal, times = time_points)

# Format
km_appeal_df <- data.frame(
  Appeal_Status = summary_appeal$strata,
  Time = summary_appeal$time,
  Survival_Probability = round(summary_appeal$surv, 3),
  Standard_Error = round(summary_appeal$std.err, 3),
  CI_Lower = round(summary_appeal$lower, 3),
  CI_Upper = round(summary_appeal$upper, 3)
)

print(km_appeal_df)
##    Appeal_Status Time Survival_Probability Standard_Error CI_Lower CI_Upper
## 1      Appeals=0   12                0.644          0.002    0.640    0.648
## 2      Appeals=0   24                0.462          0.002    0.458    0.466
## 3      Appeals=0   36                0.348          0.002    0.344    0.352
## 4      Appeals=0   48                0.251          0.002    0.247    0.254
## 5      Appeals=0   60                0.178          0.002    0.175    0.181
## 6      Appeals=0   72                0.112          0.001    0.109    0.114
## 7      Appeals=0   84                0.059          0.001    0.057    0.061
## 8      Appeals=0   96                0.031          0.001    0.030    0.033
## 9      Appeals=0  108                0.013          0.000    0.013    0.014
## 10     Appeals=0  120                0.002          0.000    0.002    0.003
## 11     Appeals=1   12                0.750          0.003    0.745    0.756
## 12     Appeals=1   24                0.499          0.003    0.492    0.505
## 13     Appeals=1   36                0.330          0.003    0.324    0.336
## 14     Appeals=1   48                0.206          0.003    0.201    0.211
## 15     Appeals=1   60                0.124          0.002    0.119    0.128
## 16     Appeals=1   72                0.064          0.002    0.061    0.067
## 17     Appeals=1   84                0.030          0.001    0.027    0.032
## 18     Appeals=1   96                0.010          0.001    0.009    0.012
## 19     Appeals=1  108                0.003          0.000    0.002    0.004
## 20     Appeals=1  120                0.001          0.000    0.000    0.001
ggsurvplot(km_appeal,
           xlab = "Time in Months",
           ylab = "Survival Probability",
           title = "Kaplan-Meier Survival Curves by Appeal Status",
           pval = TRUE,
           conf.int = TRUE,
           legend = "right",
           legend.labs = c("No Appeal", "Appealed"),
           legend.title = "Appeal Status",
           palette = c("steelblue", "gold"))

# Subset data
data_no_appeal <- filter(clean_data, Appeals == 0)
data_appeal <- filter(clean_data, Appeals == 1)

# Fit KM models separately
km_no_appeal <- survfit(Surv(time_in_months, Event) ~ case_type, data = data_no_appeal)
km_appeal <- survfit(Surv(time_in_months, Event) ~ case_type, data = data_appeal)

# Create individual plots
plot_no_appeal <- ggsurvplot(km_no_appeal,
                             data = data_no_appeal,
                             xlab = "Time in Months",
                             ylab = "Survival Probability",
                             title = "KM Survival Curves by Case Type (No Appeal)",
                             pval = TRUE,
                             conf.int = FALSE,
                             legend = "right",
                             legend.title = "Case Type",
                             break.x.by = 10,
                             xlim = c(0, 130),
                             palette = "Dark2")

plot_appeal <- ggsurvplot(km_appeal,
                          data = data_appeal,
                          xlab = "Time in Months",
                          ylab = "Survival Probability",
                          title = "KM Survival Curves by Case Type (With Appeal)",
                          pval = TRUE,
                          conf.int = FALSE,
                          legend = "right",
                          legend.title = "Case Type",
                          break.x.by = 10,
                          xlim = c(0, 130),
                          palette = "Dark2")

# Arrange both plots on one page
arrange_ggsurvplots(list(plot_no_appeal, plot_appeal), 
                    ncol = 1, nrow = 2)

# Survival plot by case type
km <- survfit(Surv(time_in_months, Event) ~ case_type, data = clean_data)

# Plot survival curves
ggsurvplot(km, 
           xlab = "Time in Months", 
           ylab = "Survival Probability",
           pval = TRUE, 
           conf.int = TRUE,
           break.x.by = 10,
           xlim = c(0, 130),
           data = clean_data,
           legend = "right",
           legend.title = "Case type",
           legend.labs = levels(data$case_type),
           title = "Kaplan-Meier Survival curves by case type")

###################################################################
# Cox Proportional Hazards Model
# Cox PH 
overall_coxph <- coxph(surv_object1 ~ case_type + Appeals, data = data)
summary(overall_coxph)
## Call:
## coxph(formula = surv_object1 ~ case_type + Appeals, data = data)
## 
##   n= 92405, number of events= 92404 
## 
##                              coef exp(coef)  se(coef)       z Pr(>|z|)    
## case_typeCIVIL          -0.689615  0.501769  0.049301 -13.988   <2e-16 ***
## case_typeCONSTITUTIONAL -0.469718  0.625179  0.050395  -9.321   <2e-16 ***
## case_typeCRIMINAL       -0.073072  0.929534  0.049252  -1.484    0.138    
## case_typeFAMILY         -1.105360  0.331092  0.049488 -22.336   <2e-16 ***
## Appeals                 -0.312469  0.731638  0.008231 -37.963   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                         exp(coef) exp(-coef) lower .95 upper .95
## case_typeCIVIL             0.5018      1.993    0.4556    0.5527
## case_typeCONSTITUTIONAL    0.6252      1.600    0.5664    0.6901
## case_typeCRIMINAL          0.9295      1.076    0.8440    1.0237
## case_typeFAMILY            0.3311      3.020    0.3005    0.3648
## Appeals                    0.7316      1.367    0.7199    0.7435
## 
## Concordance= 0.647  (se = 0.001 )
## Likelihood ratio test= 13211  on 5 df,   p=<2e-16
## Wald test            = 13712  on 5 df,   p=<2e-16
## Score (logrank) test = 14296  on 5 df,   p=<2e-16
# Cox for appeals
cox_appeals <- coxph(Surv(time_in_months, Event) ~ Appeals, data = clean_data)
summary(cox_appeals)
## Call:
## coxph(formula = Surv(time_in_months, Event) ~ Appeals, data = clean_data)
## 
##   n= 81266, number of events= 81265 
## 
##             coef exp(coef) se(coef)     z Pr(>|z|)    
## Appeals 0.060336  1.062194 0.007879 7.658 1.89e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## Appeals     1.062     0.9414     1.046     1.079
## 
## Concordance= 0.487  (se = 0.001 )
## Likelihood ratio test= 58.14  on 1 df,   p=2e-14
## Wald test            = 58.65  on 1 df,   p=2e-14
## Score (logrank) test = 58.66  on 1 df,   p=2e-14
# Cox by case_type

cox_case_type <- coxph(Surv(time_in_months, Event) ~ case_type, data = clean_data)
summary(cox_case_type)
## Call:
## coxph(formula = Surv(time_in_months, Event) ~ case_type, data = clean_data)
## 
##   n= 81266, number of events= 81265 
## 
##                                            coef exp(coef) se(coef)       z
## case_typeCIVIL                         -0.77922   0.45876  0.05194 -15.003
## case_typeCONSTITUTIONAL & HUMAN RIGHTS -0.44440   0.64121  0.05307  -8.374
## case_typeCRIMINAL                      -0.36310   0.69552  0.05200  -6.982
## case_typeFAMILY                        -1.10681   0.33061  0.05212 -21.238
##                                        Pr(>|z|)    
## case_typeCIVIL                          < 2e-16 ***
## case_typeCONSTITUTIONAL & HUMAN RIGHTS  < 2e-16 ***
## case_typeCRIMINAL                      2.91e-12 ***
## case_typeFAMILY                         < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                        exp(coef) exp(-coef) lower .95 upper .95
## case_typeCIVIL                            0.4588      2.180    0.4144    0.5079
## case_typeCONSTITUTIONAL & HUMAN RIGHTS    0.6412      1.560    0.5779    0.7115
## case_typeCRIMINAL                         0.6955      1.438    0.6281    0.7701
## case_typeFAMILY                           0.3306      3.025    0.2985    0.3662
## 
## Concordance= 0.594  (se = 0.001 )
## Likelihood ratio test= 6538  on 4 df,   p=<2e-16
## Wald test            = 6586  on 4 df,   p=<2e-16
## Score (logrank) test = 6784  on 4 df,   p=<2e-16
# Weibull Model, accepts non-zero time and therefore wee use the subste of non-zero data
weibull <- survreg(Surv(time_in_months, Event) ~ Appeals + case_type, dist="weibull", data =  clean_data)
summary(weibull)
## 
## Call:
## survreg(formula = Surv(time_in_months, Event) ~ Appeals + case_type, 
##     data = clean_data, dist = "weibull")
##                                           Value Std. Error      z       p
## (Intercept)                             2.76711    0.04666  59.31 < 2e-16
## Appeals                                 0.12984    0.00782  16.60 < 2e-16
## case_typeCIVIL                          0.67352    0.04688  14.37 < 2e-16
## case_typeCONSTITUTIONAL & HUMAN RIGHTS  0.43235    0.04797   9.01 < 2e-16
## case_typeCRIMINAL                       0.30997    0.04696   6.60 4.1e-11
## case_typeFAMILY                         0.99159    0.04704  21.08 < 2e-16
## Log(scale)                             -0.10188    0.00280 -36.36 < 2e-16
## 
## Scale= 0.903 
## 
## Weibull distribution
## Loglik(model)= -356684.5   Loglik(intercept only)= -359795.2
##  Chisq= 6221.5 on 5 degrees of freedom, p= 0 
## Number of Newton-Raphson Iterations: 7 
## n= 81266
# Exponential Model 
exponential <- survreg(Surv(time_in_months , Event)~ Appeals + case_type, dist="exponential", data = clean_data)
summary(exponential)
## 
## Call:
## survreg(formula = Surv(time_in_months, Event) ~ Appeals + case_type, 
##     data = clean_data, dist = "exponential")
##                                          Value Std. Error     z       p
## (Intercept)                            2.71436    0.05163 52.57 < 2e-16
## Appeals                                0.15319    0.00863 17.75 < 2e-16
## case_typeCIVIL                         0.67858    0.05191 13.07 < 2e-16
## case_typeCONSTITUTIONAL & HUMAN RIGHTS 0.44341    0.05311  8.35 < 2e-16
## case_typeCRIMINAL                      0.30735    0.05200  5.91 3.4e-09
## case_typeFAMILY                        1.01705    0.05208 19.53 < 2e-16
## 
## Scale fixed at 1 
## 
## Exponential distribution
## Loglik(model)= -357315.9   Loglik(intercept only)= -360096.1
##  Chisq= 5560.28 on 5 degrees of freedom, p= 0 
## Number of Newton-Raphson Iterations: 5 
## n= 81266
# Log-normal Model
lognormal <- survreg(Surv(time_in_months , Event) ~ Appeals + case_type, dist="lognormal", data = clean_data)
summary(lognormal)
## 
## Call:
## survreg(formula = Surv(time_in_months, Event) ~ Appeals + case_type, 
##     data = clean_data, dist = "lognormal")
##                                          Value Std. Error     z      p
## (Intercept)                            2.11022    0.05790 36.45 <2e-16
## Appeals                                0.50621    0.00964 52.53 <2e-16
## case_typeCIVIL                         0.62736    0.05824 10.77 <2e-16
## case_typeCONSTITUTIONAL & HUMAN RIGHTS 0.53741    0.05956  9.02 <2e-16
## case_typeCRIMINAL                      0.18145    0.05835  3.11 0.0019
## case_typeFAMILY                        1.25950    0.05840 21.57 <2e-16
## Log(scale)                             0.11502    0.00248 46.37 <2e-16
## 
## Scale= 1.12 
## 
## Log Normal distribution
## Loglik(model)= -360923.4   Loglik(intercept only)= -365726.5
##  Chisq= 9606.11 on 5 degrees of freedom, p= 0 
## Number of Newton-Raphson Iterations: 3 
## n= 81266
# Log-logistic Model
loglogistic <- survreg(Surv(time_in_months , Event) ~ Appeals + case_type, dist="loglogistic", data = clean_data)
summary(loglogistic)
## 
## Call:
## survreg(formula = Surv(time_in_months, Event) ~ Appeals + case_type, 
##     data = clean_data, dist = "loglogistic")
##                                           Value Std. Error       z       p
## (Intercept)                             2.12486    0.05827   36.46 < 2e-16
## Appeals                                 0.48368    0.00969   49.91 < 2e-16
## case_typeCIVIL                          0.70377    0.05864   12.00 < 2e-16
## case_typeCONSTITUTIONAL & HUMAN RIGHTS  0.58755    0.05994    9.80 < 2e-16
## case_typeCRIMINAL                       0.23650    0.05877    4.02 5.7e-05
## case_typeFAMILY                         1.32960    0.05873   22.64 < 2e-16
## Log(scale)                             -0.44182    0.00291 -152.07 < 2e-16
## 
## Scale= 0.643 
## 
## Log logistic distribution
## Loglik(model)= -361847.7   Loglik(intercept only)= -366605.4
##  Chisq= 9515.4 on 5 degrees of freedom, p= 0 
## Number of Newton-Raphson Iterations: 3 
## n= 81266
# Gamma Model
gamma_model <- flexsurvreg(Surv(time_in_months, Event) ~ Appeals + case_type,
                           dist = "gamma", data = clean_data)
summary(gamma_model)
## Appeals=0.279329608938547,case_typeCIVIL=0.371299190313292,case_typeCONSTITUTIONAL & HUMAN RIGHTS=0.0792582383776733,case_typeCRIMINAL=0.28383333743509,case_typeFAMILY=0.26098245268624 
##     time         est         lcl         ucl
## 1      1 0.979006919 0.978371425 0.979593066
## 2      2 0.954061755 0.952999000 0.955073893
## 3      3 0.927981222 0.926576318 0.929338098
## 4      4 0.901496309 0.899809108 0.903123467
## 5      5 0.874971510 0.873061215 0.876824917
## 6      6 0.848623305 0.846519900 0.850645965
## 7      7 0.822591746 0.820393231 0.824729670
## 8      8 0.796971762 0.794630439 0.799230048
## 9      9 0.771829267 0.769388030 0.774176965
## 10    10 0.747210389 0.744739929 0.749638188
## 11    11 0.723147182 0.720629719 0.725665123
## 12    12 0.699661369 0.697099741 0.702203365
## 13    13 0.676766907 0.674183512 0.679374003
## 14    14 0.654471806 0.651905990 0.657132675
## 15    15 0.632779463 0.630256727 0.635483740
## 16    16 0.611689659 0.609157125 0.614437259
## 17    17 0.591199320 0.588682686 0.593951020
## 18    18 0.571303108 0.568760454 0.574024721
## 19    19 0.551993888 0.549459424 0.554710175
## 20    20 0.533263101 0.530720184 0.535982655
## 21    21 0.515101060 0.512559136 0.517818120
## 22    22 0.497497195 0.494948172 0.500207056
## 23    23 0.480440254 0.477918120 0.483138155
## 24    24 0.463918461 0.461374834 0.466600304
## 25    25 0.447919659 0.445379255 0.450581620
## 26    26 0.432431418 0.429902875 0.435070439
## 27    27 0.417441128 0.414912588 0.420071472
## 28    28 0.402936081 0.400415463 0.405539288
## 29    29 0.388903536 0.386415889 0.391482055
## 30    30 0.375330769 0.372863514 0.377920754
## 31    31 0.362205127 0.359760996 0.364760588
## 32    32 0.349514059 0.347094871 0.352089886
## 33    33 0.337245154 0.334822717 0.339811121
## 34    34 0.325386160 0.322994172 0.327913161
## 35    35 0.313925014 0.311575180 0.316389549
## 36    36 0.302849856 0.300519313 0.305305427
## 37    37 0.292149041 0.289845663 0.294590046
## 38    38 0.281811158 0.279543496 0.284199707
## 39    39 0.271825030 0.269576640 0.274198754
## 40    40 0.262179729 0.259942959 0.264514572
## 41    41 0.252864574 0.250641373 0.255174941
## 42    42 0.243869142 0.241661828 0.246162715
## 43    43 0.235183264 0.232995572 0.237455129
## 44    44 0.226797029 0.224624874 0.229054190
## 45    45 0.218700783 0.216548842 0.220940816
## 46    46 0.210885130 0.208769131 0.213110642
## 47    47 0.203340925 0.201264551 0.205540554
## 48    48 0.196059280 0.194008662 0.198241602
## 49    49 0.189031553 0.187004707 0.191197748
## 50    50 0.182249351 0.180246934 0.184398164
## 51    51 0.175704523 0.173726734 0.177842486
## 52    52 0.169389155 0.167437258 0.171514551
## 53    53 0.163295571 0.161370307 0.165406775
## 54    54 0.157416321 0.155518341 0.159511828
## 55    55 0.151744183 0.149872014 0.153822564
## 56    56 0.146272156 0.144419961 0.148306345
## 57    57 0.140993453 0.139174430 0.142987974
## 58    58 0.135901498 0.134115962 0.137873491
## 59    59 0.130989923 0.129234238 0.132930059
## 60    60 0.126252558 0.124526262 0.128165454
## 61    61 0.121683430 0.119982404 0.123564326
## 62    62 0.117276757 0.115617544 0.119131276
## 63    63 0.113026942 0.111391774 0.114837988
## 64    64 0.108928570 0.107319708 0.110713233
## 65    65 0.104976399 0.103391523 0.106734296
## 66    66 0.101165362 0.099611831 0.102896162
## 67    67 0.097490554 0.095968075 0.099193980
## 68    68 0.093947234 0.092455384 0.095611706
## 69    69 0.090530817 0.089069647 0.092155849
## 70    70 0.087236869 0.085806194 0.088832368
## 71    71 0.084061107 0.082659896 0.085627106
## 72    72 0.080999387 0.079626969 0.082535952
## 73    73 0.078047706 0.076701691 0.079564870
## 74    74 0.075202197 0.073881389 0.076699020
## 75    75 0.072459120 0.071163490 0.073922745
## 76    76 0.069814866 0.068544380 0.071245554
## 77    77 0.067265944 0.066020595 0.068664142
## 78    78 0.064808985 0.063592465 0.066175150
## 79    79 0.062440735 0.061258185 0.063775332
## 80    80 0.060158048 0.059005959 0.061464658
## 81    81 0.057957891 0.056831914 0.059236808
## 82    82 0.055837330 0.054737102 0.057088849
## 83    83 0.053793536 0.052718686 0.055017965
## 84    84 0.051823775 0.050773926 0.053021355
## 85    85 0.049925410 0.048899822 0.051096448
## 86    86 0.048095893 0.047090038 0.049240740
## 87    87 0.046332766 0.045346705 0.047451779
## 88    88 0.044633656 0.043670448 0.045727202
## 89    89 0.042996273 0.042057152 0.044064724
## 90    90 0.041418407 0.040502139 0.042462141
## 91    91 0.039897924 0.039004103 0.040917389
## 92    92 0.038432766 0.037561072 0.039429819
## 93    93 0.037020947 0.036168429 0.037993574
## 94    94 0.035660551 0.034826302 0.036609674
## 95    95 0.034349728 0.033533546 0.035276187
## 96    96 0.033086694 0.032291480 0.033990881
## 97    97 0.031869730 0.031095661 0.032752035
## 98    98 0.030697173 0.029942576 0.031557990
## 99    99 0.029567424 0.028830620 0.030408869
## 100  100 0.028478937 0.027759635 0.029302003
## 101  101 0.027430221 0.026728129 0.028235042
## 102  102 0.026419840 0.025734662 0.027204922
## 103  103 0.025446407 0.024777906 0.026214952
## 104  104 0.024508586 0.023856237 0.025260521
## 105  105 0.023605087 0.022968276 0.024339932
## 106  106 0.022734667 0.022113130 0.023452675
## 107  107 0.021896127 0.021289600 0.022597533
## 108  108 0.021088310 0.020496528 0.021773336
## 109  109 0.020310102 0.019734223 0.020977789
## 110  110 0.019560427 0.019000187 0.020209845
## 111  111 0.018838249 0.018292262 0.019469155
## 112  112 0.018142569 0.017610546 0.018755437
## 113  113 0.017472422 0.016954076 0.018067746
## 114  114 0.016826880 0.016321926 0.017407193
## 115  115 0.016205048 0.015713203 0.016769008
## 116  116 0.015606061 0.015127046 0.016154081
## 117  117 0.015029087 0.014562625 0.015561572
## 118  118 0.014473324 0.014019142 0.014990671
## 119  119 0.013937998 0.013495826 0.014440578
## 120  120 0.013422363 0.012991934 0.013910556
## 121  121 0.012925702 0.012506752 0.013399882
## 122  122 0.012447320 0.012039589 0.012907838
## 123  123 0.011986550 0.011589783 0.012433759
## 124  124 0.011542749 0.011156697 0.011977001
## 125  125 0.011115296 0.010739686 0.011536935
## 126  126 0.010703592 0.010337558 0.011112955
## 127  127 0.010307063 0.009950408 0.010704637
## 128  128 0.009925151 0.009577710 0.010311641
## 129  129 0.009557323 0.009219438 0.009933004
## 130  130 0.009203062 0.008875717 0.009568206
# Objective 3

# Compare models using AIC
AIC(overall_coxph, weibull, exponential, lognormal, loglogistic, gamma_model)
## Warning in AIC.default(overall_coxph, weibull, exponential, lognormal,
## loglogistic, : models are not all fitted to the same number of observations
##               df       AIC
## overall_coxph  5 1915086.7
## weibull        7  713383.0
## exponential    6  714643.8
## lognormal      7  721860.8
## loglogistic    7  723709.4
## gamma_model    7  713569.9

Appendix : Counties

# Group by County_No and County, count cases, and add a total row
court_case_counts <- data %>%
  group_by(County_No, court_name, County) %>%
  summarize(count = n()) %>%
  ungroup() %>%
  mutate(total = sum(count)) %>%
  select(County_No, court_name, County, count)
## `summarise()` has grouped output by 'County_No', 'court_name'. You can override
## using the `.groups` argument.
# Add a total row
court_counts_with_total <- court_case_counts %>%
  adorn_totals("row")

# Print formatted table with total
court_name_tables <- court_counts_with_total %>%
  kable(format = "html", caption = "Number of cases per court") %>%
  kable_styling()

court_name_tables
Number of cases per court
County_No court_name County count
1 Mombasa High Court Mombasa County 4555
3 Malindi High Court Kilifi County 1740
4 Garsen High Court Tana River County 557
6 Voi High Court Taita Taveta County 966
7 Garissa High Court Garissa County 182
10 Marsabit High Court Marsabit County 391
12 Meru High Court Meru County 4321
13 Chuka High Court Tharaka-Nithi County 950
14 Embu High Court Embu County 1439
15 Kitui High Court Kitui County 1603
16 Machakos High Court Machakos County 3799
17 Makueni High Court Makueni County 1640
18 Nyandarua High Court Nyandarua County 640
19 Nyeri High Court Nyeri County 1607
20 Kerugoya High Court Kirinyaga County 1211
21 Muranga High Court Murang’a County 1787
22 Kiambu High Court Kiambu County 3462
23 Lodwar High Court Turkana County 326
24 Kapenguria High Court West Pokot County 270
26 Kitale High Court Trans-Nzoia County 2410
27 Eldoret High Court Uasin Gishu County 4139
30 Kabarnet High Court Baringo County 861
31 Nanyuki High Court Laikipia County 706
32 Naivasha High Court Nakuru County 1615
32 Nakuru High Court Nakuru County 6692
33 Narok High Court Narok County 670
34 Kajiado High Court Kajiado County 1258
35 Kericho High Court Kericho County 1202
36 Bomet High Court Bomet County 681
37 Kakamega High Court Kakamega County 2549
38 Vihiga High Court Vihiga County 71
39 Bungoma High Court Bungoma County 2092
40 Busia High Court Busia County 1170
41 Siaya High Court Siaya County 2279
42 Kisumu High Court Kisumu County 3160
43 Homabay High Court Homa Bay County 1881
44 Migori High Court Migori County 1844
45 Kisii High Court Kisii County 3184
46 Nyamira High Court Nyamira County 1184
47 Milimani AntiCorruption & Economic Crimes Division Nairobi County 418
47 Milimani Civil Division Nairobi County 6194
47 Milimani Commercial & Tax Division Nairobi County 20
47 Milimani Constitutional Law & Human Rights Division Nairobi County 1939
47 Milimani Criminal Division Nairobi County 2032
47 Milimani Family Division Nairobi County 9116
47 Milimani Judicial Review Division Nairobi County 1592
Total
92405