Introduction for 4.1
# Count cases and compute percentages
case_counts <- data %>%
group_by(case_type) %>%
summarize(count = n()) %>%
ungroup() %>%
mutate(total = sum(count)) %>%
mutate(percentage = round(count / total * 100, 1)) %>%
select(case_type, count, percentage)
# Add a total row
case_counts_with_total <- case_counts %>%
adorn_totals("row") %>%
mutate(percentage = ifelse(case_type == "Total",
"100.0",
percentage))
# Print formatted table with total
case_table <- case_counts_with_total %>%
kable(format = "html", caption = "Number of cases and percentage distribution by case type") %>%
kable_styling()
case_table
Number of cases and percentage distribution by case type
|
case_type
|
count
|
percentage
|
|
ANTI-CORRUPTION
|
418
|
0.5
|
|
CIVIL
|
31148
|
33.7
|
|
CONSTITUTIONAL
|
7036
|
7.6
|
|
CRIMINAL
|
31707
|
34.3
|
|
FAMILY
|
22096
|
23.9
|
|
Total
|
92405
|
100.0
|
# Exploratory Data Analysis (Objective 1)
###############################################################
# 4.2.1
# Testing for normality
# Histogram of time variables
# Create a histogram of time_in_months
# Compute descriptive statistics
mean_time <- round(mean(data$time_in_months, na.rm = TRUE), 2)
median_time <- round(median(data$time_in_months, na.rm = TRUE), 2)
sd_time <- round(sd(data$time_in_months, na.rm = TRUE), 2)
# Create a data frame for the normal curve
x_vals <- seq(min(data$time_in_months, na.rm = TRUE),
max(data$time_in_months, na.rm = TRUE),
length.out = 200)
normal_curve <- data.frame(
x = x_vals,
density = dnorm(x_vals, mean = mean_time, sd = sd_time)
)
# Create histogram with normal curve and labeled stats
ggplot(data, aes(x = time_in_months)) +
geom_histogram(aes(y = after_stat(density)),
bins = 30, fill = "skyblue", color = "black", alpha = 0.7) +
labs(title = "Overall Distribution of Time in Months",
x = "Time in Months",
y = "Density") +
theme_minimal() +
# Add normal curve
geom_line(data = normal_curve, aes(x = x, y = density),
color = "darkgreen", linewidth = 1) +
# Add vertical line for mean
geom_vline(xintercept = mean_time, color = "red", linetype = "dashed", linewidth = 1) +
annotate("text", x = mean_time, y = Inf, label = paste("Mean =", mean_time),
vjust = 1.5, hjust = 1.1, color = "red", angle = 90, size = 4) +
# Add vertical line for median
geom_vline(xintercept = median_time, color = "blue", linetype = "dotted", linewidth = 1) +
annotate("text", x = median_time, y = Inf, label = paste("Median =", median_time),
vjust = 1.5, hjust = 1.1, color = "blue", angle = 90, size = 4) +
# Annotate summary statistics in top-right corner
annotate("text",
x = Inf, y = Inf,
label = paste("Mean =", mean_time, "\nMedian =", median_time, "\nSD =", sd_time),
hjust = 1.1, vjust = 1.1, size = 4, color = "black", fontface = "bold")

# Exploratory Data Analysis (Objective 1)
###############################################################
# 4.2.1
# Correct one-> Group by case_type and summarize time_in_months
summary_stats <- data %>%
group_by(case_type) %>%
summarize(
mean_time = mean(time_in_months, na.rm = TRUE),
median_time = median(time_in_months, na.rm = TRUE),
min_time = min(time_in_months, na.rm = TRUE),
max_time = max(time_in_months, na.rm = TRUE),
sd_time = sd(time_in_months, na.rm = TRUE)
)
# Print formatted table
kable(summary_stats, caption = "Summary statistics of time in months grouped by case type") %>%
kable_styling()
Summary statistics of time in months grouped by case type
|
case_type
|
mean_time
|
median_time
|
min_time
|
max_time
|
sd_time
|
|
ANTI-CORRUPTION
|
14.21770
|
8
|
0
|
72
|
16.01312
|
|
CIVIL
|
30.88686
|
23
|
0
|
130
|
27.10895
|
|
CONSTITUTIONAL
|
21.53766
|
14
|
0
|
129
|
22.23865
|
|
CRIMINAL
|
15.90816
|
8
|
0
|
126
|
20.81024
|
|
FAMILY
|
40.19234
|
35
|
0
|
129
|
30.14336
|
# 4.2.2
# 4.2.3
# Distribution by case type
# Compute mean, median, and standard deviation for each case_type
summary_stats <- data %>%
group_by(case_type) %>%
summarise(
Mean = mean(time_in_months, na.rm = TRUE),
Median = median(time_in_months, na.rm = TRUE),
SD = sd(time_in_months, na.rm = TRUE)
) %>%
ungroup()
# Generate normal curve data
normal_curves <- summary_stats %>%
rowwise() %>%
do({
case = .$case_type
mean = .$Mean
sd = .$SD
x_vals = seq(mean - 4 * sd, mean + 4 * sd, length.out = 200)
data.frame(
case_type = case,
x = x_vals,
density = dnorm(x_vals, mean = mean, sd = sd)
)
}) %>%
ungroup()
# Create label positions for SD text
sd_labels <- summary_stats %>%
mutate(
x = Mean + SD, # position label at 1 SD right of mean
y = dnorm(Mean + SD, mean = Mean, sd = SD),
label = paste0("SD = ", round(SD, 2))
)
# Final plot
ggplot(data, aes(x = time_in_months)) +
geom_histogram(aes(y = after_stat(density)),
binwidth = 5, fill = "skyblue", color = "black", alpha = 0.7) +
labs(
title = "Distribution of Time in Months by Case Type",
x = "Time in Months",
y = "Density"
) +
facet_wrap(~ case_type, scales = "free") +
# Add vertical lines for mean and median
geom_vline(data = summary_stats,
aes(xintercept = Mean, color = "Mean"),
linetype = "dashed", linewidth = 1) +
geom_vline(data = summary_stats,
aes(xintercept = Median, color = "Median"),
linetype = "dotted", linewidth = 1) +
# Add text labels for mean and median
geom_text(data = summary_stats,
aes(x = Mean, y = Inf, label = paste("Mean =", round(Mean, 2))),
vjust = 1.5, hjust = -0.3, color = "red", size = 3) +
geom_text(data = summary_stats,
aes(x = Median, y = Inf, label = paste("Median =", round(Median, 2))),
vjust = 3, hjust = -0.5, color = "blue", size = 3) +
# Add normal curve
geom_line(data = normal_curves,
aes(x = x, y = density),
color = "darkgreen", linewidth = 1) +
# Add SD labels
geom_text(data = sd_labels,
aes(x = x, y = y, label = label),
color = "darkgreen", vjust = -1, hjust = -0.1, size = 3) +
# Define colors for mean and median
scale_color_manual(name = "Statistics",
values = c("Mean" = "red", "Median" = "blue")) +
theme_minimal() +
theme(legend.position = "top")

# 4.2.4
# Group by Appeals and summarize time_in_months
summary_stats <- data %>%
group_by(Appeals) %>%
summarize(
mean_time = mean(time_in_months, na.rm = TRUE),
median_time = median(time_in_months, na.rm = TRUE),
min_time = min(time_in_months, na.rm = TRUE),
max_time = max(time_in_months, na.rm = TRUE),
sd_time = sd(time_in_months, na.rm = TRUE)
)
# Print formatted table
kable(summary_stats, caption = "Summary statistics of time in months grouped by Appeals") %>%
kable_styling()
Summary statistics of time in months grouped by Appeals
|
Appeals
|
mean_time
|
median_time
|
min_time
|
max_time
|
sd_time
|
|
0
|
26.22322
|
15
|
0
|
130
|
28.47277
|
|
1
|
30.08051
|
24
|
0
|
129
|
23.09141
|
# T-test to compare time by Appeals groups
t_test_results <- t.test(time_in_months ~ Appeals, data = data)
# Print the t-test results
cat("T-test comparing time by Appeals groups:\n")
## T-test comparing time by Appeals groups:
print(t_test_results)
##
## Welch Two Sample t-test
##
## data: time_in_months by Appeals
## t = -20.669, df = 48116, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -4.223075 -3.491502
## sample estimates:
## mean in group 0 mean in group 1
## 26.22322 30.08051
# Creating the boxplot by appeals
boxplot <- ggplot(data, aes(x = factor(Appeals), y = time_in_months, fill = factor(Appeals))) +
geom_boxplot() +
labs(
title = "Boxplot of time in months by Appeals groups",
x = "Appeals",
y = "Time in Months"
) +
scale_fill_manual(
values = c("0" = "#0073C2", "1" = "#EFC000"), # Define your custom colors here
name = "Appeals",
labels = c("No Appeal", "Appealed")
) +
theme_minimal()
# Print the boxplot
print(boxplot)

# 4.2.5
# Computing summary statistics grouped by case_type and Appeals
# Compute summary statistics
summary_stats <- data %>%
group_by(case_type, Appeals = factor(Appeals)) %>%
summarise(
Mean = mean(time_in_months, na.rm = TRUE),
Median = median(time_in_months, na.rm = TRUE),
SD = sd(time_in_months, na.rm = TRUE),
n = n(),
.groups = "drop"
)
# Print styled table
kable(summary_stats,
caption = "Summary Statistics of Case Duration by Case Type and Appeal Status") %>%
kable_styling(full_width = FALSE, bootstrap_options = c("striped", "condensed"))
Summary Statistics of Case Duration by Case Type and Appeal Status
|
case_type
|
Appeals
|
Mean
|
Median
|
SD
|
n
|
|
ANTI-CORRUPTION
|
0
|
12.38110
|
6
|
14.96820
|
328
|
|
ANTI-CORRUPTION
|
1
|
20.91111
|
14
|
17.90276
|
90
|
|
CIVIL
|
0
|
27.82354
|
16
|
28.41501
|
18310
|
|
CIVIL
|
1
|
35.25588
|
31
|
24.47546
|
12838
|
|
CONSTITUTIONAL
|
0
|
21.53702
|
14
|
22.23388
|
7009
|
|
CONSTITUTIONAL
|
1
|
21.70370
|
5
|
23.89048
|
27
|
|
CRIMINAL
|
0
|
12.90239
|
2
|
20.91305
|
22211
|
|
CRIMINAL
|
1
|
22.93861
|
17
|
18.77528
|
9496
|
|
FAMILY
|
0
|
40.35727
|
35
|
30.26458
|
21505
|
|
FAMILY
|
1
|
34.19120
|
31
|
24.62111
|
591
|
# Create grouped boxplot
ggplot(data, aes(x = case_type, y = time_in_months, fill = factor(Appeals))) +
geom_boxplot() +
labs(
title = "Case Duration by Case Type and Appeal Status",
x = "Case Type",
y = "Time in Months",
fill = "Appeals"
) +
scale_fill_discrete(labels = c("No Appeal", "Appealed")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top") +
theme_minimal()

# Create survival object
surv_object1 <- Surv(data$time_in_months, data$Event)
surv_object <- Surv(clean_data$time_in_months, clean_data$Event)
#Objective 2
# Kaplan Meier Survival Curve
# Overall KM model
km_overall <- survfit(Surv(time_in_months, Event) ~ 1, data = clean_data)
# Time points
time_points <- c(12, 24, 36, 48, 60, 72, 84, 96, 108, 120, 132)
# Summary
summary_overall <- summary(km_overall, times = time_points)
# Format
km_overall_df <- data.frame(
Time = summary_overall$time,
Survival_Probability = round(summary_overall$surv, 3),
Standard_Error = round(summary_overall$std.err, 3),
CI_Lower = round(summary_overall$lower, 3),
CI_Upper = round(summary_overall$upper, 3)
)
print(km_overall_df)
## Time Survival_Probability Standard_Error CI_Lower CI_Upper
## 1 12 0.674 0.002 0.671 0.677
## 2 24 0.472 0.002 0.469 0.476
## 3 36 0.343 0.002 0.339 0.346
## 4 48 0.238 0.001 0.235 0.241
## 5 60 0.163 0.001 0.160 0.165
## 6 72 0.098 0.001 0.096 0.100
## 7 84 0.051 0.001 0.049 0.052
## 8 96 0.025 0.001 0.024 0.026
## 9 108 0.010 0.000 0.010 0.011
## 10 120 0.002 0.000 0.002 0.002
ggsurvplot(km_overall,
data = clean_data,
xlab = "Time in Months",
ylab = "Survival Probability",
title = "Overall Kaplan-Meier survival curve",
conf.int = TRUE,
break.x.by = 10,
xlim = c(0, 130))

# Survival curve by appeal status
km_appeal <- survfit(Surv(time_in_months, Event) ~ Appeals, data = clean_data)
# Summary
summary_appeal <- summary(km_appeal, times = time_points)
# Format
km_appeal_df <- data.frame(
Appeal_Status = summary_appeal$strata,
Time = summary_appeal$time,
Survival_Probability = round(summary_appeal$surv, 3),
Standard_Error = round(summary_appeal$std.err, 3),
CI_Lower = round(summary_appeal$lower, 3),
CI_Upper = round(summary_appeal$upper, 3)
)
print(km_appeal_df)
## Appeal_Status Time Survival_Probability Standard_Error CI_Lower CI_Upper
## 1 Appeals=0 12 0.644 0.002 0.640 0.648
## 2 Appeals=0 24 0.462 0.002 0.458 0.466
## 3 Appeals=0 36 0.348 0.002 0.344 0.352
## 4 Appeals=0 48 0.251 0.002 0.247 0.254
## 5 Appeals=0 60 0.178 0.002 0.175 0.181
## 6 Appeals=0 72 0.112 0.001 0.109 0.114
## 7 Appeals=0 84 0.059 0.001 0.057 0.061
## 8 Appeals=0 96 0.031 0.001 0.030 0.033
## 9 Appeals=0 108 0.013 0.000 0.013 0.014
## 10 Appeals=0 120 0.002 0.000 0.002 0.003
## 11 Appeals=1 12 0.750 0.003 0.745 0.756
## 12 Appeals=1 24 0.499 0.003 0.492 0.505
## 13 Appeals=1 36 0.330 0.003 0.324 0.336
## 14 Appeals=1 48 0.206 0.003 0.201 0.211
## 15 Appeals=1 60 0.124 0.002 0.119 0.128
## 16 Appeals=1 72 0.064 0.002 0.061 0.067
## 17 Appeals=1 84 0.030 0.001 0.027 0.032
## 18 Appeals=1 96 0.010 0.001 0.009 0.012
## 19 Appeals=1 108 0.003 0.000 0.002 0.004
## 20 Appeals=1 120 0.001 0.000 0.000 0.001
ggsurvplot(km_appeal,
xlab = "Time in Months",
ylab = "Survival Probability",
title = "Kaplan-Meier Survival Curves by Appeal Status",
pval = TRUE,
conf.int = TRUE,
legend = "right",
legend.labs = c("No Appeal", "Appealed"),
legend.title = "Appeal Status",
palette = c("steelblue", "gold"))

# Subset data
data_no_appeal <- filter(clean_data, Appeals == 0)
data_appeal <- filter(clean_data, Appeals == 1)
# Fit KM models separately
km_no_appeal <- survfit(Surv(time_in_months, Event) ~ case_type, data = data_no_appeal)
km_appeal <- survfit(Surv(time_in_months, Event) ~ case_type, data = data_appeal)
# Create individual plots
plot_no_appeal <- ggsurvplot(km_no_appeal,
data = data_no_appeal,
xlab = "Time in Months",
ylab = "Survival Probability",
title = "KM Survival Curves by Case Type (No Appeal)",
pval = TRUE,
conf.int = FALSE,
legend = "right",
legend.title = "Case Type",
break.x.by = 10,
xlim = c(0, 130),
palette = "Dark2")
plot_appeal <- ggsurvplot(km_appeal,
data = data_appeal,
xlab = "Time in Months",
ylab = "Survival Probability",
title = "KM Survival Curves by Case Type (With Appeal)",
pval = TRUE,
conf.int = FALSE,
legend = "right",
legend.title = "Case Type",
break.x.by = 10,
xlim = c(0, 130),
palette = "Dark2")
# Arrange both plots on one page
arrange_ggsurvplots(list(plot_no_appeal, plot_appeal),
ncol = 1, nrow = 2)

# Survival plot by case type
km <- survfit(Surv(time_in_months, Event) ~ case_type, data = clean_data)
# Plot survival curves
ggsurvplot(km,
xlab = "Time in Months",
ylab = "Survival Probability",
pval = TRUE,
conf.int = TRUE,
break.x.by = 10,
xlim = c(0, 130),
data = clean_data,
legend = "right",
legend.title = "Case type",
legend.labs = levels(data$case_type),
title = "Kaplan-Meier Survival curves by case type")

###################################################################
# Cox Proportional Hazards Model
# Cox PH
overall_coxph <- coxph(surv_object1 ~ case_type + Appeals, data = data)
summary(overall_coxph)
## Call:
## coxph(formula = surv_object1 ~ case_type + Appeals, data = data)
##
## n= 92405, number of events= 92404
##
## coef exp(coef) se(coef) z Pr(>|z|)
## case_typeCIVIL -0.689615 0.501769 0.049301 -13.988 <2e-16 ***
## case_typeCONSTITUTIONAL -0.469718 0.625179 0.050395 -9.321 <2e-16 ***
## case_typeCRIMINAL -0.073072 0.929534 0.049252 -1.484 0.138
## case_typeFAMILY -1.105360 0.331092 0.049488 -22.336 <2e-16 ***
## Appeals -0.312469 0.731638 0.008231 -37.963 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## case_typeCIVIL 0.5018 1.993 0.4556 0.5527
## case_typeCONSTITUTIONAL 0.6252 1.600 0.5664 0.6901
## case_typeCRIMINAL 0.9295 1.076 0.8440 1.0237
## case_typeFAMILY 0.3311 3.020 0.3005 0.3648
## Appeals 0.7316 1.367 0.7199 0.7435
##
## Concordance= 0.647 (se = 0.001 )
## Likelihood ratio test= 13211 on 5 df, p=<2e-16
## Wald test = 13712 on 5 df, p=<2e-16
## Score (logrank) test = 14296 on 5 df, p=<2e-16
# Cox for appeals
cox_appeals <- coxph(Surv(time_in_months, Event) ~ Appeals, data = clean_data)
summary(cox_appeals)
## Call:
## coxph(formula = Surv(time_in_months, Event) ~ Appeals, data = clean_data)
##
## n= 81266, number of events= 81265
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Appeals 0.060336 1.062194 0.007879 7.658 1.89e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Appeals 1.062 0.9414 1.046 1.079
##
## Concordance= 0.487 (se = 0.001 )
## Likelihood ratio test= 58.14 on 1 df, p=2e-14
## Wald test = 58.65 on 1 df, p=2e-14
## Score (logrank) test = 58.66 on 1 df, p=2e-14
# Cox by case_type
cox_case_type <- coxph(Surv(time_in_months, Event) ~ case_type, data = clean_data)
summary(cox_case_type)
## Call:
## coxph(formula = Surv(time_in_months, Event) ~ case_type, data = clean_data)
##
## n= 81266, number of events= 81265
##
## coef exp(coef) se(coef) z
## case_typeCIVIL -0.77922 0.45876 0.05194 -15.003
## case_typeCONSTITUTIONAL & HUMAN RIGHTS -0.44440 0.64121 0.05307 -8.374
## case_typeCRIMINAL -0.36310 0.69552 0.05200 -6.982
## case_typeFAMILY -1.10681 0.33061 0.05212 -21.238
## Pr(>|z|)
## case_typeCIVIL < 2e-16 ***
## case_typeCONSTITUTIONAL & HUMAN RIGHTS < 2e-16 ***
## case_typeCRIMINAL 2.91e-12 ***
## case_typeFAMILY < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## case_typeCIVIL 0.4588 2.180 0.4144 0.5079
## case_typeCONSTITUTIONAL & HUMAN RIGHTS 0.6412 1.560 0.5779 0.7115
## case_typeCRIMINAL 0.6955 1.438 0.6281 0.7701
## case_typeFAMILY 0.3306 3.025 0.2985 0.3662
##
## Concordance= 0.594 (se = 0.001 )
## Likelihood ratio test= 6538 on 4 df, p=<2e-16
## Wald test = 6586 on 4 df, p=<2e-16
## Score (logrank) test = 6784 on 4 df, p=<2e-16
# Weibull Model, accepts non-zero time and therefore wee use the subste of non-zero data
weibull <- survreg(Surv(time_in_months, Event) ~ Appeals + case_type, dist="weibull", data = clean_data)
summary(weibull)
##
## Call:
## survreg(formula = Surv(time_in_months, Event) ~ Appeals + case_type,
## data = clean_data, dist = "weibull")
## Value Std. Error z p
## (Intercept) 2.76711 0.04666 59.31 < 2e-16
## Appeals 0.12984 0.00782 16.60 < 2e-16
## case_typeCIVIL 0.67352 0.04688 14.37 < 2e-16
## case_typeCONSTITUTIONAL & HUMAN RIGHTS 0.43235 0.04797 9.01 < 2e-16
## case_typeCRIMINAL 0.30997 0.04696 6.60 4.1e-11
## case_typeFAMILY 0.99159 0.04704 21.08 < 2e-16
## Log(scale) -0.10188 0.00280 -36.36 < 2e-16
##
## Scale= 0.903
##
## Weibull distribution
## Loglik(model)= -356684.5 Loglik(intercept only)= -359795.2
## Chisq= 6221.5 on 5 degrees of freedom, p= 0
## Number of Newton-Raphson Iterations: 7
## n= 81266
# Exponential Model
exponential <- survreg(Surv(time_in_months , Event)~ Appeals + case_type, dist="exponential", data = clean_data)
summary(exponential)
##
## Call:
## survreg(formula = Surv(time_in_months, Event) ~ Appeals + case_type,
## data = clean_data, dist = "exponential")
## Value Std. Error z p
## (Intercept) 2.71436 0.05163 52.57 < 2e-16
## Appeals 0.15319 0.00863 17.75 < 2e-16
## case_typeCIVIL 0.67858 0.05191 13.07 < 2e-16
## case_typeCONSTITUTIONAL & HUMAN RIGHTS 0.44341 0.05311 8.35 < 2e-16
## case_typeCRIMINAL 0.30735 0.05200 5.91 3.4e-09
## case_typeFAMILY 1.01705 0.05208 19.53 < 2e-16
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -357315.9 Loglik(intercept only)= -360096.1
## Chisq= 5560.28 on 5 degrees of freedom, p= 0
## Number of Newton-Raphson Iterations: 5
## n= 81266
# Log-normal Model
lognormal <- survreg(Surv(time_in_months , Event) ~ Appeals + case_type, dist="lognormal", data = clean_data)
summary(lognormal)
##
## Call:
## survreg(formula = Surv(time_in_months, Event) ~ Appeals + case_type,
## data = clean_data, dist = "lognormal")
## Value Std. Error z p
## (Intercept) 2.11022 0.05790 36.45 <2e-16
## Appeals 0.50621 0.00964 52.53 <2e-16
## case_typeCIVIL 0.62736 0.05824 10.77 <2e-16
## case_typeCONSTITUTIONAL & HUMAN RIGHTS 0.53741 0.05956 9.02 <2e-16
## case_typeCRIMINAL 0.18145 0.05835 3.11 0.0019
## case_typeFAMILY 1.25950 0.05840 21.57 <2e-16
## Log(scale) 0.11502 0.00248 46.37 <2e-16
##
## Scale= 1.12
##
## Log Normal distribution
## Loglik(model)= -360923.4 Loglik(intercept only)= -365726.5
## Chisq= 9606.11 on 5 degrees of freedom, p= 0
## Number of Newton-Raphson Iterations: 3
## n= 81266
# Log-logistic Model
loglogistic <- survreg(Surv(time_in_months , Event) ~ Appeals + case_type, dist="loglogistic", data = clean_data)
summary(loglogistic)
##
## Call:
## survreg(formula = Surv(time_in_months, Event) ~ Appeals + case_type,
## data = clean_data, dist = "loglogistic")
## Value Std. Error z p
## (Intercept) 2.12486 0.05827 36.46 < 2e-16
## Appeals 0.48368 0.00969 49.91 < 2e-16
## case_typeCIVIL 0.70377 0.05864 12.00 < 2e-16
## case_typeCONSTITUTIONAL & HUMAN RIGHTS 0.58755 0.05994 9.80 < 2e-16
## case_typeCRIMINAL 0.23650 0.05877 4.02 5.7e-05
## case_typeFAMILY 1.32960 0.05873 22.64 < 2e-16
## Log(scale) -0.44182 0.00291 -152.07 < 2e-16
##
## Scale= 0.643
##
## Log logistic distribution
## Loglik(model)= -361847.7 Loglik(intercept only)= -366605.4
## Chisq= 9515.4 on 5 degrees of freedom, p= 0
## Number of Newton-Raphson Iterations: 3
## n= 81266
# Gamma Model
gamma_model <- flexsurvreg(Surv(time_in_months, Event) ~ Appeals + case_type,
dist = "gamma", data = clean_data)
summary(gamma_model)
## Appeals=0.279329608938547,case_typeCIVIL=0.371299190313292,case_typeCONSTITUTIONAL & HUMAN RIGHTS=0.0792582383776733,case_typeCRIMINAL=0.28383333743509,case_typeFAMILY=0.26098245268624
## time est lcl ucl
## 1 1 0.979006919 0.978371425 0.979593066
## 2 2 0.954061755 0.952999000 0.955073893
## 3 3 0.927981222 0.926576318 0.929338098
## 4 4 0.901496309 0.899809108 0.903123467
## 5 5 0.874971510 0.873061215 0.876824917
## 6 6 0.848623305 0.846519900 0.850645965
## 7 7 0.822591746 0.820393231 0.824729670
## 8 8 0.796971762 0.794630439 0.799230048
## 9 9 0.771829267 0.769388030 0.774176965
## 10 10 0.747210389 0.744739929 0.749638188
## 11 11 0.723147182 0.720629719 0.725665123
## 12 12 0.699661369 0.697099741 0.702203365
## 13 13 0.676766907 0.674183512 0.679374003
## 14 14 0.654471806 0.651905990 0.657132675
## 15 15 0.632779463 0.630256727 0.635483740
## 16 16 0.611689659 0.609157125 0.614437259
## 17 17 0.591199320 0.588682686 0.593951020
## 18 18 0.571303108 0.568760454 0.574024721
## 19 19 0.551993888 0.549459424 0.554710175
## 20 20 0.533263101 0.530720184 0.535982655
## 21 21 0.515101060 0.512559136 0.517818120
## 22 22 0.497497195 0.494948172 0.500207056
## 23 23 0.480440254 0.477918120 0.483138155
## 24 24 0.463918461 0.461374834 0.466600304
## 25 25 0.447919659 0.445379255 0.450581620
## 26 26 0.432431418 0.429902875 0.435070439
## 27 27 0.417441128 0.414912588 0.420071472
## 28 28 0.402936081 0.400415463 0.405539288
## 29 29 0.388903536 0.386415889 0.391482055
## 30 30 0.375330769 0.372863514 0.377920754
## 31 31 0.362205127 0.359760996 0.364760588
## 32 32 0.349514059 0.347094871 0.352089886
## 33 33 0.337245154 0.334822717 0.339811121
## 34 34 0.325386160 0.322994172 0.327913161
## 35 35 0.313925014 0.311575180 0.316389549
## 36 36 0.302849856 0.300519313 0.305305427
## 37 37 0.292149041 0.289845663 0.294590046
## 38 38 0.281811158 0.279543496 0.284199707
## 39 39 0.271825030 0.269576640 0.274198754
## 40 40 0.262179729 0.259942959 0.264514572
## 41 41 0.252864574 0.250641373 0.255174941
## 42 42 0.243869142 0.241661828 0.246162715
## 43 43 0.235183264 0.232995572 0.237455129
## 44 44 0.226797029 0.224624874 0.229054190
## 45 45 0.218700783 0.216548842 0.220940816
## 46 46 0.210885130 0.208769131 0.213110642
## 47 47 0.203340925 0.201264551 0.205540554
## 48 48 0.196059280 0.194008662 0.198241602
## 49 49 0.189031553 0.187004707 0.191197748
## 50 50 0.182249351 0.180246934 0.184398164
## 51 51 0.175704523 0.173726734 0.177842486
## 52 52 0.169389155 0.167437258 0.171514551
## 53 53 0.163295571 0.161370307 0.165406775
## 54 54 0.157416321 0.155518341 0.159511828
## 55 55 0.151744183 0.149872014 0.153822564
## 56 56 0.146272156 0.144419961 0.148306345
## 57 57 0.140993453 0.139174430 0.142987974
## 58 58 0.135901498 0.134115962 0.137873491
## 59 59 0.130989923 0.129234238 0.132930059
## 60 60 0.126252558 0.124526262 0.128165454
## 61 61 0.121683430 0.119982404 0.123564326
## 62 62 0.117276757 0.115617544 0.119131276
## 63 63 0.113026942 0.111391774 0.114837988
## 64 64 0.108928570 0.107319708 0.110713233
## 65 65 0.104976399 0.103391523 0.106734296
## 66 66 0.101165362 0.099611831 0.102896162
## 67 67 0.097490554 0.095968075 0.099193980
## 68 68 0.093947234 0.092455384 0.095611706
## 69 69 0.090530817 0.089069647 0.092155849
## 70 70 0.087236869 0.085806194 0.088832368
## 71 71 0.084061107 0.082659896 0.085627106
## 72 72 0.080999387 0.079626969 0.082535952
## 73 73 0.078047706 0.076701691 0.079564870
## 74 74 0.075202197 0.073881389 0.076699020
## 75 75 0.072459120 0.071163490 0.073922745
## 76 76 0.069814866 0.068544380 0.071245554
## 77 77 0.067265944 0.066020595 0.068664142
## 78 78 0.064808985 0.063592465 0.066175150
## 79 79 0.062440735 0.061258185 0.063775332
## 80 80 0.060158048 0.059005959 0.061464658
## 81 81 0.057957891 0.056831914 0.059236808
## 82 82 0.055837330 0.054737102 0.057088849
## 83 83 0.053793536 0.052718686 0.055017965
## 84 84 0.051823775 0.050773926 0.053021355
## 85 85 0.049925410 0.048899822 0.051096448
## 86 86 0.048095893 0.047090038 0.049240740
## 87 87 0.046332766 0.045346705 0.047451779
## 88 88 0.044633656 0.043670448 0.045727202
## 89 89 0.042996273 0.042057152 0.044064724
## 90 90 0.041418407 0.040502139 0.042462141
## 91 91 0.039897924 0.039004103 0.040917389
## 92 92 0.038432766 0.037561072 0.039429819
## 93 93 0.037020947 0.036168429 0.037993574
## 94 94 0.035660551 0.034826302 0.036609674
## 95 95 0.034349728 0.033533546 0.035276187
## 96 96 0.033086694 0.032291480 0.033990881
## 97 97 0.031869730 0.031095661 0.032752035
## 98 98 0.030697173 0.029942576 0.031557990
## 99 99 0.029567424 0.028830620 0.030408869
## 100 100 0.028478937 0.027759635 0.029302003
## 101 101 0.027430221 0.026728129 0.028235042
## 102 102 0.026419840 0.025734662 0.027204922
## 103 103 0.025446407 0.024777906 0.026214952
## 104 104 0.024508586 0.023856237 0.025260521
## 105 105 0.023605087 0.022968276 0.024339932
## 106 106 0.022734667 0.022113130 0.023452675
## 107 107 0.021896127 0.021289600 0.022597533
## 108 108 0.021088310 0.020496528 0.021773336
## 109 109 0.020310102 0.019734223 0.020977789
## 110 110 0.019560427 0.019000187 0.020209845
## 111 111 0.018838249 0.018292262 0.019469155
## 112 112 0.018142569 0.017610546 0.018755437
## 113 113 0.017472422 0.016954076 0.018067746
## 114 114 0.016826880 0.016321926 0.017407193
## 115 115 0.016205048 0.015713203 0.016769008
## 116 116 0.015606061 0.015127046 0.016154081
## 117 117 0.015029087 0.014562625 0.015561572
## 118 118 0.014473324 0.014019142 0.014990671
## 119 119 0.013937998 0.013495826 0.014440578
## 120 120 0.013422363 0.012991934 0.013910556
## 121 121 0.012925702 0.012506752 0.013399882
## 122 122 0.012447320 0.012039589 0.012907838
## 123 123 0.011986550 0.011589783 0.012433759
## 124 124 0.011542749 0.011156697 0.011977001
## 125 125 0.011115296 0.010739686 0.011536935
## 126 126 0.010703592 0.010337558 0.011112955
## 127 127 0.010307063 0.009950408 0.010704637
## 128 128 0.009925151 0.009577710 0.010311641
## 129 129 0.009557323 0.009219438 0.009933004
## 130 130 0.009203062 0.008875717 0.009568206
# Objective 3
# Compare models using AIC
AIC(overall_coxph, weibull, exponential, lognormal, loglogistic, gamma_model)
## Warning in AIC.default(overall_coxph, weibull, exponential, lognormal,
## loglogistic, : models are not all fitted to the same number of observations
## df AIC
## overall_coxph 5 1915086.7
## weibull 7 713383.0
## exponential 6 714643.8
## lognormal 7 721860.8
## loglogistic 7 723709.4
## gamma_model 7 713569.9
Appendix : Counties
# Group by County_No and County, count cases, and add a total row
court_case_counts <- data %>%
group_by(County_No, court_name, County) %>%
summarize(count = n()) %>%
ungroup() %>%
mutate(total = sum(count)) %>%
select(County_No, court_name, County, count)
## `summarise()` has grouped output by 'County_No', 'court_name'. You can override
## using the `.groups` argument.
# Add a total row
court_counts_with_total <- court_case_counts %>%
adorn_totals("row")
# Print formatted table with total
court_name_tables <- court_counts_with_total %>%
kable(format = "html", caption = "Number of cases per court") %>%
kable_styling()
court_name_tables
Number of cases per court
|
County_No
|
court_name
|
County
|
count
|
|
1
|
Mombasa High Court
|
Mombasa County
|
4555
|
|
3
|
Malindi High Court
|
Kilifi County
|
1740
|
|
4
|
Garsen High Court
|
Tana River County
|
557
|
|
6
|
Voi High Court
|
Taita Taveta County
|
966
|
|
7
|
Garissa High Court
|
Garissa County
|
182
|
|
10
|
Marsabit High Court
|
Marsabit County
|
391
|
|
12
|
Meru High Court
|
Meru County
|
4321
|
|
13
|
Chuka High Court
|
Tharaka-Nithi County
|
950
|
|
14
|
Embu High Court
|
Embu County
|
1439
|
|
15
|
Kitui High Court
|
Kitui County
|
1603
|
|
16
|
Machakos High Court
|
Machakos County
|
3799
|
|
17
|
Makueni High Court
|
Makueni County
|
1640
|
|
18
|
Nyandarua High Court
|
Nyandarua County
|
640
|
|
19
|
Nyeri High Court
|
Nyeri County
|
1607
|
|
20
|
Kerugoya High Court
|
Kirinyaga County
|
1211
|
|
21
|
Muranga High Court
|
Murang’a County
|
1787
|
|
22
|
Kiambu High Court
|
Kiambu County
|
3462
|
|
23
|
Lodwar High Court
|
Turkana County
|
326
|
|
24
|
Kapenguria High Court
|
West Pokot County
|
270
|
|
26
|
Kitale High Court
|
Trans-Nzoia County
|
2410
|
|
27
|
Eldoret High Court
|
Uasin Gishu County
|
4139
|
|
30
|
Kabarnet High Court
|
Baringo County
|
861
|
|
31
|
Nanyuki High Court
|
Laikipia County
|
706
|
|
32
|
Naivasha High Court
|
Nakuru County
|
1615
|
|
32
|
Nakuru High Court
|
Nakuru County
|
6692
|
|
33
|
Narok High Court
|
Narok County
|
670
|
|
34
|
Kajiado High Court
|
Kajiado County
|
1258
|
|
35
|
Kericho High Court
|
Kericho County
|
1202
|
|
36
|
Bomet High Court
|
Bomet County
|
681
|
|
37
|
Kakamega High Court
|
Kakamega County
|
2549
|
|
38
|
Vihiga High Court
|
Vihiga County
|
71
|
|
39
|
Bungoma High Court
|
Bungoma County
|
2092
|
|
40
|
Busia High Court
|
Busia County
|
1170
|
|
41
|
Siaya High Court
|
Siaya County
|
2279
|
|
42
|
Kisumu High Court
|
Kisumu County
|
3160
|
|
43
|
Homabay High Court
|
Homa Bay County
|
1881
|
|
44
|
Migori High Court
|
Migori County
|
1844
|
|
45
|
Kisii High Court
|
Kisii County
|
3184
|
|
46
|
Nyamira High Court
|
Nyamira County
|
1184
|
|
47
|
Milimani AntiCorruption & Economic Crimes Division
|
Nairobi County
|
418
|
|
47
|
Milimani Civil Division
|
Nairobi County
|
6194
|
|
47
|
Milimani Commercial & Tax Division
|
Nairobi County
|
20
|
|
47
|
Milimani Constitutional Law & Human Rights Division
|
Nairobi County
|
1939
|
|
47
|
Milimani Criminal Division
|
Nairobi County
|
2032
|
|
47
|
Milimani Family Division
|
Nairobi County
|
9116
|
|
47
|
Milimani Judicial Review Division
|
Nairobi County
|
1592
|
|
Total
|
|
|
92405
|