# Load necessary libraries
library(readxl) # For reading Excel files
library(dplyr) # For data manipulation
library(irr) # For Kappa statistic
library(writexl)
# Count Pos/Neg for each column
freq_table <- sapply(data[, -1], function(x) table(x))
freq_df <- as.data.frame(t(freq_table))
freq_df
# Function to calculate prevalence and CI
calculate_prevalence <- function(test_column) {
positives <- sum(test_column == "Pos") # Count the number of positives
total <- length(test_column) # Total number of participants
prevalence <- positives / total # Prevalence
# Calculate 95% CI for prevalence
ci <- prop.test(positives, total, conf.level = 0.95)
return(c(Prevalence = prevalence, CI_Lower = ci$conf.int[1], CI_Upper = ci$conf.int[2]))
}
# Apply the function to each diagnostic method
prevalence_combined_EV <- calculate_prevalence(data$Combined_EV)
prevalence_kk_EV <- calculate_prevalence(data$KK_EV)
prevalence_snf_EV <- calculate_prevalence(data$SNF_EV)
prevalence_combined_TT <- calculate_prevalence(data$Combined_TT)
prevalence_kk_TT <- calculate_prevalence(data$KK_TT)
prevalence_snf_TT <- calculate_prevalence(data$SNF_TT)
prevalence_combined_AL <- calculate_prevalence(data$Combined_AL)
prevalence_kk_AL <- calculate_prevalence(data$KK_AL)
prevalence_snf_AL <- calculate_prevalence(data$SNF_AL)
prevalence_combined_HW <- calculate_prevalence(data$Combined_HW)
prevalence_kk_HW <- calculate_prevalence(data$KK_HW)
prevalence_snf_HW <- calculate_prevalence(data$SNF_HW)
prevalence_baermann_HW_SG <- calculate_prevalence(data$Baermann_HW_SG)
prevalence_combined_STHs <- calculate_prevalence(data$STHs_CombiNeged)
prevalence_kk_STHs <- calculate_prevalence(data$STHs_KK)
prevalence_snf_STHs <- calculate_prevalence(data$STHs_SNF)
prevalence_STHs_baermann <- calculate_prevalence(data$STHs_Baermann)
prevalence_table <- data.frame(
Diagnostic_Technique = c(
"Combined_EV", "KK_EV", "SNF_EV",
"Combined_TT", "KK_TT", "SNF_TT",
"Combined_AL", "KK_AL", "SNF_AL",
"Combined_HW", "KK_HW", "SNF_HW",
"Baermann_HW_SG",
"Combined_STHs", "KK_STHs", "SNF_STHs", "STHs_Baermann"
),
Prevalence = c(
prevalence_combined_EV[1], prevalence_kk_EV[1], prevalence_snf_EV[1],
prevalence_combined_TT[1], prevalence_kk_TT[1], prevalence_snf_TT[1],
prevalence_combined_AL[1], prevalence_kk_AL[1], prevalence_snf_AL[1],
prevalence_combined_HW[1], prevalence_kk_HW[1], prevalence_snf_HW[1],
prevalence_baermann_HW_SG[1],
prevalence_combined_STHs[1], prevalence_kk_STHs[1], prevalence_snf_STHs[1],
prevalence_STHs_baermann[1]
),
CI_Lower = c(
prevalence_combined_EV[2], prevalence_kk_EV[2], prevalence_snf_EV[2],
prevalence_combined_TT[2], prevalence_kk_TT[2], prevalence_snf_TT[2],
prevalence_combined_AL[2], prevalence_kk_AL[2], prevalence_snf_AL[2],
prevalence_combined_HW[2], prevalence_kk_HW[2], prevalence_snf_HW[2],
prevalence_baermann_HW_SG[2],
prevalence_combined_STHs[2], prevalence_kk_STHs[2], prevalence_snf_STHs[2],
prevalence_STHs_baermann[2]
),
CI_Upper = c(
prevalence_combined_EV[3], prevalence_kk_EV[3], prevalence_snf_EV[3],
prevalence_combined_TT[3], prevalence_kk_TT[3], prevalence_snf_TT[3],
prevalence_combined_AL[3], prevalence_kk_AL[3], prevalence_snf_AL[3],
prevalence_combined_HW[3], prevalence_kk_HW[3], prevalence_snf_HW[3],
prevalence_baermann_HW_SG[3],
prevalence_combined_STHs[3], prevalence_kk_STHs[3], prevalence_snf_STHs[3],
prevalence_STHs_baermann[3]
)
)
print(prevalence_table)
# Function to calculate sensitivity, specificity, PPV, and NPV
calculate_metrics <- function(conf_matrix) {
TP <- conf_matrix[2, 2] # True Positive
FP <- conf_matrix[1, 2] # False Positive
TN <- conf_matrix[1, 1] # True Negative
FN <- conf_matrix[2, 1] # False Negative
# Sensitivity = TP / (TP + FN)
sensitivity <- TP / (TP + FN)
# Specificity = TN / (TN + FP)
specificity <- TN / (TN + FP)
# PPV = TP / (TP + FP)
ppv <- TP / (TP + FP)
# NPV = TN / (TN + FN)
npv <- TN / (TN + FN)
# Calculate 95% CI for each metric using prop.test
sensitivity_ci <- prop.test(TP, TP + FN, conf.level = 0.95)$conf.int
specificity_ci <- prop.test(TN, TN + FP, conf.level = 0.95)$conf.int
ppv_ci <- prop.test(TP, TP + FP, conf.level = 0.95)$conf.int
npv_ci <- prop.test(TN, TN + FN, conf.level = 0.95)$conf.int
return(data.frame(
Sensitivity = round(sensitivity * 100, 2),
Sensitivity_Lower_CI = round(sensitivity_ci[1] * 100, 2),
Sensitivity_Upper_CI = round(sensitivity_ci[2] * 100, 2),
Specificity = round(specificity * 100, 2),
Specificity_Lower_CI = round(specificity_ci[1] * 100, 2),
Specificity_Upper_CI = round(specificity_ci[2] * 100, 2),
PPV = round(ppv * 100, 2),
PPV_Lower_CI = round(ppv_ci[1] * 100, 2),
PPV_Upper_CI = round(ppv_ci[2] * 100, 2),
NPV = round(npv * 100, 2),
NPV_Lower_CI = round(npv_ci[1] * 100, 2),
NPV_Upper_CI = round(npv_ci[2] * 100, 2)
))
}
# Gold standards for each species
gold_standards <- list(
AL = "Combined_AL", # A. lumbricoides uses Combined_AL as the gold standard
HW = "Combined_HW", # Hookworm uses Combined_HW as the gold standard
TT = "Combined_TT", # T. trichiura uses Combined_TT as the gold standard
EV = "Combined_EV", # E. vermicularis uses Combined_EV as the gold standard
STHs = "STHs_CombiNeged" # STHs (all species combined) uses STHs_CombiNeged as the gold standard
)
# List of diagnostic methods (your actual diagnostic test columns in your dataset)
diagnostic_methods <- c("STHs_KK", "KK_EV", "KK_TT", "KK_AL", "KK_HW",
"STHs_SNF", "SNF_EV", "SNF_TT", "SNF_AL", "SNF_HW",
"STHs_Baermann", "Baermann_HW_SG")
# Create an empty data frame to store results
results_table <- data.frame()
# Loop through each diagnostic method and calculate metrics
for (test in diagnostic_methods) {
# Get the species corresponding to each test method
if (grepl("KK_", test)) {
species <- sub("KK_", "", test) # Extract the species name from the test (e.g., KK_AL -> AL)
} else if (grepl("SNF_", test)) {
species <- sub("SNF_", "", test) # Extract the species name from the test (e.g., SNF_EV -> EV)
} else if (grepl("Baermann", test)) {
species <- "STHs" # Baermann methods will use STHs combined data
} else {
species <- "STHs" # Other tests like STHs_KK use combined data
}
# Define the gold standard for this species
gold_standard <- gold_standards[[species]]
# Create confusion matrix for each test vs the gold standard (species-specific gold standard)
conf_matrix <- table(data[[gold_standard]], data[[test]])
# Calculate sensitivity, specificity, PPV, and NPV for each method
metrics <- calculate_metrics(conf_matrix)
# Add results to the results table
results_table <- rbind(results_table, data.frame(
Diagnostic_Technique = test,
Species = species,
Sensitivity = metrics$Sensitivity,
Sensitivity_Lower_CI = metrics$Sensitivity_Lower_CI,
Sensitivity_Upper_CI = metrics$Sensitivity_Upper_CI,
Specificity = metrics$Specificity,
Specificity_Lower_CI = metrics$Specificity_Lower_CI,
Specificity_Upper_CI = metrics$Specificity_Upper_CI,
PPV = metrics$PPV,
PPV_Lower_CI = metrics$PPV_Lower_CI,
PPV_Upper_CI = metrics$PPV_Upper_CI,
NPV = metrics$NPV,
NPV_Lower_CI = metrics$NPV_Lower_CI,
NPV_Upper_CI = metrics$NPV_Upper_CI
))
}
# Print the results table
print(results_table)
# Function to calculate frequencies of positive and negative cases for each diagnostic method
calculate_frequencies <- function(test_column, species_column) {
# Create a contingency table for the species and the diagnostic method
freq_table <- table(species_column, test_column)
return(freq_table)
}
# Create an empty data frame to store results
frequencies_table <- data.frame()
# List of diagnostic methods (your actual diagnostic test columns in your dataset)
diagnostic_methods <- c("STHs_KK", "KK_EV", "KK_TT", "KK_AL", "KK_HW",
"STHs_SNF", "SNF_EV", "SNF_TT", "SNF_AL", "SNF_HW",
"STHs_Baermann", "Baermann_HW_SG")
# Loop through each diagnostic method and calculate frequencies
for (test in diagnostic_methods) {
# Get the species corresponding to each test method
if (grepl("KK_", test)) {
species <- sub("KK_", "", test) # Extract the species name from the test (e.g., KK_AL -> AL)
} else if (grepl("SNF_", test)) {
species <- sub("SNF_", "", test) # Extract the species name from the test (e.g., SNF_EV -> EV)
} else if (grepl("Baermann", test)) {
species <- "STHs" # Baermann methods will use STHs combined data
} else {
species <- "STHs" # Other tests like STHs_KK use combined data
}
# Define the gold standard for this species
gold_standard <- gold_standards[[species]]
# Calculate frequencies for each diagnostic method and species
freq_table <- calculate_frequencies(data[[test]], data[[gold_standard]])
# Add the frequencies table to the results
frequencies_table <- rbind(frequencies_table, data.frame(
Diagnostic_Technique = test,
Species = species,
Positives = freq_table["Pos", "Pos"], # Positive cases
Negatives = freq_table["Neg", "Neg"] # Negative cases
))
}
# Print the frequencies table
print(frequencies_table)
# Function to calculate prevalence and 95% CI
calculate_prevalence <- function(test_column, total_participants) {
positives <- sum(test_column == "Pos") # Count the number of positives
prevalence <- positives / total_participants * 100 # Prevalence in percentage
# Calculate 95% CI for prevalence using prop.test
ci <- prop.test(positives, total_participants, conf.level = 0.95)
return(c(Prevalence = prevalence, CI_Lower = ci$conf.int[1] * 100, CI_Upper = ci$conf.int[2] * 100))
}
# Total participants
total_participants <- 333
# Assuming 'data' contains your dataset and "KK" and "SNF" are the respective columns
# Prevalence for KK method
prevalence_KK <- calculate_prevalence(data$STHs_KK, total_participants)
# Prevalence for SNF method
prevalence_SNF <- calculate_prevalence(data$STHs_SNF, total_participants)
# Prevalence for combined KK and SNF method (i.e., any positive result in either KK or SNF)
combined_KK_SNF <- calculate_prevalence((data$STHs_KK == "Pos" | data$STHs_SNF == "Pos"), total_participants)
# Prepare the results table
results <- data.frame(
Method = c("KK", "SNF", "KK + SNF"),
Total_Examined = rep(total_participants, 3),
Positives = c(sum(data$STHs_KK == "Pos"), sum(data$STHs_SNF == "Pos"), sum(data$STHs_KK == "Pos" | data$STHs_SNF == "Pos")),
Prevalence = c(prevalence_KK[1], prevalence_SNF[1], combined_KK_SNF[1]),
CI_Lower = c(prevalence_KK[2], prevalence_SNF[2], combined_KK_SNF[2]),
CI_Upper = c(prevalence_KK[3], prevalence_SNF[3], combined_KK_SNF[3])
)
# Print the results
print(results)
positives <- 176
total_participants <- 333
# Use prop.test to calculate the 95% confidence interval for the proportion of positives
ci_result <- prop.test(positives, total_participants, conf.level = 0.95)
# Print the result
ci_result$conf.int
[1] 0.4734004 0.5829878
attr(,"conf.level")
[1] 0.95
################################################
# Function to calculate manual Cohen's Kappa
manual_kappa <- function(conf_matrix) {
TP <- conf_matrix[2, 2] # True Positives
FP <- conf_matrix[1, 2] # False Positives
TN <- conf_matrix[1, 1] # True Negatives
FN <- conf_matrix[2, 1] # False Negatives
P_o <- (TP + TN) / sum(conf_matrix) # Observed Agreement
P_e <- ((TP + FP) / sum(conf_matrix)) * ((TP + FN) / sum(conf_matrix)) +
((TN + FN) / sum(conf_matrix)) * ((TN + FP) / sum(conf_matrix)) # Expected Agreement
kappa <- (P_o - P_e) / (1 - P_e)
return(kappa)
}
# Example confusion matrix for KK_AL vs Combined_AL
kk_al_table <- table(data$KK_AL, data$Combined_AL)
# Calculate manual Kappa for each comparison
kappa_kk_al <- manual_kappa(kk_al_table)
kappa_kk_al
[1] 0.9552651
#Calculate for snf_al,kk_ev,snf_ev
kappa_snf_al <- manual_kappa(table(data$SNF_AL, data$Combined_AL))
kappa_kk_ev <- manual_kappa(table(data$KK_EV, data$Combined_EV))
kappa_snf_ev <- manual_kappa(table(data$SNF_EV, data$Combined_EV))
kappa_kk_tt <- manual_kappa(table(data$KK_TT, data$Combined_TT))
kappa_snf_tt <- manual_kappa(table(data$SNF_TT, data$Combined_TT))
kappa_kk_hw <- manual_kappa(table(data$KK_HW, data$Combined_HW))
kappa_snf_hw <- manual_kappa(table(data$SNF_HW, data$Combined_HW))
kappa_kk_STHs <- manual_kappa(table(data$STHs_KK, data$STHs_CombiNeged))
kappa_snf_STHs <- manual_kappa(table(data$STHs_SNF, data$STHs_CombiNeged))
kappa_bm_hw <- manual_kappa(table(data$Baermann_HW_SG, data$Combined_HW))
kappa_bm_STH <- manual_kappa(table(data$STHs_Baermann, data$STHs_CombiNeged))
print(kappa_snf_al)
[1] 0.2362215
print(kappa_kk_ev)
[1] 0.5457026
print(kappa_snf_ev)
[1] 0.9135738
print(kappa_kk_tt)
[1] 0.9596999
print(kappa_snf_tt)
[1] 0.6788337
print(kappa_kk_hw)
[1] 0.4547615
print(kappa_snf_hw)
[1] 0.7283604
print(kappa_kk_STHs)
[1] 0.8388045
print(kappa_snf_STHs)
[1] 0.39979
print(kappa_bm_hw)
[1] 0.6723516
print(kappa_bm_STH)
[1] 0.1235787
##########################
###########################
# Function to calculate manual Cohen's Kappa and p-value
manual_kappa_with_pvalue <- function(conf_matrix, n) {
TP <- conf_matrix[2, 2] # True Positives
FP <- conf_matrix[1, 2] # False Positives
TN <- conf_matrix[1, 1] # True Negatives
FN <- conf_matrix[2, 1] # False Negatives
P_o <- (TP + TN) / sum(conf_matrix) # Observed Agreement
P_e <- ((TP + FP) / sum(conf_matrix)) * ((TP + FN) / sum(conf_matrix)) +
((TN + FN) / sum(conf_matrix)) * ((TN + FP) / sum(conf_matrix)) # Expected Agreement
kappa <- (P_o - P_e) / (1 - P_e)
# Calculate the z-score for Cohen's Kappa
z <- kappa / sqrt((1 - kappa^2) / (n - 1))
# Calculate the p-value using the z-score
p_value <- 2 * (1 - pnorm(abs(z))) # Two-tailed p-value
return(list(Kappa = kappa, p_value = p_value))
}
# Example for KK_AL vs Combined_AL (confusion matrix already given)
kk_al_table <- table(data$KK_AL, data$Combined_AL)
# Total number of subjects (replace with your actual n)
n <- sum(kk_al_table) # Total number of subjects
# Calculate manual Kappa and p-value
kappa_kk_al_result <- manual_kappa_with_pvalue(kk_al_table, n)
# Print Kappa and p-value
print(paste("Kappa: ", kappa_kk_al_result$Kappa))
[1] "Kappa: 0.955265127526052"
print(paste("p-value: ", kappa_kk_al_result$p_value))
[1] "p-value: 0"
# Repeat for other comparisons
kappa_snf_al_result <- manual_kappa_with_pvalue(table(data$SNF_AL, data$Combined_AL), n)
kappa_kk_ev_result <- manual_kappa_with_pvalue(table(data$KK_EV, data$Combined_EV), n)
kappa_snf_ev_result <- manual_kappa_with_pvalue(table(data$SNF_EV, data$Combined_EV), n)
kappa_kk_tt_result <- manual_kappa_with_pvalue(table(data$KK_TT, data$Combined_TT), n)
kappa_snf_tt_result <- manual_kappa_with_pvalue(table(data$SNF_TT, data$Combined_TT), n)
kappa_kk_hw_result <- manual_kappa_with_pvalue(table(data$KK_HW, data$Combined_HW), n)
kappa_snf_hw_result <- manual_kappa_with_pvalue(table(data$SNF_HW, data$Combined_HW), n)
kappa_kk_sths_result <- manual_kappa_with_pvalue(table(data$STHs_KK, data$STHs_CombiNeged), n)
kappa_snf_sths_result <- manual_kappa_with_pvalue(table(data$STHs_SNF, data$STHs_CombiNeged), n)
kappa_bm_hw_result <- manual_kappa_with_pvalue(table(data$Baermann_HW_SG, data$Combined_HW), n)
kappa_bm_sth_result <- manual_kappa_with_pvalue(table(data$STHs_Baermann, data$STHs_CombiNeged), n)
# Print results
print(kappa_snf_al_result)
$Kappa
[1] 0.2362215
$p_value
[1] 9.444312e-06
print(kappa_kk_ev_result)
$Kappa
[1] 0.5457026
$p_value
[1] 0
print(kappa_snf_ev_result)
$Kappa
[1] 0.9135738
$p_value
[1] 0
print(kappa_kk_tt_result)
$Kappa
[1] 0.9596999
$p_value
[1] 0
print(kappa_snf_tt_result)
$Kappa
[1] 0.6788337
$p_value
[1] 0
print(kappa_kk_hw_result)
$Kappa
[1] 0.4547615
$p_value
[1] 0
print(kappa_snf_hw_result)
$Kappa
[1] 0.7283604
$p_value
[1] 0
print(kappa_kk_sths_result)
$Kappa
[1] 0.8388045
$p_value
[1] 0
print(kappa_snf_sths_result)
$Kappa
[1] 0.39979
$p_value
[1] 1.998401e-15
print(kappa_bm_hw_result)
$Kappa
[1] 0.6723516
$p_value
[1] 0
print(kappa_bm_sth_result)
$Kappa
[1] 0.1235787
$p_value
[1] 0.023262
#95% CI of kappa
# Function to calculate manual Cohen's Kappa, p-value, and 95% CI
manual_kappa_with_ci <- function(conf_matrix, n) {
TP <- conf_matrix[2, 2] # True Positives
FP <- conf_matrix[1, 2] # False Positives
TN <- conf_matrix[1, 1] # True Negatives
FN <- conf_matrix[2, 1] # False Negatives
P_o <- (TP + TN) / sum(conf_matrix) # Observed Agreement
P_e <- ((TP + FP) / sum(conf_matrix)) * ((TP + FN) / sum(conf_matrix)) +
((TN + FN) / sum(conf_matrix)) * ((TN + FP) / sum(conf_matrix)) # Expected Agreement
kappa <- (P_o - P_e) / (1 - P_e)
# Calculate the standard error (SE) of Cohen's Kappa
se_kappa <- sqrt((1 - kappa^2) / (n - 1))
# Calculate the z-score for Cohen's Kappa
z <- kappa / se_kappa
# Calculate the p-value using the z-score
p_value <- 2 * (1 - pnorm(abs(z))) # Two-tailed p-value
# Calculate the 95% Confidence Interval for Cohen's Kappa
lower_ci <- kappa - 1.96 * se_kappa
upper_ci <- kappa + 1.96 * se_kappa
return(list(Kappa = kappa, p_value = p_value, Lower_CI = lower_ci, Upper_CI = upper_ci))
}
# Example for KK_AL vs Combined_AL (confusion matrix already given)
kk_al_table <- table(data$KK_AL, data$Combined_AL)
# Total number of subjects (replace with your actual n)
n <- sum(kk_al_table) # Total number of subjects
# Calculate manual Kappa, p-value, and 95% CI
kk_al_result <- manual_kappa_with_ci(kk_al_table, n)
# Print Kappa, p-value, and 95% CI
print(paste("Kappa: ", kk_al_result$Kappa))
[1] "Kappa: 0.955265127526052"
print(paste("p-value: ", kk_al_result$p_value))
[1] "p-value: 0"
print(paste("95% CI: ", round(kk_al_result$Lower_CI, 3), " to ", round(kk_al_result$Upper_CI, 3)))
[1] "95% CI: 0.923 to 0.987"
# Repeat for other comparisons
kappa_snf_al_result <- manual_kappa_with_ci(table(data$SNF_AL, data$Combined_AL), n)
kappa_kk_ev_result <- manual_kappa_with_ci(table(data$KK_EV, data$Combined_EV), n)
kappa_snf_ev_result <- manual_kappa_with_ci(table(data$SNF_EV, data$Combined_EV), n)
kappa_kk_tt_result <- manual_kappa_with_ci(table(data$KK_TT, data$Combined_TT), n)
kappa_snf_tt_result <- manual_kappa_with_ci(table(data$SNF_TT, data$Combined_TT), n)
kappa_kk_hw_result <- manual_kappa_with_ci(table(data$KK_HW, data$Combined_HW), n)
kappa_snf_hw_result <- manual_kappa_with_ci(table(data$SNF_HW, data$Combined_HW), n)
kappa_kk_sths_result <- manual_kappa_with_ci(table(data$STHs_KK, data$STHs_CombiNeged), n)
kappa_snf_sths_result <- manual_kappa_with_ci(table(data$STHs_SNF, data$STHs_CombiNeged), n)
kappa_bm_hw_result <- manual_kappa_with_ci(table(data$Baermann_HW_SG, data$Combined_HW), n)
kappa_bm_sth_result <- manual_kappa_with_ci(table(data$STHs_Baermann, data$STHs_CombiNeged), n)
# Print results
print(kk_al_result)
$Kappa
[1] 0.9552651
$p_value
[1] 0
$Lower_CI
[1] 0.9234515
$Upper_CI
[1] 0.9870787
print(kappa_snf_al_result)
$Kappa
[1] 0.2362215
$p_value
[1] 9.444312e-06
$Lower_CI
[1] 0.1316968
$Upper_CI
[1] 0.3407462
print(kappa_kk_ev_result)
$Kappa
[1] 0.5457026
$p_value
[1] 0
$Lower_CI
[1] 0.4555621
$Upper_CI
[1] 0.6358431
print(kappa_snf_ev_result)
$Kappa
[1] 0.9135738
$p_value
[1] 0
$Lower_CI
[1] 0.8698285
$Upper_CI
[1] 0.9573192
print(kappa_kk_tt_result)
$Kappa
[1] 0.9596999
$p_value
[1] 0
$Lower_CI
[1] 0.9294701
$Upper_CI
[1] 0.9899297
print(kappa_snf_tt_result)
$Kappa
[1] 0.6788337
$p_value
[1] 0
$Lower_CI
[1] 0.5998467
$Upper_CI
[1] 0.7578208
print(kappa_kk_hw_result)
$Kappa
[1] 0.4547615
$p_value
[1] 0
$Lower_CI
[1] 0.3589592
$Upper_CI
[1] 0.5505639
print(kappa_snf_hw_result)
$Kappa
[1] 0.7283604
$p_value
[1] 0
$Lower_CI
[1] 0.6546547
$Upper_CI
[1] 0.802066
print(kappa_kk_sths_result)
$Kappa
[1] 0.8388045
$p_value
[1] 0
$Lower_CI
[1] 0.7802405
$Upper_CI
[1] 0.8973686
print(kappa_snf_sths_result)
$Kappa
[1] 0.39979
$p_value
[1] 1.998401e-15
$Lower_CI
[1] 0.3011916
$Upper_CI
[1] 0.4983885
print(kappa_bm_hw_result)
$Kappa
[1] 0.6723516
$p_value
[1] 0
$Lower_CI
[1] 0.5927256
$Upper_CI
[1] 0.7519776
print(kappa_bm_sth_result)
$Kappa
[1] 0.1235787
$p_value
[1] 0.023262
$Lower_CI
[1] 0.01683426
$Upper_CI
[1] 0.2303231
###
# Function to calculate manual Cohen's Kappa, p-value, and 95% CI
manual_kappa_with_ci <- function(conf_matrix, n) {
TP <- conf_matrix[2, 2] # True Positives
FP <- conf_matrix[1, 2] # False Positives
TN <- conf_matrix[1, 1] # True Negatives
FN <- conf_matrix[2, 1] # False Negatives
P_o <- (TP + TN) / sum(conf_matrix) # Observed Agreement
P_e <- ((TP + FP) / sum(conf_matrix)) * ((TP + FN) / sum(conf_matrix)) +
((TN + FN) / sum(conf_matrix)) * ((TN + FP) / sum(conf_matrix)) # Expected Agreement
kappa <- (P_o - P_e) / (1 - P_e)
# Calculate the standard error (SE) of Cohen's Kappa
se_kappa <- sqrt((1 - kappa^2) / (n - 1))
# Calculate the z-score for Cohen's Kappa
z <- kappa / se_kappa
# Calculate the p-value using the z-score
p_value <- 2 * (1 - pnorm(abs(z))) # Two-tailed p-value
# Calculate the 95% Confidence Interval for Cohen's Kappa
lower_ci <- kappa - 1.96 * se_kappa
upper_ci <- kappa + 1.96 * se_kappa
# Format the 95% CI in the required style
ci_str <- paste("95% CI: ", round(lower_ci, 3), " to ", round(upper_ci, 3))
return(list(Kappa = kappa, p_value = p_value, CI = ci_str))
}
# Example for KK_AL vs Combined_AL (confusion matrix already given)
kk_al_table <- table(data$KK_AL, data$Combined_AL)
# Total number of subjects (replace with your actual n)
n <- sum(kk_al_table) # Total number of subjects
# Calculate manual Kappa, p-value, and 95% CI
kk_al_result <- manual_kappa_with_ci(kk_al_table, n)
# Print Kappa, p-value, and formatted 95% CI
print(paste("Kappa: ", round(kk_al_result$Kappa, 3)))
[1] "Kappa: 0.955"
print(paste("p-value: ", round(kk_al_result$p_value, 5)))
[1] "p-value: 0"
print(kk_al_result$CI)
[1] "95% CI: 0.923 to 0.987"
# Repeat for other comparisons
kappa_snf_al_result <- manual_kappa_with_ci(table(data$SNF_AL, data$Combined_AL), n)
kappa_kk_ev_result <- manual_kappa_with_ci(table(data$KK_EV, data$Combined_EV), n)
kappa_snf_ev_result <- manual_kappa_with_ci(table(data$SNF_EV, data$Combined_EV), n)
kappa_kk_tt_result <- manual_kappa_with_ci(table(data$KK_TT, data$Combined_TT), n)
kappa_snf_tt_result <- manual_kappa_with_ci(table(data$SNF_TT, data$Combined_TT), n)
kappa_kk_hw_result <- manual_kappa_with_ci(table(data$KK_HW, data$Combined_HW), n)
kappa_snf_hw_result <- manual_kappa_with_ci(table(data$SNF_HW, data$Combined_HW), n)
kappa_kk_sths_result <- manual_kappa_with_ci(table(data$STHs_KK, data$STHs_CombiNeged), n)
kappa_snf_sths_result <- manual_kappa_with_ci(table(data$STHs_SNF, data$STHs_CombiNeged), n)
kappa_bm_hw_result <- manual_kappa_with_ci(table(data$Baermann_HW_SG, data$Combined_HW), n)
kappa_bm_sth_result <- manual_kappa_with_ci(table(data$STHs_Baermann, data$STHs_CombiNeged), n)
# Print results
print(kk_al_result)
$Kappa
[1] 0.9552651
$p_value
[1] 0
$CI
[1] "95% CI: 0.923 to 0.987"
print(kappa_snf_al_result)
$Kappa
[1] 0.2362215
$p_value
[1] 9.444312e-06
$CI
[1] "95% CI: 0.132 to 0.341"
print(kappa_kk_ev_result)
$Kappa
[1] 0.5457026
$p_value
[1] 0
$CI
[1] "95% CI: 0.456 to 0.636"
print(kappa_snf_ev_result)
$Kappa
[1] 0.9135738
$p_value
[1] 0
$CI
[1] "95% CI: 0.87 to 0.957"
print(kappa_kk_tt_result)
$Kappa
[1] 0.9596999
$p_value
[1] 0
$CI
[1] "95% CI: 0.929 to 0.99"
print(kappa_snf_tt_result)
$Kappa
[1] 0.6788337
$p_value
[1] 0
$CI
[1] "95% CI: 0.6 to 0.758"
print(kappa_kk_hw_result)
$Kappa
[1] 0.4547615
$p_value
[1] 0
$CI
[1] "95% CI: 0.359 to 0.551"
print(kappa_snf_hw_result)
$Kappa
[1] 0.7283604
$p_value
[1] 0
$CI
[1] "95% CI: 0.655 to 0.802"
print(kappa_kk_sths_result)
$Kappa
[1] 0.8388045
$p_value
[1] 0
$CI
[1] "95% CI: 0.78 to 0.897"
print(kappa_snf_sths_result)
$Kappa
[1] 0.39979
$p_value
[1] 1.998401e-15
$CI
[1] "95% CI: 0.301 to 0.498"
print(kappa_bm_hw_result)
$Kappa
[1] 0.6723516
$p_value
[1] 0
$CI
[1] "95% CI: 0.593 to 0.752"
print(kappa_bm_sth_result)
$Kappa
[1] 0.1235787
$p_value
[1] 0.023262
$CI
[1] "95% CI: 0.017 to 0.23"
###########
# Function to calculate manual Cohen's Kappa, p-value, 95% CI, P_o, and P_e
manual_kappa_with_details <- function(conf_matrix, n) {
TP <- conf_matrix[2, 2] # True Positives
FP <- conf_matrix[1, 2] # False Positives
TN <- conf_matrix[1, 1] # True Negatives
FN <- conf_matrix[2, 1] # False Negatives
# Calculate Observed Agreement (P_o)
P_o <- (TP + TN) / sum(conf_matrix)
# Calculate Expected Agreement (P_e)
P_e <- ((TP + FP) / sum(conf_matrix)) * ((TP + FN) / sum(conf_matrix)) +
((TN + FN) / sum(conf_matrix)) * ((TN + FP) / sum(conf_matrix))
# Calculate Cohen's Kappa
kappa <- (P_o - P_e) / (1 - P_e)
# Calculate the standard error (SE) of Cohen's Kappa
se_kappa <- sqrt((1 - kappa^2) / (n - 1))
# Calculate the z-score for Cohen's Kappa
z <- kappa / se_kappa
# Calculate the p-value using the z-score
p_value <- 2 * (1 - pnorm(abs(z))) # Two-tailed p-value
# Calculate the 95% Confidence Interval for Cohen's Kappa
lower_ci <- kappa - 1.96 * se_kappa
upper_ci <- kappa + 1.96 * se_kappa
# Format the 95% CI in the required style
ci_str <- paste("95% CI: ", round(lower_ci, 3), " to ", round(upper_ci, 3))
# Return the results
return(list(Kappa = kappa, p_value = p_value, P_o = P_o, P_e = P_e, CI = ci_str))
}
# Example for KK_AL vs Combined_AL (confusion matrix already given)
kk_al_table <- table(data$KK_AL, data$Combined_AL)
# Total number of subjects (replace with your actual n)
n <- sum(kk_al_table) # Total number of subjects
# Calculate manual Kappa, p-value, 95% CI, P_o, and P_e
kk_al_result <- manual_kappa_with_details(kk_al_table, n)
# Print Kappa, p-value, P_o, P_e, and formatted 95% CI
print(paste("Kappa: ", round(kk_al_result$Kappa, 3)))
[1] "Kappa: 0.955"
print(paste("p-value: ", round(kk_al_result$p_value, 5)))
[1] "p-value: 0"
print(paste("P_o (Observed Agreement): ", round(kk_al_result$P_o, 3)))
[1] "P_o (Observed Agreement): 0.979"
print(paste("P_e (Expected Agreement): ", round(kk_al_result$P_e, 3)))
[1] "P_e (Expected Agreement): 0.53"
print(kk_al_result$CI)
[1] "95% CI: 0.923 to 0.987"
# Repeat for other comparisons
kappa_snf_al_result <- manual_kappa_with_details(table(data$SNF_AL, data$Combined_AL), n)
kappa_kk_ev_result <- manual_kappa_with_details(table(data$KK_EV, data$Combined_EV), n)
kappa_snf_ev_result <- manual_kappa_with_details(table(data$SNF_EV, data$Combined_EV), n)
kappa_kk_tt_result <- manual_kappa_with_details(table(data$KK_TT, data$Combined_TT), n)
kappa_snf_tt_result <- manual_kappa_with_details(table(data$SNF_TT, data$Combined_TT), n)
kappa_kk_hw_result <- manual_kappa_with_details(table(data$KK_HW, data$Combined_HW), n)
kappa_snf_hw_result <- manual_kappa_with_details(table(data$SNF_HW, data$Combined_HW), n)
kappa_kk_sths_result <- manual_kappa_with_details(table(data$STHs_KK, data$STHs_CombiNeged), n)
kappa_snf_sths_result <- manual_kappa_with_details(table(data$STHs_SNF, data$STHs_CombiNeged), n)
kappa_bm_hw_result <- manual_kappa_with_details(table(data$Baermann_HW_SG, data$Combined_HW), n)
kappa_bm_sth_result <- manual_kappa_with_details(table(data$STHs_Baermann, data$STHs_CombiNeged), n)
# Print results
print(kk_al_result)
$Kappa
[1] 0.9552651
$p_value
[1] 0
$P_o
[1] 0.978979
$P_e
[1] 0.5300977
$CI
[1] "95% CI: 0.923 to 0.987"
print(kappa_snf_al_result)
$Kappa
[1] 0.2362215
$p_value
[1] 9.444312e-06
$P_o
[1] 0.6906907
$P_e
[1] 0.5950275
$CI
[1] "95% CI: 0.132 to 0.341"
print(kappa_kk_ev_result)
$Kappa
[1] 0.5457026
$p_value
[1] 0
$P_o
[1] 0.975976
$P_e
[1] 0.9471183
$CI
[1] "95% CI: 0.456 to 0.636"
print(kappa_snf_ev_result)
$Kappa
[1] 0.9135738
$p_value
[1] 0
$P_o
[1] 0.993994
$P_e
[1] 0.9305071
$CI
[1] "95% CI: 0.87 to 0.957"
print(kappa_kk_tt_result)
$Kappa
[1] 0.9596999
$p_value
[1] 0
$P_o
[1] 0.993994
$P_e
[1] 0.8509681
$CI
[1] "95% CI: 0.929 to 0.99"
print(kappa_snf_tt_result)
$Kappa
[1] 0.6788337
$p_value
[1] 0
$P_o
[1] 0.960961
$P_e
[1] 0.878446
$CI
[1] "95% CI: 0.6 to 0.758"
print(kappa_kk_hw_result)
$Kappa
[1] 0.4547615
$p_value
[1] 0
$P_o
[1] 0.9069069
$P_e
[1] 0.8292617
$CI
[1] "95% CI: 0.359 to 0.551"
print(kappa_snf_hw_result)
$Kappa
[1] 0.7283604
$p_value
[1] 0
$P_o
[1] 0.9459459
$P_e
[1] 0.8010082
$CI
[1] "95% CI: 0.655 to 0.802"
print(kappa_kk_sths_result)
$Kappa
[1] 0.8388045
$p_value
[1] 0
$P_o
[1] 0.9189189
$P_e
[1] 0.4970015
$CI
[1] "95% CI: 0.78 to 0.897"
print(kappa_snf_sths_result)
$Kappa
[1] 0.39979
$p_value
[1] 1.998401e-15
$P_o
[1] 0.6906907
$P_e
[1] 0.4846648
$CI
[1] "95% CI: 0.301 to 0.498"
print(kappa_bm_hw_result)
$Kappa
[1] 0.6723516
$p_value
[1] 0
$P_o
[1] 0.9369369
$P_e
[1] 0.8075282
$CI
[1] "95% CI: 0.593 to 0.752"
print(kappa_bm_sth_result)
$Kappa
[1] 0.1235787
$p_value
[1] 0.023262
$P_o
[1] 0.5405405
$P_e
[1] 0.475755
$CI
[1] "95% CI: 0.017 to 0.23"
---
title: "Evaluation of performance of Diagniostic Test"
output: html_notebook
---


```{r}
# Load necessary libraries
library(readxl)  # For reading Excel files
library(dplyr)   # For data manipulation
library(irr)     # For Kappa statistic
library(writexl)

# Count Pos/Neg for each column
freq_table <- sapply(data[, -1], function(x) table(x))
freq_df <- as.data.frame(t(freq_table))
freq_df

# Function to calculate prevalence and CI
calculate_prevalence <- function(test_column) {
  positives <- sum(test_column == "Pos")  # Count the number of positives
  total <- length(test_column)            # Total number of participants
  
  prevalence <- positives / total        # Prevalence
  
# Calculate 95% CI for prevalence
  ci <- prop.test(positives, total, conf.level = 0.95)
  
  return(c(Prevalence = prevalence, CI_Lower = ci$conf.int[1], CI_Upper = ci$conf.int[2]))
}

# Apply the function to each diagnostic method
prevalence_combined_EV <- calculate_prevalence(data$Combined_EV)
prevalence_kk_EV <- calculate_prevalence(data$KK_EV)
prevalence_snf_EV <- calculate_prevalence(data$SNF_EV)

prevalence_combined_TT <- calculate_prevalence(data$Combined_TT)
prevalence_kk_TT <- calculate_prevalence(data$KK_TT)
prevalence_snf_TT <- calculate_prevalence(data$SNF_TT)

prevalence_combined_AL <- calculate_prevalence(data$Combined_AL)
prevalence_kk_AL <- calculate_prevalence(data$KK_AL)
prevalence_snf_AL <- calculate_prevalence(data$SNF_AL)

prevalence_combined_HW <- calculate_prevalence(data$Combined_HW)
prevalence_kk_HW <- calculate_prevalence(data$KK_HW)
prevalence_snf_HW <- calculate_prevalence(data$SNF_HW)
prevalence_baermann_HW_SG <- calculate_prevalence(data$Baermann_HW_SG)

prevalence_combined_STHs <- calculate_prevalence(data$STHs_CombiNeged)
prevalence_kk_STHs <- calculate_prevalence(data$STHs_KK)
prevalence_snf_STHs <- calculate_prevalence(data$STHs_SNF)
prevalence_STHs_baermann <- calculate_prevalence(data$STHs_Baermann)


prevalence_table <- data.frame(
  Diagnostic_Technique = c(
    "Combined_EV", "KK_EV", "SNF_EV",
    "Combined_TT", "KK_TT", "SNF_TT",
    "Combined_AL", "KK_AL", "SNF_AL",
    "Combined_HW", "KK_HW", "SNF_HW",
    "Baermann_HW_SG",
    "Combined_STHs", "KK_STHs", "SNF_STHs", "STHs_Baermann"
  ),
  Prevalence = c(
    prevalence_combined_EV[1], prevalence_kk_EV[1], prevalence_snf_EV[1],
    prevalence_combined_TT[1], prevalence_kk_TT[1], prevalence_snf_TT[1],
    prevalence_combined_AL[1], prevalence_kk_AL[1], prevalence_snf_AL[1],
    prevalence_combined_HW[1], prevalence_kk_HW[1], prevalence_snf_HW[1],
    prevalence_baermann_HW_SG[1],
    prevalence_combined_STHs[1], prevalence_kk_STHs[1], prevalence_snf_STHs[1],
    prevalence_STHs_baermann[1]
  ),
  CI_Lower = c(
    prevalence_combined_EV[2], prevalence_kk_EV[2], prevalence_snf_EV[2],
    prevalence_combined_TT[2], prevalence_kk_TT[2], prevalence_snf_TT[2],
    prevalence_combined_AL[2], prevalence_kk_AL[2], prevalence_snf_AL[2],
    prevalence_combined_HW[2], prevalence_kk_HW[2], prevalence_snf_HW[2],
    prevalence_baermann_HW_SG[2],
    prevalence_combined_STHs[2], prevalence_kk_STHs[2], prevalence_snf_STHs[2],
    prevalence_STHs_baermann[2]
  ),
  CI_Upper = c(
    prevalence_combined_EV[3], prevalence_kk_EV[3], prevalence_snf_EV[3],
    prevalence_combined_TT[3], prevalence_kk_TT[3], prevalence_snf_TT[3],
    prevalence_combined_AL[3], prevalence_kk_AL[3], prevalence_snf_AL[3],
    prevalence_combined_HW[3], prevalence_kk_HW[3], prevalence_snf_HW[3],
    prevalence_baermann_HW_SG[3],
    prevalence_combined_STHs[3], prevalence_kk_STHs[3], prevalence_snf_STHs[3],
    prevalence_STHs_baermann[3]
  )
)

print(prevalence_table)


# Function to calculate sensitivity, specificity, PPV, and NPV
calculate_metrics <- function(conf_matrix) {
  TP <- conf_matrix[2, 2]  # True Positive
  FP <- conf_matrix[1, 2]  # False Positive
  TN <- conf_matrix[1, 1]  # True Negative
  FN <- conf_matrix[2, 1]  # False Negative
  
  # Sensitivity = TP / (TP + FN)
  sensitivity <- TP / (TP + FN)
  
  # Specificity = TN / (TN + FP)
  specificity <- TN / (TN + FP)
  
  # PPV = TP / (TP + FP)
  ppv <- TP / (TP + FP)
  
  # NPV = TN / (TN + FN)
  npv <- TN / (TN + FN)
  
  # Calculate 95% CI for each metric using prop.test
  sensitivity_ci <- prop.test(TP, TP + FN, conf.level = 0.95)$conf.int
  specificity_ci <- prop.test(TN, TN + FP, conf.level = 0.95)$conf.int
  ppv_ci <- prop.test(TP, TP + FP, conf.level = 0.95)$conf.int
  npv_ci <- prop.test(TN, TN + FN, conf.level = 0.95)$conf.int
  
  return(data.frame(
    Sensitivity = round(sensitivity * 100, 2),
    Sensitivity_Lower_CI = round(sensitivity_ci[1] * 100, 2),
    Sensitivity_Upper_CI = round(sensitivity_ci[2] * 100, 2),
    Specificity = round(specificity * 100, 2),
    Specificity_Lower_CI = round(specificity_ci[1] * 100, 2),
    Specificity_Upper_CI = round(specificity_ci[2] * 100, 2),
    PPV = round(ppv * 100, 2),
    PPV_Lower_CI = round(ppv_ci[1] * 100, 2),
    PPV_Upper_CI = round(ppv_ci[2] * 100, 2),
    NPV = round(npv * 100, 2),
    NPV_Lower_CI = round(npv_ci[1] * 100, 2),
    NPV_Upper_CI = round(npv_ci[2] * 100, 2)
  ))
}

# Gold standards for each species
gold_standards <- list(
  AL = "Combined_AL",        # A. lumbricoides uses Combined_AL as the gold standard
  HW = "Combined_HW",        # Hookworm uses Combined_HW as the gold standard
  TT = "Combined_TT",        # T. trichiura uses Combined_TT as the gold standard
  EV = "Combined_EV",        # E. vermicularis uses Combined_EV as the gold standard
  STHs = "STHs_CombiNeged"   # STHs (all species combined) uses STHs_CombiNeged as the gold standard
)

# List of diagnostic methods (your actual diagnostic test columns in your dataset)
diagnostic_methods <- c("STHs_KK", "KK_EV", "KK_TT", "KK_AL", "KK_HW", 
                        "STHs_SNF", "SNF_EV", "SNF_TT", "SNF_AL", "SNF_HW",
                        "STHs_Baermann", "Baermann_HW_SG")

# Create an empty data frame to store results
results_table <- data.frame()

# Loop through each diagnostic method and calculate metrics
for (test in diagnostic_methods) {
  
  # Get the species corresponding to each test method
  if (grepl("KK_", test)) {
    species <- sub("KK_", "", test)  # Extract the species name from the test (e.g., KK_AL -> AL)
  } else if (grepl("SNF_", test)) {
    species <- sub("SNF_", "", test) # Extract the species name from the test (e.g., SNF_EV -> EV)
  } else if (grepl("Baermann", test)) {
    species <- "STHs"  # Baermann methods will use STHs combined data
  } else {
    species <- "STHs"  # Other tests like STHs_KK use combined data
  }
  
  # Define the gold standard for this species
  gold_standard <- gold_standards[[species]]
  
  # Create confusion matrix for each test vs the gold standard (species-specific gold standard)
  conf_matrix <- table(data[[gold_standard]], data[[test]])
  
  # Calculate sensitivity, specificity, PPV, and NPV for each method
  metrics <- calculate_metrics(conf_matrix)
  
  # Add results to the results table
  results_table <- rbind(results_table, data.frame(
    Diagnostic_Technique = test,
    Species = species,
    Sensitivity = metrics$Sensitivity,
    Sensitivity_Lower_CI = metrics$Sensitivity_Lower_CI,
    Sensitivity_Upper_CI = metrics$Sensitivity_Upper_CI,
    Specificity = metrics$Specificity,
    Specificity_Lower_CI = metrics$Specificity_Lower_CI,
    Specificity_Upper_CI = metrics$Specificity_Upper_CI,
    PPV = metrics$PPV,
    PPV_Lower_CI = metrics$PPV_Lower_CI,
    PPV_Upper_CI = metrics$PPV_Upper_CI,
    NPV = metrics$NPV,
    NPV_Lower_CI = metrics$NPV_Lower_CI,
    NPV_Upper_CI = metrics$NPV_Upper_CI
  ))
}

# Print the results table
print(results_table)


# Function to calculate frequencies of positive and negative cases for each diagnostic method
calculate_frequencies <- function(test_column, species_column) {
  # Create a contingency table for the species and the diagnostic method
  freq_table <- table(species_column, test_column)
  
  return(freq_table)
}

# Create an empty data frame to store results
frequencies_table <- data.frame()

# List of diagnostic methods (your actual diagnostic test columns in your dataset)
diagnostic_methods <- c("STHs_KK", "KK_EV", "KK_TT", "KK_AL", "KK_HW", 
                        "STHs_SNF", "SNF_EV", "SNF_TT", "SNF_AL", "SNF_HW",
                        "STHs_Baermann", "Baermann_HW_SG")

# Loop through each diagnostic method and calculate frequencies
for (test in diagnostic_methods) {
  
  # Get the species corresponding to each test method
  if (grepl("KK_", test)) {
    species <- sub("KK_", "", test)  # Extract the species name from the test (e.g., KK_AL -> AL)
  } else if (grepl("SNF_", test)) {
    species <- sub("SNF_", "", test) # Extract the species name from the test (e.g., SNF_EV -> EV)
  } else if (grepl("Baermann", test)) {
    species <- "STHs"  # Baermann methods will use STHs combined data
  } else {
    species <- "STHs"  # Other tests like STHs_KK use combined data
  }
  
  # Define the gold standard for this species
  gold_standard <- gold_standards[[species]]
  
  # Calculate frequencies for each diagnostic method and species
  freq_table <- calculate_frequencies(data[[test]], data[[gold_standard]])
  
  # Add the frequencies table to the results
  frequencies_table <- rbind(frequencies_table, data.frame(
    Diagnostic_Technique = test,
    Species = species,
    Positives = freq_table["Pos", "Pos"],  # Positive cases
    Negatives = freq_table["Neg", "Neg"]   # Negative cases
  ))
}

# Print the frequencies table
print(frequencies_table)


# Function to calculate prevalence and 95% CI
calculate_prevalence <- function(test_column, total_participants) {
  positives <- sum(test_column == "Pos")  # Count the number of positives
  prevalence <- positives / total_participants * 100  # Prevalence in percentage
  
  # Calculate 95% CI for prevalence using prop.test
  ci <- prop.test(positives, total_participants, conf.level = 0.95)
  
  return(c(Prevalence = prevalence, CI_Lower = ci$conf.int[1] * 100, CI_Upper = ci$conf.int[2] * 100))
}

# Total participants
total_participants <- 333

# Assuming 'data' contains your dataset and "KK" and "SNF" are the respective columns
# Prevalence for KK method
prevalence_KK <- calculate_prevalence(data$STHs_KK, total_participants)

# Prevalence for SNF method
prevalence_SNF <- calculate_prevalence(data$STHs_SNF, total_participants)

# Prevalence for combined KK and SNF method (i.e., any positive result in either KK or SNF)
combined_KK_SNF <- calculate_prevalence((data$STHs_KK == "Pos" | data$STHs_SNF == "Pos"), total_participants)

# Prepare the results table
results <- data.frame(
  Method = c("KK", "SNF", "KK + SNF"),
  Total_Examined = rep(total_participants, 3),
  Positives = c(sum(data$STHs_KK == "Pos"), sum(data$STHs_SNF == "Pos"), sum(data$STHs_KK == "Pos" | data$STHs_SNF == "Pos")),
  Prevalence = c(prevalence_KK[1], prevalence_SNF[1], combined_KK_SNF[1]),
  CI_Lower = c(prevalence_KK[2], prevalence_SNF[2], combined_KK_SNF[2]),
  CI_Upper = c(prevalence_KK[3], prevalence_SNF[3], combined_KK_SNF[3])
)

# Print the results
print(results)

positives <- 176
total_participants <- 333
# Use prop.test to calculate the 95% confidence interval for the proportion of positives
ci_result <- prop.test(positives, total_participants, conf.level = 0.95)
# Print the result
ci_result$conf.int

################################################


# Function to calculate manual Cohen's Kappa
manual_kappa <- function(conf_matrix) {
  TP <- conf_matrix[2, 2]  # True Positives
  FP <- conf_matrix[1, 2]  # False Positives
  TN <- conf_matrix[1, 1]  # True Negatives
  FN <- conf_matrix[2, 1]  # False Negatives
  
  P_o <- (TP + TN) / sum(conf_matrix)  # Observed Agreement
  P_e <- ((TP + FP) / sum(conf_matrix)) * ((TP + FN) / sum(conf_matrix)) +
    ((TN + FN) / sum(conf_matrix)) * ((TN + FP) / sum(conf_matrix))  # Expected Agreement
  
  kappa <- (P_o - P_e) / (1 - P_e)
  return(kappa)
}
# Example confusion matrix for KK_AL vs Combined_AL
kk_al_table <- table(data$KK_AL, data$Combined_AL)
# Calculate manual Kappa for each comparison
kappa_kk_al <- manual_kappa(kk_al_table)
kappa_kk_al


#Calculate for snf_al,kk_ev,snf_ev
kappa_snf_al <- manual_kappa(table(data$SNF_AL, data$Combined_AL))
kappa_kk_ev <- manual_kappa(table(data$KK_EV, data$Combined_EV))
kappa_snf_ev <- manual_kappa(table(data$SNF_EV, data$Combined_EV))
kappa_kk_tt <- manual_kappa(table(data$KK_TT, data$Combined_TT))
kappa_snf_tt <- manual_kappa(table(data$SNF_TT, data$Combined_TT))
kappa_kk_hw <- manual_kappa(table(data$KK_HW, data$Combined_HW))
kappa_snf_hw <- manual_kappa(table(data$SNF_HW, data$Combined_HW))

kappa_kk_STHs <- manual_kappa(table(data$STHs_KK, data$STHs_CombiNeged))
kappa_snf_STHs <- manual_kappa(table(data$STHs_SNF, data$STHs_CombiNeged))

kappa_bm_hw <- manual_kappa(table(data$Baermann_HW_SG, data$Combined_HW))
kappa_bm_STH <- manual_kappa(table(data$STHs_Baermann, data$STHs_CombiNeged))



print(kappa_snf_al)
print(kappa_kk_ev)
print(kappa_snf_ev)
print(kappa_kk_tt)
print(kappa_snf_tt)
print(kappa_kk_hw)
print(kappa_snf_hw)
print(kappa_kk_STHs)
print(kappa_snf_STHs)
print(kappa_bm_hw)
print(kappa_bm_STH)
##########################

# Function to calculate manual Cohen's Kappa, p-value, 95% CI, P_o, and P_e
manual_kappa_with_details <- function(conf_matrix, n) {
  TP <- conf_matrix[2, 2]  # True Positives
  FP <- conf_matrix[1, 2]  # False Positives
  TN <- conf_matrix[1, 1]  # True Negatives
  FN <- conf_matrix[2, 1]  # False Negatives
  
  # Calculate Observed Agreement (P_o)
  P_o <- (TP + TN) / sum(conf_matrix)
  
  # Calculate Expected Agreement (P_e)
  P_e <- ((TP + FP) / sum(conf_matrix)) * ((TP + FN) / sum(conf_matrix)) +
    ((TN + FN) / sum(conf_matrix)) * ((TN + FP) / sum(conf_matrix))
  
  # Calculate Cohen's Kappa
  kappa <- (P_o - P_e) / (1 - P_e)
  
  # Calculate the standard error (SE) of Cohen's Kappa
  se_kappa <- sqrt((1 - kappa^2) / (n - 1))
  
  # Calculate the z-score for Cohen's Kappa
  z <- kappa / se_kappa
  
  # Calculate the p-value using the z-score
  p_value <- 2 * (1 - pnorm(abs(z)))  # Two-tailed p-value
  
  # Calculate the 95% Confidence Interval for Cohen's Kappa
  lower_ci <- kappa - 1.96 * se_kappa
  upper_ci <- kappa + 1.96 * se_kappa
  
  # Format the 95% CI in the required style
  ci_str <- paste("95% CI: ", round(lower_ci, 3), " to ", round(upper_ci, 3))
  
  # Return the results
  return(list(Kappa = kappa, p_value = p_value, P_o = P_o, P_e = P_e, CI = ci_str))
}

# Example for KK_AL vs Combined_AL (confusion matrix already given)
kk_al_table <- table(data$KK_AL, data$Combined_AL)

# Total number of subjects (replace with your actual n)
n <- sum(kk_al_table)  # Total number of subjects

# Calculate manual Kappa, p-value, 95% CI, P_o, and P_e
kk_al_result <- manual_kappa_with_details(kk_al_table, n)

# Print Kappa, p-value, P_o, P_e, and formatted 95% CI
print(paste("Kappa: ", round(kk_al_result$Kappa, 3)))
print(paste("p-value: ", round(kk_al_result$p_value, 5)))
print(paste("P_o (Observed Agreement): ", round(kk_al_result$P_o, 3)))
print(paste("P_e (Expected Agreement): ", round(kk_al_result$P_e, 3)))
print(kk_al_result$CI)

# Repeat for other comparisons
kappa_snf_al_result <- manual_kappa_with_details(table(data$SNF_AL, data$Combined_AL), n)
kappa_kk_ev_result <- manual_kappa_with_details(table(data$KK_EV, data$Combined_EV), n)
kappa_snf_ev_result <- manual_kappa_with_details(table(data$SNF_EV, data$Combined_EV), n)
kappa_kk_tt_result <- manual_kappa_with_details(table(data$KK_TT, data$Combined_TT), n)
kappa_snf_tt_result <- manual_kappa_with_details(table(data$SNF_TT, data$Combined_TT), n)
kappa_kk_hw_result <- manual_kappa_with_details(table(data$KK_HW, data$Combined_HW), n)
kappa_snf_hw_result <- manual_kappa_with_details(table(data$SNF_HW, data$Combined_HW), n)
kappa_kk_sths_result <- manual_kappa_with_details(table(data$STHs_KK, data$STHs_CombiNeged), n)
kappa_snf_sths_result <- manual_kappa_with_details(table(data$STHs_SNF, data$STHs_CombiNeged), n)
kappa_bm_hw_result <- manual_kappa_with_details(table(data$Baermann_HW_SG, data$Combined_HW), n)
kappa_bm_sth_result <- manual_kappa_with_details(table(data$STHs_Baermann, data$STHs_CombiNeged), n)

# Print results
print(kk_al_result)
print(kappa_snf_al_result)
print(kappa_kk_ev_result)
print(kappa_snf_ev_result)
print(kappa_kk_tt_result)
print(kappa_snf_tt_result)
print(kappa_kk_hw_result)
print(kappa_snf_hw_result)
print(kappa_kk_sths_result)
print(kappa_snf_sths_result)
print(kappa_bm_hw_result)
print(kappa_bm_sth_result)
```

