Packages

Use tidyverse, meta, metafor, metamisc, janitor

Dataset

Load the dataset

Set theme for the figures

# Make ggplot2's minimal theme the default for every plot drawn afterwards.
theme_set(theme_minimal())

Variables

# List the column names of the raw dataset.
colnames(df)

##  [1] "Author, Date"                 "Check"                       
##  [3] "Ref"                          "Modality"                    
##  [5] "Model Type"                   "Optimizer/Architecture/Other"
##  [7] "Activation function"          "Task"                        
##  [9] "Task Desc"                    "Caries threshold"            
## [11] "Annotators"                   "Annotator details"           
## [13] "Type of annotators"           "Agreement annotators"        
## [15] "TP"                           "TN"                          
## [17] "FP"                           "FN"                          
## [19] "# Samples"                    "train"                       
## [21] "test"                         "validation"                  
## [23] "Precision"                    "Specificity"                 
## [25] "Sensitivity (Recall)"         "Accuracy"                    
## [27] "Prevalence"                   "PPV"                         
## [29] "NPV"                          "mAP mean average precision"  
## [31] "IoU intersection over union"  "F1 - Dice"                   
## [33] "Comments"

Type of variables

# Compact overview of df: one line per column with its type and first values.
glimpse(df)

## Rows: 47
## Columns: 33
## $ `Author, Date`                 <chr> "Salehi, H. S., et al. 2019", "Salehi, …
## $ Check                          <chr> "Deep learning-based quantitative analy…
## $ Ref                            <dbl> 13, 38, 38, 1, 34, 33, 34, 34, 34, 10, …
## $ Modality                       <chr> "OCT - Images", "OCT - Images", "OCT - …
## $ `Model Type`                   <chr> "CNN", "CNN", "CNN", "CNN", "CNN", "CNN…
## $ `Optimizer/Architecture/Other` <chr> NA, "Softmax 2 FC + 3 CNN", "Softmax 3 …
## $ `Activation function`          <chr> "ReLU", "ReLU", "ReLU", NA, NA, "ReLU",…
## $ Task                           <chr> "Classification", "Classification", "Cl…
## $ `Task Desc`                    <chr> "Caries detection", "Caries detection",…
## $ `Caries threshold`             <chr> "enamel/dentin", "enamel", "enamel", "e…
## $ Annotators                     <dbl> NA, NA, NA, 3, NA, 2, NA, NA, NA, NA, N…
## $ `Annotator details`            <chr> "Unclear", "Unclear", "Unclear", "Denti…
## $ `Type of annotators`           <chr> "Unclear", "Unclear", "Unclear", "Denti…
## $ `Agreement annotators`         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ TP                             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ TN                             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ FP                             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ FN                             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `# Samples`                    <dbl> 51, 81, 81, 7200, 81, NA, 81, 81, 81, 4…
## $ train                          <dbl> NA, 60, 60, 6000, 60, NA, 60, 60, 60, N…
## $ test                           <dbl> NA, 21, 21, 1200, 21, NA, 21, 21, 21, N…
## $ validation                     <dbl> NA, 0, 0, 0, 0, NA, 0, 0, 0, NA, 0, 0, …
## $ Precision                      <dbl> 0.999, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ Specificity                    <dbl> 0.998, 0.990, 0.972, 0.972, 0.962, 0.96…
## $ `Sensitivity (Recall)`         <dbl> 0.985, 0.582, 0.809, 0.963, 0.758, 0.89…
## $ Accuracy                       <dbl> 0.910, 0.827, 0.907, 0.945, 0.887, 0.95…
## $ Prevalence                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ PPV                            <dbl> 0.999, 0.974, 0.941, NA, 0.820, NA, 0.6…
## $ NPV                            <dbl> 0.972, 0.782, 0.885, NA, 0.873, NA, 0.6…
## $ `mAP mean average precision`   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `IoU intersection over union`  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `F1 - Dice`                    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ Comments                       <chr> NA, "mAP, IoU :unclear, used the last r…

Data cleaning

# Drop the free-text Comments column, then normalise the remaining column
# names with janitor (e.g. `Author, Date` becomes author_date).
df <- janitor::clean_names(select(df, -Comments))

Create a new column to extract only the radiology

# Split `modality` (e.g. "OCT - Images") into the image `source` and the
# image `type`.
# The separator pattern swallows the blanks around the hyphen, so BOTH new
# columns come out clean; splitting on a bare "-" left a trailing space in
# `source` because only `type` was trimmed afterwards.
df <- df %>% 
  separate(modality, into = c("source", "type"), sep = "\\s*-\\s*") %>% 
  # guard against leading/trailing blanks that are not next to the hyphen
  mutate(across(c(source, type), str_trim))

Order the levels for caries threshold

# Frequency of each raw caries_threshold value (table() omits NA by default).
table(df$caries_threshold)

## 
##                                        caries yes/no 
##                                                   10 
##                                            cavitated 
##                                                    4 
##                                               dentin 
##                                                    4 
##                                               enamel 
##                                                    4 
##                                        enamel/dentin 
##                                                   15 
## healthy/white spot/ dentin cavitated/micro-cavitated 
##                                                    1 
##                                              Unclear 
##                                                    7

# Move the listed caries thresholds to the front of the level order, in the
# order given; levels that are not listed keep their place after them.
df$caries_threshold <- forcats::fct_relevel(
  df$caries_threshold,
  "enamel",
  "enamel/dentin",
  "healthy/white spot/ dentin cavitated/micro-cavitated",
  "dentin",
  "cavitated",
  "caries yes/no"
)

Recode the certified dentists to dentists

# Frequency of each raw annotator_details value (table() omits NA by default).
table(df$annotator_details)

## 
##        Cariology specialista (min 3 years) 
##                                          1 
##                         Certified dentists 
##                                          2 
##                                   Dentists 
##                                         12 
##           dentists (8-11 years experience) 
##                                          1 
##           Dentists (8-11 years experience) 
##                                          2 
##                  Dentists and radiologists 
##                                          3 
##                        Experienced Dentist 
##                                          1 
##                            Expert dentists 
##                                          1 
## Oral Pathologist, Endodoncist, Radiologist 
##                                          1 
##                                Specialists 
##                                          1 
##                                    Unclear 
##                                         22

# Collapse the free-text annotator descriptions into a few categories
# (new level = old level). Levels that are not mentioned are left as they are.
df <- df %>%
  mutate(
    annotator_details = forcats::fct_recode(
      annotator_details,
      # generic or experienced dentists
      "Dentists" = "Certified dentists",
      "Dentists" = "Dentists (8-11 years experience)",
      "Dentists" = "dentists (8-11 years experience)",
      "Dentists" = "Experienced Dentist",
      "Dentists" = "Expert dentists",
      # specialists, no radiologist mentioned
      "Specialists (no Rad)" = "Cariology specialista (min 3 years)",
      "Specialists (no Rad)" = "Specialists",
      # groups that mention a radiologist
      "Specialists (inc. Rad)" = "Oral Pathologist, Endodoncist, Radiologist",
      "Specialists (inc. Rad)" = "Dentists and radiologists"
    )
  )

Unify the caries thresholds

# Harmonise the caries thresholds in one pass:
#   1. title-case the labels,
#   2. treat missing thresholds as "Unclear",
#   3. merge the fine-grained labels into the broader categories,
#   4. fix the display order of the resulting levels.
df <- df %>%
  mutate(
    caries_threshold = caries_threshold %>%
      str_to_title() %>%
      replace_na("Unclear") %>%
      fct_recode(
        "Enamel/Dentin" = "Healthy/White Spot/ Dentin Cavitated/Micro-Cavitated",
        "Dentin/Cavitated" = "Cavitated",
        "Dentin/Cavitated" = "Dentin"
      ) %>%
      fct_relevel(
        "Enamel",
        "Enamel/Dentin",
        "Dentin/Cavitated",
        "Caries Yes/No",
        "Unclear"
      )
  )

Extract the year

# Take the first run of digits in author_date as the publication year and
# place the new column right after author_date.
df <- df %>%
  mutate(
    year = as.numeric(str_extract(author_date, "[0-9]+")),
    .after = author_date
  )

EDA

How many studies?

# Number of distinct reference ids in the dataset.
knitr::kable(n_distinct(df$ref))

x
33

By modality and model_type

Not grouped by study

# Cross-tabulate image type against model type (one count per dataset row).
knitr::kable(janitor::tabyl(df, type, model_type))

type	Auto-Encoder	CNN	MLP	PNN
Images	0	22	2	1
Radiology	1	15	6	0

By diagnostic task

# Cross-tabulate the diagnostic task against image type.
knitr::kable(janitor::tabyl(df, task_desc, type))

task_desc	Images	Radiology
Caries detection	25	19
Periapical infection	0	1
Periodontitis	0	1
Unclear	0	1

By task

# Cross-tabulate the diagnostic task against the ML task.
knitr::kable(janitor::tabyl(df, task_desc, task))

task_desc	Classification	Segmentation
Caries detection	43	1
Periapical infection	1	0
Periodontitis	1	0
Unclear	1	0

samples by model type

# Row count plus mean and SD of the sample size for each model type.
# Missing sample sizes are ignored; a group with a single value has SD = NA.
knitr::kable(
  summarise(
    group_by(df, model_type),
    n = n(),
    sample_size_mean = mean(number_samples, na.rm = TRUE),
    sd = sd(number_samples, na.rm = TRUE)
  )
)

model_type	n	sample_size_mean	sd
Auto-Encoder	1	480.0000	NA
CNN	37	935.4167	1692.4464
MLP	8	489.1250	680.0066
PNN	1	400.0000	NA

# Row count plus mean and SD of the sample size for each diagnostic task.
# Missing sample sizes are ignored; a group with a single value has SD = NA.
knitr::kable(
  summarise(
    group_by(df, task_desc),
    n = n(),
    sample_size_mean = mean(number_samples, na.rm = TRUE),
    sd = sd(number_samples, na.rm = TRUE)
  )
)

task_desc	n	sample_size_mean	sd
Caries detection	44	888.093	1568.688
Periapical infection	1	80.000	NA
Periodontitis	1	80.000	NA
Unclear	1	120.000	NA

# Distribution of sample sizes on a log10 x axis, one panel row per model type.
ggplot(df, aes(x = number_samples)) +
  geom_histogram(bins = 6) +
  facet_grid(rows = vars(model_type)) +
  scale_x_log10() +
  labs(x = "Sample size")

Check sample size, ordered desc

# Key columns of every row, largest sample size first (rows with a missing
# sample size end up at the bottom).
df %>%
  select(
    author_date,
    task_desc,
    number_samples,
    specificity,
    sensitivity_recall
  ) %>%
  arrange(desc(number_samples)) %>%
  knitr::kable()

author_date	task_desc	number_samples	specificity	sensitivity_recall
Wang et al. 2020	Caries detection	7200	0.972	0.963
Kumar and Srivastava, 2018	Caries detection	6000	NA	0.730
Garcia Cantu et al., 2020	Caries detection	3686	0.830	0.750
Lee, J. H., et al. 2018	Caries detection	3000	0.830	0.810
Srivastava et al. 2017	Caries detection	3000	NA	0.081
Guijarro-Rodríguez, A. A., et al. 2020	Caries detection	2030	NA	NA
Lakshmi, M. M., et al. 2021	Caries detection	1900	NA	0.842
Haghanif-ar, A., et al. 2020	Caries detection	1838	0.507	NA
Singh, P. and P. Sehgal, 2021	Caries detection	1500	0.930	0.960
Holtkamp, A., et al. 2021	Caries detection	1319	0.650	0.550
Tripathi, P., et al. 2019	Caries detection	800	NA	NA
Riyadi, S., et al. 2020	Caries detection	660	NA	NA
Charvát, J., et al. 2020	Caries detection	578	NA	NA
Vinayahalingam, S., et al. 2021	Caries detection	500	0.860	0.870
Leo, L. M. and T. K. Reddy, 2021	Caries detection	480	NA	NA
Choi, Eun and Kim, 2018	Caries detection	475	NA	NA
Megalan Leo, L. and T. Kalpalatha Reddy, 2020	Caries detection	418	NA	NA
Singh, P. and P. Sehgal, 2020	Caries detection	400	0.930	0.930
Schwendicke, F., et al. 2020a	Caries detection	226	0.850	0.460
Schwendicke, F., et al. 2020b	Caries detection	226	0.760	0.590
Khan et al. 2020	Caries detection	206	NA	NA
Patil et al. 2018	Unclear	120	0.750	0.967
Sornam, M. and M. Prabhakaran, 2019	Caries detection	120	NA	NA
Sornam, M. and M. Prabhakaran, 2018	Caries detection	120	NA	NA
Geetha, V., et al. 2020	Caries detection	105	NA	NA
Salehi, H., et al. 2021b	Caries detection	81	0.990	0.582
Salehi, H., et al. 2021e	Caries detection	81	0.972	0.809
Salehi, H. S., et al. 2020d	Caries detection	81	0.962	0.758
Salehi, H. S., et al. 2020a	Caries detection	81	0.950	0.172
Salehi, H. S., et al. 2020c	Caries detection	81	0.933	0.758
Salehi, H. S., et al. 2020e	Caries detection	81	0.930	0.813
Salehi, H., et al. 2021d	Caries detection	81	0.915	0.856
Salehi, H. S., et al. 2020b	Caries detection	81	0.912	0.651
Salehi, H., et al. 2021f	Caries detection	81	0.908	0.952
Salehi, H., et al. 2021a	Caries detection	81	0.896	0.645
Salehi, H. S., et al. 2020f	Caries detection	81	0.895	0.632
Salehi, H. S., et al. 2020g	Caries detection	81	0.887	0.514
Salehi, H., et al. 2021c	Caries detection	81	0.696	0.971
Prajapati, S. A., et al. 2017	Caries detection	80	NA	NA
Prajapati, S. A., et al. 2017	Periapical infection	80	NA	NA
Prajapati, S. A., et al. 2017	Periodontitis	80	NA	NA
Sonavane, A., et al. 2021	Caries detection	74	NA	NA
Yu-Ping, H., et al. 2020	Caries detection	63	0.898	0.989
Salehi, H. S., et al. 2019	Caries detection	51	0.998	0.985
Patil, S., et al. 2019	Caries detection	40	0.500	1.000
Patil et al. 2018	Caries detection	40	0.250	0.880
Yu et al., 2020	Caries detection	NA	0.961	0.899

Variables creation

See Welton NJ, McAleenan A, Thom HHZ, et al. (2017) Methods for Meta-Analysis of the Diagnostic Test Accuracy Studies. NIHR Journals Library. https://www.ncbi.nlm.nih.gov/books/NBK436343/

# Stack the four confusion-matrix columns (tp ... fn) into long format and
# draw one histogram per cell type to see how much 2x2 data is available.
df %>%
  pivot_longer(
    cols = tp:fn,
    names_to = "matrix_names",
    values_to = "matrix_values"
  ) %>%
  ggplot(aes(x = matrix_values)) +
  geom_histogram() +
  facet_grid(rows = vars(matrix_names))
# Alternatives tried: select(tp:fn) %>% summary(), and geom_boxplot() +
# geom_jitter() + scale_y_log10()

Meta-analysis (all studies!)

Calculate the diagnostic odds ratio (DOR)

The DOR is computed from the reported sensitivity and specificity as $\mathrm{DOR} = \dfrac{\mathrm{Sens} \times \mathrm{Spec}}{(1 - \mathrm{Sens})\,(1 - \mathrm{Spec})}$. See Glas, A.S., Lijmer, J.G., Prins, M.H., Bonsel, G.J., and Bossuyt, P.M.M., 2003. The diagnostic odds ratio: a single indicator of test performance. Journal of Clinical Epidemiology, 56 (11), 1129–1135.

Create the DOR

# Diagnostic odds ratio from the reported specificity and sensitivity.
# The denominator is 0 when either measure equals 1, which yields Inf.
df <- mutate(
  df,
  dor = (specificity * sensitivity_recall) / ((1 - specificity) * (1 - sensitivity_recall))
)

# Histogram of the DOR of the caries-detection rows, one panel row per
# diagnostic threshold.
# Only finite, positive DORs can be placed on a log axis, so infinite values
# (perfect sensitivity or specificity) are filtered out explicitly here
# rather than being dropped by ggplot with a warning.
df %>% 
  filter(task_desc == "Caries detection", is.finite(dor), dor > 0) %>% 
  ggplot(aes(x = dor)) +
  geom_histogram(bins = 20) +
  # log-spaced axis; the tick labels stay in DOR units
  scale_x_log10() +
  facet_grid(caries_threshold ~ .) + 
  labs(title = "Diagnostic odds ratio (DOR) by study and diagnostic threshold", 
       y = "Number of studies", 
       x = "DOR (log10 scale)") +
  # keep the facet strip labels horizontal so long threshold names are readable
  theme(strip.text.y = element_text(angle = 0))

# Rows with a positive DOR, highest first, next to the sensitivity and
# specificity the DOR was derived from.
df %>%
  filter(dor > 0) %>%
  select(author_date, sensitivity_recall, specificity, task_desc, dor) %>%
  arrange(desc(dor)) %>%
  knitr::kable()

author_date	sensitivity_recall	specificity	task_desc	dor
Patil, S., et al. 2019	1.000	0.500	Caries detection	Inf
Salehi, H. S., et al. 2019	0.985	0.998	Caries detection	32767.666667
Wang et al. 2020	0.963	0.972	Caries detection	903.509653
Yu-Ping, H., et al. 2020	0.989	0.898	Caries detection	791.552585
Singh, P. and P. Sehgal, 2021	0.960	0.930	Caries detection	318.857143
Yu et al., 2020	0.899	0.961	Caries detection	219.329525
Salehi, H., et al. 2021f	0.952	0.908	Caries detection	195.746377
Singh, P. and P. Sehgal, 2020	0.930	0.930	Caries detection	176.510204
Salehi, H., et al. 2021e	0.809	0.972	Caries detection	147.035901
Salehi, H., et al. 2021b	0.582	0.990	Caries detection	137.842105
Patil et al. 2018	0.967	0.750	Unclear	87.909091
Salehi, H. S., et al. 2020d	0.758	0.962	Caries detection	79.294911
Salehi, H., et al. 2021c	0.971	0.696	Caries detection	76.657895
Salehi, H., et al. 2021d	0.856	0.915	Caries detection	63.990196
Salehi, H. S., et al. 2020e	0.813	0.930	Caries detection	57.760886
Salehi, H. S., et al. 2020c	0.758	0.933	Caries detection	43.617491
Vinayahalingam, S., et al. 2021	0.870	0.860	Caries detection	41.109890
Lee, J. H., et al. 2018	0.810	0.830	Caries detection	20.814242
Salehi, H. S., et al. 2020b	0.651	0.912	Caries detection	19.331597
Salehi, H., et al. 2021a	0.645	0.896	Caries detection	15.653304
Garcia Cantu et al., 2020	0.750	0.830	Caries detection	14.647059
Salehi, H. S., et al. 2020f	0.632	0.895	Caries detection	14.638716
Salehi, H. S., et al. 2020g	0.514	0.887	Caries detection	8.301795
Schwendicke, F., et al. 2020a	0.460	0.850	Caries detection	4.827160
Schwendicke, F., et al. 2020b	0.590	0.760	Caries detection	4.556911
Salehi, H. S., et al. 2020a	0.172	0.950	Caries detection	3.946860
Patil et al. 2018	0.880	0.250	Caries detection	2.444444
Holtkamp, A., et al. 2021	0.550	0.650	Caries detection	2.269841

Crazy results! DOR = 32767!! and even infinite

Check the formula

Compare original formula against https://doi.org/10.1080/03610918.2016.1157183

# Recompute the DOR as (odds of a positive test in the diseased) /
# (odds of a positive test in the healthy) and plot it against the first
# version on log-log axes; agreement shows up as points on the diagonal.
df %>%
  mutate(dor_2 = ((sensitivity_recall / (1 - sensitivity_recall)) / ((1 - specificity) / specificity))) %>%
  ggplot(aes(dor, dor_2)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

Why??

The DOR rises steeply when sensitivity or specificity becomes nearly perfect (Kraemer, H. Risk ratios, odds ratio, and the test QRQC. SAGE Publications, Inc., Newbury Park CA, 1992, pp. 103–113).

Calculate the confidence interval for the DOR

Comparing tests using sensitivity and specificity (bivariate model)

https://methods.cochrane.org/sites/methods.cochrane.org.sdt/files/public/uploads/DTA%20Handbook%20Chapter%2011%20201312.pdf

For the bivariate analysis the following statistics will be reported with confidence intervals: (1) estimates of the average sensitivity and specificity for each test; (2) estimates of the relative sensitivity and relative specificity, expressed as odds ratios; (3) P-values for the difference in sensitivity and for the difference in specificity.

When the bivariate method has been used, the magnitude and direction of the difference between tests can be summarised either by reporting point estimates of the average sensitivity and specificity for the two tests, or measures of relative test sensitivity and specificity (relative measures are computed on a logit scale, and thus are technically odds ratios). It is not possible to directly translate relative measures of accuracy to the consequences of using one or other test. Therefore focusing on the size and significance (P-values) of any difference in estimates of average sensitivity and specificity between tests is likely to be the most accessible way of illustrating the potential impact of using different tests.

Final report: only caries & radiology detection

Filter only caries task

# Rows per diagnostic task and image type, to see what the filter will keep.
knitr::kable(table(df$task_desc, df$type))

	Images	Radiology
Caries detection	25	19
Periapical infection	0	1
Periodontitis	0	1
Unclear	0	1

# Subset used for the final report: caries detection on radiographs only.
df_only_radiology <- filter(
  df,
  task_desc == "Caries detection",
  type == "Radiology"
)

How many unique studies?

# One row per distinct value of `check` in the radiology subset.
knitr::kable(distinct(df_only_radiology, check))

check
The Automatic Detection of Caries in Third Molars on Panoramic Radiographs Using Deep Learning: A Pilot Study
Detecting caries lesions of different radiographic extension on bitewings using deep learning
Detection and diagnosis of dental caries using a deep learning-based convolutional neural network algorithm
PaXNet: Dental Caries Detection in Panoramic X-ray using Ensemble Transfer Learning and Capsule Classifier
Intelligent system with dragonfly optimisation for caries detection
Caries detection using multidimensional projection and neural network
Automated feature detection in dental periapical radiographs by using deep learning
Boosting Proximal Dental Caries Detection via Combination of Variational Methods and Convolutional Neural Network
Classification of Dental Diseases U sing CNN and Transfer Learning
Deep Learning for Pixel-based Edge Models Classification of Tertiary Dentine Images
Dental Caries Classification System Using Deep Learning Based Convolutional Neural Network
Dental caries diagnosis in digital radiographs using back-propagation neural network
Detection of Tooth caries in Bitewing Radiographs using Deep Learning
Example Mining for Incremental Learning in Medical Imaging
Genetic Algorithms Based Approach for Dental Caries Detection using Back Propagation Neural Network
Learning compact and discriminative hybrid neural network for dental caries classification
Logit-Based Artificial Bee Colony Optimization (LB-ABC) Approach for Dental Caries Classification Using a Back Propagation Neural Network
A New Linear Adaptive Swarm Intelligence Approach using Back Propagation Neural Network for Dental Caries Classification
Classification of Dental Cavities from X-ray images using Deep CNN algorithm

19 unique studies

Describe type of task

# Counts and formatted percentages of the ML task in the radiology subset.
knitr::kable(
  janitor::adorn_pct_formatting(
    janitor::tabyl(df_only_radiology, task)
  )
)

task	n	percent
Classification	18	94.7%
Segmentation	1	5.3%

By model

# Counts and percentages of each model type in the radiology subset.
# Percentages are formatted like the other frequency tables in this report
# (task, caries threshold) instead of being printed as raw proportions.
df_only_radiology %>% 
  janitor::tabyl(model_type) %>% 
  janitor::adorn_pct_formatting() %>% 
  knitr::kable()

model_type	n	percent
Auto-Encoder	1	0.0526316
CNN	13	0.6842105
MLP	5	0.2631579

By year

# Stacked bar chart: number of rows per publication year, filled by model type.
# Fixes the stray leading blank in the y-axis label.
df_only_radiology %>% 
  ggplot(aes(x = year, 
             fill = model_type)) + 
  geom_bar() + 
  labs(title = "Model type by year", 
       y = "Number of studies", 
       x = "Year", 
       fill = "Model type")

Diagnostic threshold

# Counts and formatted percentages of each diagnostic threshold.
knitr::kable(
  janitor::adorn_pct_formatting(
    janitor::tabyl(df_only_radiology, caries_threshold)
  )
)

caries_threshold	n	percent
Enamel	1	5.3%
Enamel/Dentin	6	31.6%
Dentin/Cavitated	4	21.1%
Caries Yes/No	4	21.1%
Unclear	4	21.1%

Describe the annotators: size and details and the % with the agreement reported

Type and number of annotators

# Bar chart of how often each annotator category occurs, most frequent first,
# with the count printed in white inside the top of each bar.
annotator_bars <- aes(
  x = fct_reorder(annotator_details, desc(n)),
  y = n
)
count(df_only_radiology, annotator_details) %>%
  ggplot(annotator_bars) +
  geom_col() +
  geom_text(aes(label = n), vjust = 1.5, colour = "white") +
  scale_y_continuous(breaks = c(0, 2, 5, 8, 10)) +
  labs(title = "Type of annotators",
       x = "Type",
       y = "Number of studies")

# Distribution of the number of annotators, split by annotator category,
# with an added overall column.
gtsummary::add_overall(
  gtsummary::tbl_summary(
    select(df_only_radiology, annotator_details, annotators),
    by = annotator_details
  )
)

Characteristic	Overall, N = 19¹	Specialists (no Rad), N = 2¹	Dentists, N = 10¹	Specialists (inc. Rad), N = 2¹	Unclear, N = 5¹
annotators
1	6 (46%)	1 (50%)	4 (44%)	0 (0%)	1 (100%)
2	3 (23%)	0 (0%)	3 (33%)	0 (0%)	0 (0%)
3	2 (15%)	0 (0%)	1 (11%)	1 (100%)	0 (0%)
4	2 (15%)	1 (50%)	1 (11%)	0 (0%)	0 (0%)
Unknown	6	0	1	1	4
¹ Statistics presented: n (%)

Mean number of annotators by type of annotator

# Horizontal bar chart of the mean number of annotators per annotator
# category (missing counts ignored), ordered by that mean.
df_only_radiology %>%
  group_by(annotator_details) %>%
  summarise(mean_annotators = mean(annotators, na.rm = TRUE)) %>%
  ggplot(
    aes(
      x = fct_reorder(annotator_details, mean_annotators),
      y = mean_annotators
    )
  ) +
  geom_col() +
  labs(title = "Mean annotators by type of annotator",
       x = "Type of annotator",
       y = "Mean") +
  coord_flip()

# Overall summary of the number of annotators; n counts all rows, the other
# statistics ignore rows where the number of annotators is missing.
knitr::kable(
  summarise(
    df_only_radiology,
    n = n(),
    mean = mean(annotators, na.rm = TRUE),
    sd = sd(annotators, na.rm = TRUE),
    min = min(annotators, na.rm = TRUE),
    max = max(annotators, na.rm = TRUE)
  )
)

n	mean	sd	min	max
19	2	1.1547	1	4

Studies with agreement reported

# Frequency of each reported annotator-agreement value (NA = not reported).
knitr::kable(count(df_only_radiology, agreement_annotators))

agreement_annotators	n
98	1
NA	18

Only one study reported the annotators agreement

how many report the prevalence

# Frequency of each reported prevalence value (NA = not reported).
knitr::kable(count(df_only_radiology, prevalence))

prevalence	n
0.5	1
NA	18

Only one study reported the prevalence of caries in its sample; the reported prevalence was 50%.

Describe the sample of the studies

# Five-number summary plus mean of the dataset sizes in the radiology subset.
df_only_radiology %>%
  pull(number_samples) %>%
  summary()

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      40     120     480    1235    1869    6000

describe train, test, validation

Of the 19 studies, 14 provide data on the sample partition, and of these only 2 indicate the number of the sample dedicated to validation. There are no studies with external validation

# Summarise the size of the train / test / validation partitions.
# A partition size of 0 means "not used / not reported", so it is turned into
# NA and excluded from the count and from the statistics.
df_only_radiology %>%
  pivot_longer(train:validation,
               names_to = "sample_partition",
               values_to = "values_partition")  %>%
  # (a gtsummary::tbl_summary(by = sample_partition) version was tried and
  #  discarded)
  # compare against the number 0: values_partition is numeric, and na_if()
  # rejects a character "0" for a numeric column in dplyr >= 1.1.0
  mutate(values_partition = na_if(values_partition, 0)) %>%
  group_by(sample_partition) %>%
  summarise(
    count = sum(!is.na(values_partition)),
    mean = mean(values_partition, na.rm = TRUE),
    median = median(values_partition, na.rm = TRUE),
    sd = sd(values_partition, na.rm = TRUE)
  ) %>% 
  # lambda form: passing extra arguments through across(...) is deprecated
  mutate(across(where(is.numeric), ~ round(.x, 2))) %>% 
  knitr::kable()

sample_partition	count	mean	median	sd
test	14	291.71	139.5	334.63
train	14	1231.93	350.0	1524.68
validation	2	166.00	166.0	121.62

# Stacked horizontal bars: size of each dataset partition per study, with the
# studies ordered by their total sample size.
df_only_radiology %>%
  pivot_longer(train:validation,
               names_to = "sample_partition",
               values_to = "values_partition")  %>%
  # keep only partitions with a reported, non-zero size; this replaces the
  # previous na_if(values_partition, "0") step, which compared a numeric
  # column with a character value (an error in dplyr >= 1.1.0) and was
  # redundant with the > 0 filter anyway
  filter(!is.na(values_partition), values_partition > 0) %>% 
  ggplot(aes(
    x = fct_reorder(author_date, number_samples),
    y = values_partition, 
    # y = log10(values_partition),
    fill = sample_partition
  )) +
  geom_col() +
  coord_flip() + 
  labs(title = "Dataset size", 
       y = "Size", 
       x = "Study", 
       fill = "Dataset partition")

  # scale_y_continuous(labels = scales::comma) 
  # scale_y_log10()

Sample size per year

# Dataset size (log10 axis) against publication year, one labelled point per
# row of the radiology subset.
df_only_radiology %>%
  # shorten the label to "<first word of author_date>, <year>"
  mutate(author_date = word(author_date, 1)) %>% 
  mutate(author_date = str_remove(author_date, ",")) %>% 
  mutate(author_date = str_c(author_date, year, sep = ", ")) %>% 
  ggplot(aes(x = year,
             y = number_samples)) +
  # Points and labels use the same seeded jitter so each label is anchored to
  # its own (jittered) point; an unseeded geom_jitter() moved the points
  # while the labels stayed at the original coordinates.
  geom_point(position = position_jitter(seed = 1)) +
  scale_y_log10() +
  labs(title = "Dataset size per year",
       y = "Sample size (log10)",
       x = "Year") +
  # namespace-qualified like the other non-tidyverse calls in this report
  ggrepel::geom_text_repel(aes(label = author_date), 
                           position = position_jitter(seed = 1),
                           color = "grey 30", 
                           size = 3.5, 
                           point.size = 2.5, 
                           segment.linetype = "solid")

Diagnostic odds ratio

The diagnostic odds ratio (DOR) is a single indicator of medical test performance (Glas, A.S., Lijmer, J.G., Prins, M.H., Bonsel, G.J., and Bossuyt, P.M.M., 2003. The diagnostic odds ratio: a single indicator of test performance. Journal of Clinical Epidemiology, 56 (11), 1129–1135).

However, the naive estimator of the DOR breaks down when either Sens or Spec is close to one: the DOR rises steeply when Sens or Spec becomes nearly perfect, and it is infinite when either equals one (Kraemer, H. Risk ratios, odds ratio, and the test QRQC. SAGE Publications, Inc., Newbury Park CA, 1992, pp. 103–113).

Accuracy results

# Reported accuracy against dataset size (log10 x axis), one panel row per
# diagnostic threshold, each point labelled with its study.
df_only_radiology %>%
  ggplot(aes(x = number_samples,
             y = accuracy))  +
  # Points and labels use the same seeded jitter so every label points at its
  # own (jittered) point; an unseeded geom_jitter() moved the points while
  # the labels stayed at the original coordinates.
  geom_point(position = position_jitter(seed = 1)) +
  labs(title = "Diagnostic Accuracy Results by Diagnostic Threshold",
       x = "Dataset size",
       y = "Reported accuracy")  +
  ylim(0, 1) +
  scale_x_log10() +
  # add the labels (namespace-qualified like the other non-tidyverse calls)
  ggrepel::geom_label_repel(
    aes(label = author_date),
    position = position_jitter(seed = 1),
    # box.padding   = 0.35,
    point.padding = 0.5,
    segment.color = 'grey50',
    size = 2.1
  ) +
  facet_grid(caries_threshold ~ .) +
  # keep the facet strip labels horizontal
  theme(strip.text.y = element_text(angle = 0))

Sensitivity and specificity

Only five studies report sensitivity and specificity values.

# ROC-space scatter: sensitivity against 1 - specificity, point size
# proportional to the size of the training set, each point labelled with its
# study.
df_only_radiology %>% 
  ggplot(aes(x = 1 - specificity, 
             y = sensitivity_recall, 
             size = train ) )  + 
             # color = caries_threshold, 
             # label = author_date)) +
  # Points and labels use the same seeded jitter so every label points at its
  # own (jittered) point.
  geom_point(alpha = .5, position = position_jitter(seed = 1)) +
  # title typo fixed; the size legend now names the variable actually mapped
  # (`train`), not the whole dataset
  labs(title = "Sensitivity and Specificity Results", 
       y = "Sensitivity", 
       x = "1 - Specificity", 
       size = "Training set size")  +
  ylim(0, 1) + 
  # add the labels (namespace-qualified like the other non-tidyverse calls)
  ggrepel::geom_text_repel(aes(label = author_date), 
                           position = position_jitter(seed = 1),
                           color = "grey 5", 
                           size = 3.5, 
                           point.size = 2.5, 
                           segment.linetype = "solid")

Relevant points to discuss

Most of the reports are incomplete. Even as diagnostic studies, they do not report values that allow construction of 2x2 table, and partially report metrics of diagnostic studies (sens, spec).
Only one study reports the prevalence of caries in the dataset.
No precise information about calibration, training and characteristics of the assessors.
Only two studies report the size of the validation set and no study to date has used external validation.
The results are too good, which calls for caution. They seem to represent very well the ability of the algorithms to learn from the annotators and reproduce their labels, but not necessarily the ability to correctly diagnose caries. The problem is that we do not know how valid the annotators' own diagnoses are.

Diagnostic accuracy of AI for radiographic caries detection