Packages

Use tidyverse, meta, metafor, metamisc, janitor

Dataset

Load the dataset

Set theme for the figures

# Make the minimal ggplot2 theme the default for every figure drawn below.
ggplot2::theme_set(ggplot2::theme_minimal())

Variables

# List the raw column names of the extraction sheet.
colnames(df)
##  [1] "Author, Date"                 "Check"                       
##  [3] "Ref"                          "Modality"                    
##  [5] "Model Type"                   "Optimizer/Architecture/Other"
##  [7] "Activation function"          "Task"                        
##  [9] "Task Desc"                    "Caries threshold"            
## [11] "Annotators"                   "Annotator details"           
## [13] "Type of annotators"           "Agreement annotators"        
## [15] "TP"                           "TN"                          
## [17] "FP"                           "FN"                          
## [19] "# Samples"                    "train"                       
## [21] "test"                         "validation"                  
## [23] "Precision"                    "Specificity"                 
## [25] "Sensitivity (Recall)"         "Accuracy"                    
## [27] "Prevalence"                   "PPV"                         
## [29] "NPV"                          "mAP mean average precision"  
## [31] "IoU intersection over union"  "F1 - Dice"                   
## [33] "Comments"

Type of variables

# Show each column's type together with its first few values.
dplyr::glimpse(df)
## Rows: 47
## Columns: 33
## $ `Author, Date`                 <chr> "Salehi, H. S., et al. 2019", "Salehi, …
## $ Check                          <chr> "Deep learning-based quantitative analy…
## $ Ref                            <dbl> 13, 38, 38, 1, 34, 33, 34, 34, 34, 10, …
## $ Modality                       <chr> "OCT - Images", "OCT - Images", "OCT - …
## $ `Model Type`                   <chr> "CNN", "CNN", "CNN", "CNN", "CNN", "CNN…
## $ `Optimizer/Architecture/Other` <chr> NA, "Softmax 2 FC + 3 CNN", "Softmax 3 …
## $ `Activation function`          <chr> "ReLU", "ReLU", "ReLU", NA, NA, "ReLU",…
## $ Task                           <chr> "Classification", "Classification", "Cl…
## $ `Task Desc`                    <chr> "Caries detection", "Caries detection",…
## $ `Caries threshold`             <chr> "enamel/dentin", "enamel", "enamel", "e…
## $ Annotators                     <dbl> NA, NA, NA, 3, NA, 2, NA, NA, NA, NA, N…
## $ `Annotator details`            <chr> "Unclear", "Unclear", "Unclear", "Denti…
## $ `Type of annotators`           <chr> "Unclear", "Unclear", "Unclear", "Denti…
## $ `Agreement annotators`         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ TP                             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ TN                             <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ FP                             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ FN                             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `# Samples`                    <dbl> 51, 81, 81, 7200, 81, NA, 81, 81, 81, 4…
## $ train                          <dbl> NA, 60, 60, 6000, 60, NA, 60, 60, 60, N…
## $ test                           <dbl> NA, 21, 21, 1200, 21, NA, 21, 21, 21, N…
## $ validation                     <dbl> NA, 0, 0, 0, 0, NA, 0, 0, 0, NA, 0, 0, …
## $ Precision                      <dbl> 0.999, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ Specificity                    <dbl> 0.998, 0.990, 0.972, 0.972, 0.962, 0.96…
## $ `Sensitivity (Recall)`         <dbl> 0.985, 0.582, 0.809, 0.963, 0.758, 0.89…
## $ Accuracy                       <dbl> 0.910, 0.827, 0.907, 0.945, 0.887, 0.95…
## $ Prevalence                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ PPV                            <dbl> 0.999, 0.974, 0.941, NA, 0.820, NA, 0.6…
## $ NPV                            <dbl> 0.972, 0.782, 0.885, NA, 0.873, NA, 0.6…
## $ `mAP mean average precision`   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `IoU intersection over union`  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `F1 - Dice`                    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ Comments                       <chr> NA, "mAP, IoU :unclear, used the last r…

Data cleaning

# Drop the free-text `Comments` column, then let janitor rewrite the remaining
# column names into syntactic names (e.g. "Author, Date" -> author_date).
df <- janitor::clean_names(select(df, -Comments))

Split the modality column into its source and its type (images vs. radiology)

# Split `modality` (e.g. "OCT - Images") into `source` and `type`.
# Splitting on the bare dash leaves the surrounding blanks attached to both
# pieces ("OCT " / " Images"), so both new columns are trimmed; trimming only
# `type` would leave a trailing space in `source`.
df <- df %>%
  separate(modality, into = c("source", "type"), sep = "-") %>%
  mutate(across(c(source, type), str_trim))

Order the levels for caries threshold

# Frequency of each raw caries-threshold label.
table(df[["caries_threshold"]])
## 
##                                        caries yes/no 
##                                                   10 
##                                            cavitated 
##                                                    4 
##                                               dentin 
##                                                    4 
##                                               enamel 
##                                                    4 
##                                        enamel/dentin 
##                                                   15 
## healthy/white spot/ dentin cavitated/micro-cavitated 
##                                                    1 
##                                              Unclear 
##                                                    7
# Turn the threshold into a factor whose first levels follow the order given
# here; any level not listed stays after them in its existing order.
df$caries_threshold <- forcats::fct_relevel(
  df$caries_threshold,
  "enamel",
  "enamel/dentin",
  "healthy/white spot/ dentin cavitated/micro-cavitated",
  "dentin",
  "cavitated",
  "caries yes/no"
)

Recode the certified dentists to dentists

# Frequency of each raw annotator description.
table(df[["annotator_details"]])
## 
##        Cariology specialista (min 3 years) 
##                                          1 
##                         Certified dentists 
##                                          2 
##                                   Dentists 
##                                         12 
##           dentists (8-11 years experience) 
##                                          1 
##           Dentists (8-11 years experience) 
##                                          2 
##                  Dentists and radiologists 
##                                          3 
##                        Experienced Dentist 
##                                          1 
##                            Expert dentists 
##                                          1 
## Oral Pathologist, Endodoncist, Radiologist 
##                                          1 
##                                Specialists 
##                                          1 
##                                    Unclear 
##                                         22
# Collapse the free-text annotator descriptions into three broad groups.
# Labels not listed here (e.g. "Dentists", "Unclear") keep their own level.
df <- df %>%
  mutate(
    annotator_details = forcats::fct_collapse(
      annotator_details,
      "Specialists (no Rad)" = c(
        "Cariology specialista (min 3 years)",
        "Specialists"
      ),
      "Specialists (inc. Rad)" = c(
        "Oral Pathologist, Endodoncist, Radiologist",
        "Dentists and radiologists"
      ),
      "Dentists" = c(
        "Certified dentists",
        "Dentists (8-11 years experience)",
        "dentists (8-11 years experience)",
        "Experienced Dentist",
        "Expert dentists"
      )
    )
  )

Unify the caries thresholds

# Harmonise the caries threshold in one pass over the column:
#   1. title-case the labels,
#   2. file missing values under "Unclear",
#   3. merge the fine-grained labels into the broader categories,
#   4. put the levels in a fixed reporting order.
df <- df %>%
  mutate(
    caries_threshold = caries_threshold %>%
      str_to_title() %>%
      replace_na("Unclear") %>%
      fct_recode(
        "Enamel/Dentin" = "Healthy/White Spot/ Dentin Cavitated/Micro-Cavitated",
        "Dentin/Cavitated" = "Cavitated",
        "Dentin/Cavitated" = "Dentin"
      ) %>%
      fct_relevel(
        "Enamel",
        "Enamel/Dentin",
        "Dentin/Cavitated",
        "Caries Yes/No",
        "Unclear"
      )
  )

Extract the year

# The publication year is the first run of digits in `author_date`
# (e.g. "Salehi, H. S., et al. 2020d" -> 2020); store it right after that column.
df <- df %>%
  mutate(
    year = author_date %>%
      str_extract("[0-9]+") %>%
      as.numeric(),
    .after = author_date
  )

EDA

How many studies?

# Number of distinct reference ids, i.e. how many papers the rows come from.
knitr::kable(n_distinct(df$ref))
x
33

By modality and model_type

Not grouped by study

# Cross-tabulate data type (rows) against model family (columns); one count per row of df.
knitr::kable(janitor::tabyl(df, type, model_type))
type Auto-Encoder CNN MLP PNN
Images 0 22 2 1
Radiology 1 15 6 0

By diagnostic task

# Cross-tabulate the diagnostic task (rows) against data type (columns).
knitr::kable(janitor::tabyl(df, task_desc, type))
task_desc Images Radiology
Caries detection 25 19
Periapical infection 0 1
Periodontitis 0 1
Unclear 0 1

By task

# Cross-tabulate the diagnostic task (rows) against the ML task (columns).
knitr::kable(janitor::tabyl(df, task_desc, task))
task_desc Classification Segmentation
Caries detection 43 1
Periapical infection 1 0
Periodontitis 1 0
Unclear 1 0

samples by model type

# Per model family: number of rows plus mean and SD of the reported sample size.
# SD is NA for families that have a single row.
knitr::kable(
  df %>%
    group_by(model_type) %>%
    summarise(
      n = n(),
      sample_size_mean = mean(number_samples, na.rm = TRUE),
      sd = sd(number_samples, na.rm = TRUE),
      .groups = "drop"
    )
)
model_type n sample_size_mean sd
Auto-Encoder 1 480.0000 NA
CNN 37 935.4167 1692.4464
MLP 8 489.1250 680.0066
PNN 1 400.0000 NA
# Per diagnostic task: number of rows plus mean and SD of the reported sample size.
knitr::kable(
  df %>%
    group_by(task_desc) %>%
    summarise(
      n = n(),
      sample_size_mean = mean(number_samples, na.rm = TRUE),
      sd = sd(number_samples, na.rm = TRUE),
      .groups = "drop"
    )
)
task_desc n sample_size_mean sd
Caries detection 44 888.093 1568.688
Periapical infection 1 80.000 NA
Periodontitis 1 80.000 NA
Unclear 1 120.000 NA
# Distribution of sample sizes on a log10 x axis, one panel row per model family.
ggplot(df, aes(x = number_samples)) +
  geom_histogram(bins = 6) +
  scale_x_log10() +
  facet_grid(rows = vars(model_type)) +
  labs(x = "Sample size")

Check sample size, ordered desc

# List every row from the largest to the smallest dataset, next to its
# reported specificity and sensitivity.
knitr::kable(
  df %>%
    select(
      author_date, task_desc, number_samples,
      specificity, sensitivity_recall
    ) %>%
    arrange(desc(number_samples))
)
author_date task_desc number_samples specificity sensitivity_recall
Wang et al. 2020 Caries detection 7200 0.972 0.963
Kumar and Srivastava, 2018 Caries detection 6000 NA 0.730
Garcia Cantu et al., 2020 Caries detection 3686 0.830 0.750
Lee, J. H., et al. 2018 Caries detection 3000 0.830 0.810
Srivastava et al. 2017 Caries detection 3000 NA 0.081
Guijarro-Rodríguez, A. A., et al. 2020 Caries detection 2030 NA NA
Lakshmi, M. M., et al. 2021 Caries detection 1900 NA 0.842
Haghanif-ar, A., et al. 2020 Caries detection 1838 0.507 NA
Singh, P. and P. Sehgal, 2021 Caries detection 1500 0.930 0.960
Holtkamp, A., et al. 2021 Caries detection 1319 0.650 0.550
Tripathi, P., et al. 2019 Caries detection 800 NA NA
Riyadi, S., et al. 2020 Caries detection 660 NA NA
Charvát, J., et al. 2020 Caries detection 578 NA NA
Vinayahalingam, S., et al. 2021 Caries detection 500 0.860 0.870
Leo, L. M. and T. K. Reddy, 2021 Caries detection 480 NA NA
Choi, Eun and Kim, 2018 Caries detection 475 NA NA
Megalan Leo, L. and T. Kalpalatha Reddy, 2020 Caries detection 418 NA NA
Singh, P. and P. Sehgal, 2020 Caries detection 400 0.930 0.930
Schwendicke, F., et al. 2020a Caries detection 226 0.850 0.460
Schwendicke, F., et al. 2020b Caries detection 226 0.760 0.590
Khan et al. 2020 Caries detection 206 NA NA
Patil et al. 2018 Unclear 120 0.750 0.967
Sornam, M. and M. Prabhakaran, 2019 Caries detection 120 NA NA
Sornam, M. and M. Prabhakaran, 2018 Caries detection 120 NA NA
Geetha, V., et al. 2020 Caries detection 105 NA NA
Salehi, H., et al. 2021b Caries detection 81 0.990 0.582
Salehi, H., et al. 2021e Caries detection 81 0.972 0.809
Salehi, H. S., et al. 2020d Caries detection 81 0.962 0.758
Salehi, H. S., et al. 2020a Caries detection 81 0.950 0.172
Salehi, H. S., et al. 2020c Caries detection 81 0.933 0.758
Salehi, H. S., et al. 2020e Caries detection 81 0.930 0.813
Salehi, H., et al. 2021d Caries detection 81 0.915 0.856
Salehi, H. S., et al. 2020b Caries detection 81 0.912 0.651
Salehi, H., et al. 2021f Caries detection 81 0.908 0.952
Salehi, H., et al. 2021a Caries detection 81 0.896 0.645
Salehi, H. S., et al. 2020f Caries detection 81 0.895 0.632
Salehi, H. S., et al. 2020g Caries detection 81 0.887 0.514
Salehi, H., et al. 2021c Caries detection 81 0.696 0.971
Prajapati, S. A., et al. 2017 Caries detection 80 NA NA
Prajapati, S. A., et al. 2017 Periapical infection 80 NA NA
Prajapati, S. A., et al. 2017 Periodontitis 80 NA NA
Sonavane, A., et al. 2021 Caries detection 74 NA NA
Yu-Ping, H., et al. 2020 Caries detection 63 0.898 0.989
Salehi, H. S., et al. 2019 Caries detection 51 0.998 0.985
Patil, S., et al. 2019 Caries detection 40 0.500 1.000
Patil et al. 2018 Caries detection 40 0.250 0.880
Yu et al., 2020 Caries detection NA 0.961 0.899

Variables creation

See Welton NJ, McAleenan A, Thom HHZ, et al. (2017) Methods for Meta-Analysis of the Diagnostic Test Accuracy Studies. NIHR Journals Library. https://www.ncbi.nlm.nih.gov/books/NBK436343/

# Stack the columns from `tp` through `fn` into long form and draw one
# histogram per confusion-matrix cell.
df %>%
  pivot_longer(
    cols = tp:fn,
    names_to = "matrix_names",
    values_to = "matrix_values"
  ) %>%
  ggplot(aes(x = matrix_values)) +
  geom_histogram() +
  facet_grid(rows = vars(matrix_names))

Meta-analysis (all studies!)

Calculate the diagnostic odds ratio (DOR)

DOR = (Spec. × Sens.) / ((1 − Spec.) × (1 − Sens.)). See Glas, A.S., Lijmer, J.G., Prins, M.H., Bonsel, G.J., and Bossuyt, P.M.M., 2003. The diagnostic odds ratio: a single indicator of test performance. Journal of Clinical Epidemiology, 56 (11), 1129–1135.

Create the DOR

# Diagnostic odds ratio from the reported proportions:
#   DOR = (spec * sens) / ((1 - spec) * (1 - sens))
# The denominator is 0 whenever sensitivity or specificity equals 1, which
# makes the DOR infinite for that row.
df$dor <- with(
  df,
  (specificity * sensitivity_recall) /
    ((1 - specificity) * (1 - sensitivity_recall))
)
# Histogram of the DOR for caries-detection rows, one panel per threshold.
# Infinite DORs (sensitivity or specificity of exactly 1) cannot be placed on
# the axis, so they are filtered out explicitly rather than dropped with a
# warning. The x axis shows DOR values on a log10 scale, not log10(DOR).
df %>%
  filter(
    task_desc == "Caries detection",
    is.finite(dor),
    dor > 0
  ) %>%
  ggplot(aes(x = dor)) +
  geom_histogram(bins = 20) +
  scale_x_log10() +
  facet_grid(caries_threshold ~ .) +
  labs(
    title = "Diagnostic odds ratio (DOR) by study and diagnostic threshold",
    y = "Number of studies",
    x = "DOR (log10 scale)"
  ) +
  theme(strip.text.y = element_text(angle = 0))

# Rows with a positive DOR, from the highest to the lowest value
# (rows whose DOR is NA are dropped by the filter).
knitr::kable(
  df %>%
    filter(dor > 0) %>%
    arrange(desc(dor)) %>%
    select(author_date, sensitivity_recall, specificity, task_desc, dor)
)
author_date sensitivity_recall specificity task_desc dor
Patil, S., et al. 2019 1.000 0.500 Caries detection Inf
Salehi, H. S., et al. 2019 0.985 0.998 Caries detection 32767.666667
Wang et al. 2020 0.963 0.972 Caries detection 903.509653
Yu-Ping, H., et al. 2020 0.989 0.898 Caries detection 791.552585
Singh, P. and P. Sehgal, 2021 0.960 0.930 Caries detection 318.857143
Yu et al., 2020 0.899 0.961 Caries detection 219.329525
Salehi, H., et al. 2021f 0.952 0.908 Caries detection 195.746377
Singh, P. and P. Sehgal, 2020 0.930 0.930 Caries detection 176.510204
Salehi, H., et al. 2021e 0.809 0.972 Caries detection 147.035901
Salehi, H., et al. 2021b 0.582 0.990 Caries detection 137.842105
Patil et al. 2018 0.967 0.750 Unclear 87.909091
Salehi, H. S., et al. 2020d 0.758 0.962 Caries detection 79.294911
Salehi, H., et al. 2021c 0.971 0.696 Caries detection 76.657895
Salehi, H., et al. 2021d 0.856 0.915 Caries detection 63.990196
Salehi, H. S., et al. 2020e 0.813 0.930 Caries detection 57.760886
Salehi, H. S., et al. 2020c 0.758 0.933 Caries detection 43.617491
Vinayahalingam, S., et al. 2021 0.870 0.860 Caries detection 41.109890
Lee, J. H., et al. 2018 0.810 0.830 Caries detection 20.814242
Salehi, H. S., et al. 2020b 0.651 0.912 Caries detection 19.331597
Salehi, H., et al. 2021a 0.645 0.896 Caries detection 15.653304
Garcia Cantu et al., 2020 0.750 0.830 Caries detection 14.647059
Salehi, H. S., et al. 2020f 0.632 0.895 Caries detection 14.638716
Salehi, H. S., et al. 2020g 0.514 0.887 Caries detection 8.301795
Schwendicke, F., et al. 2020a 0.460 0.850 Caries detection 4.827160
Schwendicke, F., et al. 2020b 0.590 0.760 Caries detection 4.556911
Salehi, H. S., et al. 2020a 0.172 0.950 Caries detection 3.946860
Patil et al. 2018 0.880 0.250 Caries detection 2.444444
Holtkamp, A., et al. 2021 0.550 0.650 Caries detection 2.269841

Crazy results! DOR = 32767!! and even infinite

Check the formula

Compare original formula against https://doi.org/10.1080/03610918.2016.1157183

# Cross-check: recompute the DOR as (sens / (1 - sens)) / ((1 - spec) / spec)
# and plot it against the first version on log-log axes.
dor_check <- mutate(
  df,
  dor_2 = ((sensitivity_recall / (1 - sensitivity_recall)) / ((1 - specificity) / specificity))
)
ggplot(dor_check, aes(x = dor, y = dor_2)) +
  geom_point() +
  scale_y_log10() +
  scale_x_log10()

Why??

DOR rises steeply when sensitivity or specificity becomes nearly perfect (Kraemer, H. Risk ratios, odds ratio, and the test QROC. SAGE Publications, Inc., Newbury Park CA, 1992, pp. 103–113).

Calculate the confidence interval for the DOR

Comparing tests using sensitivity and specificity (bivariate model)

https://methods.cochrane.org/sites/methods.cochrane.org.sdt/files/public/uploads/DTA%20Handbook%20Chapter%2011%20201312.pdf

For the bivariate analysis the following statistics will be reported with confidence intervals:

  • Estimates of the average sensitivity and specificity for each test
  • Estimates of the relative sensitivity and relative specificity expressed as odds ratios
  • P-values for the difference in sensitivity and for the difference in specificity

When the bivariate method has been used, the magnitude and direction of the difference between tests can be summarised either by reporting point estimates of the average sensitivity and specificity for the two tests, or measures of relative test sensitivity and specificity (relative measures are computed on a logit scale, and thus are technically odds ratios). It is not possible to directly translate relative measures of accuracy to the consequences of using one or other test. Therefore focusing on the size and significance (P-values) of any difference in estimates of average sensitivity and specificity between tests is likely to be the most accessible way of illustrating the potential impact of using different tests.

Final report: only caries & radiology detection

Filter only caries task

# Task-by-type counts, used to decide which subset goes into the final report.
knitr::kable(table(df$task_desc, df$type))
Images Radiology
Caries detection 25 19
Periapical infection 0 1
Periodontitis 0 1
Unclear 0 1
# Keep only rows that are both a caries-detection task and radiographic data.
df_only_radiology <- filter(
  df,
  task_desc == "Caries detection",
  type == "Radiology"
)

How many unique studies?

# One row per distinct value of `check`, which holds the paper title.
knitr::kable(distinct(df_only_radiology, check))
check
The Automatic Detection of Caries in Third Molars on Panoramic Radiographs Using Deep Learning: A Pilot Study
Detecting caries lesions of different radiographic extension on bitewings using deep learning
Detection and diagnosis of dental caries using a deep learning-based convolutional neural network algorithm
PaXNet: Dental Caries Detection in Panoramic X-ray using Ensemble Transfer Learning and Capsule Classifier
Intelligent system with dragonfly optimisation for caries detection
Caries detection using multidimensional projection and neural network
Automated feature detection in dental periapical radiographs by using deep learning
Boosting Proximal Dental Caries Detection via Combination of Variational Methods and Convolutional Neural Network
Classification of Dental Diseases U sing CNN and Transfer Learning
Deep Learning for Pixel-based Edge Models Classification of Tertiary Dentine Images
Dental Caries Classification System Using Deep Learning Based Convolutional Neural Network
Dental caries diagnosis in digital radiographs using back-propagation neural network
Detection of Tooth caries in Bitewing Radiographs using Deep Learning
Example Mining for Incremental Learning in Medical Imaging
Genetic Algorithms Based Approach for Dental Caries Detection using Back Propagation Neural Network
Learning compact and discriminative hybrid neural network for dental caries classification
Logit-Based Artificial Bee Colony Optimization (LB-ABC) Approach for Dental Caries Classification Using a Back Propagation Neural Network
A New Linear Adaptive Swarm Intelligence Approach using Back Propagation Neural Network for Dental Caries Classification
Classification of Dental Cavities from X-ray images using Deep CNN algorithm

19 unique studies

Describe type of task

# Share of classification vs. segmentation rows, with percent formatted.
knitr::kable(
  janitor::adorn_pct_formatting(
    janitor::tabyl(df_only_radiology, task)
  )
)
task n percent
Classification 18 94.7%
Segmentation 1 5.3%

By model

# Count and share of each model family. The percent column is formatted the
# same way as the other frequency tables in this report instead of being
# printed as a raw proportion.
df_only_radiology %>%
  janitor::tabyl(model_type) %>%
  janitor::adorn_pct_formatting() %>%
  knitr::kable()
model_type n percent
Auto-Encoder 1 0.0526316
CNN 13 0.6842105
MLP 5 0.2631579

By year

# Stacked bar chart: number of rows per publication year, filled by model family.
ggplot(df_only_radiology, aes(x = year, fill = model_type)) +
  geom_bar() +
  ggtitle("Model type by year") +
  xlab("Year") +
  ylab(" Number of studies") +
  labs(fill = "Model type")

Diagnostic threshold

# Count and formatted percent of each harmonised diagnostic threshold.
knitr::kable(
  janitor::adorn_pct_formatting(
    janitor::tabyl(df_only_radiology, caries_threshold)
  )
)
caries_threshold n percent
Enamel 1 5.3%
Enamel/Dentin 6 31.6%
Dentin/Cavitated 4 21.1%
Caries Yes/No 4 21.1%
Unclear 4 21.1%

Describe the annotators: size and details and the % with the agreement reported

Type and number of annotators

# Bar chart of annotator groups, tallest bar first, with each count printed
# in white inside its bar.
annotator_counts <- count(df_only_radiology, annotator_details)
ggplot(
  annotator_counts,
  aes(x = fct_reorder(annotator_details, desc(n)), y = n)
) +
  geom_col() +
  scale_y_continuous(breaks = c(0, 2, 5, 8, 10)) +
  geom_text(aes(label = n), vjust = 1.5, colour = "white") +
  labs(
    title = "Type of annotators",
    x = "Type",
    y = "Number of studies"
  )

# Number of annotators broken down by annotator group, plus an overall column.
gtsummary::add_overall(
  gtsummary::tbl_summary(
    select(df_only_radiology, annotator_details, annotators),
    by = annotator_details
  )
)
Characteristic Overall, N = 191 Specialists (no Rad), N = 21 Dentists, N = 101 Specialists (inc. Rad), N = 21 Unclear, N = 51
annotators
1 6 (46%) 1 (50%) 4 (44%) 0 (0%) 1 (100%)
2 3 (23%) 0 (0%) 3 (33%) 0 (0%) 0 (0%)
3 2 (15%) 0 (0%) 1 (11%) 1 (100%) 0 (0%)
4 2 (15%) 1 (50%) 1 (11%) 0 (0%) 0 (0%)
Unknown 6 0 1 1 4

1 Statistics presented: n (%)

Mean number of annotators by annotator type

# Horizontal bars: mean number of annotators within each annotator group
# (rows with a missing count are ignored), ordered by that mean.
mean_by_group <- df_only_radiology %>%
  group_by(annotator_details) %>%
  summarise(
    mean_annotators = mean(annotators, na.rm = TRUE),
    .groups = "drop"
  )
ggplot(
  mean_by_group,
  aes(
    x = fct_reorder(annotator_details, mean_annotators),
    y = mean_annotators
  )
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Mean annotators by type of annotator",
    x = "Type of annotator",
    y = "Mean"
  )

# Overall descriptive statistics for the number of annotators; `n` counts all
# rows, while the other statistics skip missing values.
knitr::kable(
  summarise(
    df_only_radiology,
    n = n(),
    mean = mean(annotators, na.rm = TRUE),
    sd = sd(annotators, na.rm = TRUE),
    min = min(annotators, na.rm = TRUE),
    max = max(annotators, na.rm = TRUE)
  )
)
n mean sd min max
19 2 1.1547 1 4

Studies with agreement reported

# Tally the reported inter-annotator agreement values; NA means not reported.
knitr::kable(count(df_only_radiology, agreement_annotators))
agreement_annotators n
98 1
NA 18

Only one study reported the annotators agreement

how many report the prevalence

# Tally the reported prevalence values; NA means not reported.
knitr::kable(count(df_only_radiology, prevalence))
prevalence n
0.5 1
NA 18

Only one study reported the prevalence of caries in its sample, and the reported prevalence was 50%.

Describe the sample of the studies

# Five-number summary (plus mean) of the dataset sizes.
summary(df_only_radiology[["number_samples"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      40     120     480    1235    1869    6000

describe train, test, validation

Of the 19 studies, 14 provide data on the sample partition, and of these only 2 indicate the number of the sample dedicated to validation. There are no studies with external validation

# Summarise the train / test / validation sizes across studies.
df_only_radiology %>%
  pivot_longer(train:validation,
               names_to = "sample_partition",
               values_to = "values_partition")  %>%
  # Treat a size of 0 as "not reported". The comparison value must be the
  # number 0: the column is numeric, and dplyr >= 1.1 refuses to compare it
  # with the string "0".
  mutate(values_partition = na_if(values_partition, 0)) %>%
  group_by(sample_partition) %>%
  summarise(
    # number of studies that report this partition
    count = sum(!is.na(values_partition)),
    mean = mean(values_partition, na.rm = TRUE),
    median = median(values_partition, na.rm = TRUE),
    sd = sd(values_partition, na.rm = TRUE)
  ) %>%
  # Round through a lambda; forwarding extra arguments via across(...) is
  # deprecated in current dplyr.
  mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
  knitr::kable()
sample_partition count mean median sd
test 14 291.71 139.5 334.63
train 14 1231.93 350.0 1524.68
validation 2 166.00 166.0 121.62
# Stacked horizontal bars of the partition sizes per study, ordered by total
# dataset size.
df_only_radiology %>%
  pivot_longer(train:validation,
               names_to = "sample_partition",
               values_to = "values_partition")  %>%
  # Keep only partitions with a reported, non-zero size. The comparison drops
  # both zeros and missing values, so no separate zero-to-NA step is needed
  # (the former na_if(.., "0") compared a number with a string and fails on
  # dplyr >= 1.1).
  filter(values_partition > 0) %>%
  ggplot(aes(
    x = fct_reorder(author_date, number_samples),
    y = values_partition,
    fill = sample_partition
  )) +
  geom_col() +
  coord_flip() +
  labs(title = "Dataset size",
       y = "Size",
       x = "Study",
       fill = "Dataset partition")

  # scale_y_continuous(labels = scales::comma) 
  # scale_y_log10()

Sample size per year

# Dataset size (log10 axis) against publication year, each point labelled
# "<first author>, <year>".
df_only_radiology %>%
  # Shorten the label: first word of the author string, comma removed, plus year
  mutate(author_date = word(author_date, 1)) %>%
  mutate(author_date = str_remove(author_date, ",")) %>%
  mutate(author_date = str_c(author_date, year, sep = ", ")) %>%
  ggplot(aes(x = year,
             y = number_samples)) +
  # Points are drawn at their true coordinates. ggrepel anchors labels at the
  # data values, so randomly jittered points would no longer sit where their
  # labels point, and the figure would change on every render.
  geom_point() +
  scale_y_log10() +
  labs(title = "Dataset size per year",
       y = "Sample size (log10)",
       x = "Year") +
  geom_text_repel(aes(label = author_date),
                  color = "grey 30",
                  size = 3.5,
                  point.size = 2.5,
                  segment.linetype = "solid")

Diagnostic odds ratio

Diagnostic odds ratio (DOR) is a single indicator of medical test performance (Glas, A.S., Lijmer, J.G., Prins, M.H., Bonsel, G.J., and Bossuyt, P.M.M. The diagnostic odds ratio: a single indicator of test performance. Journal of Clinical Epidemiology 56 (2003), 1129–1135).

However, the naive estimator of the DOR fails when either Sens or Spec is close to one: the DOR rises steeply when Sens or Spec becomes nearly perfect (Kraemer, H. Risk ratios, odds ratio, and the test QROC. SAGE Publications, Inc., Newbury Park CA, 1992, pp. 103–113).

Accuracy results

# Reported accuracy against dataset size (log10 axis), one panel per
# diagnostic threshold, each point labelled with its study.
df_only_radiology %>%
  ggplot(aes(x = number_samples,
             y = accuracy))  +
  # Plot the reported values as they are: jitter would shift the accuracies
  # being displayed and move points away from the positions the repelled
  # labels are anchored to.
  geom_point() +
  labs(title = "Diagnostic Accuracy Results by Diagnostic Threshold",
       x = "Dataset size",
       y = "Reported accuracy")  +
  ylim(0, 1) +
  scale_x_log10() +
  # add the labels
  geom_label_repel(
    aes(label = author_date),
    point.padding = 0.5,
    segment.color = 'grey50',
    size = 2.1
  ) +
  facet_grid(caries_threshold ~ .) +
  theme(strip.text.y = element_text(angle = 0))

Sensitivity and specificity

Only five studies report sensitivity and specificity values.

# Sensitivity against 1 - specificity for the studies that report both;
# point size shows the size of the training set.
df_only_radiology %>%
  ggplot(aes(x = 1 - specificity,
             y = sensitivity_recall,
             size = train)) +
  # Exact positions, no jitter: the repelled labels are anchored at the data
  # values, and jitter would also shift the reported sensitivity/specificity.
  geom_point(alpha = .5) +
  labs(title = "Sensitivity and Specificity Results",
       y = "Sensitivity",
       x = "1 - Specificity",
       # the size aesthetic maps `train`, so the legend names that quantity
       size = "Training set size")  +
  ylim(0, 1) +
  # add the labels
  geom_text_repel(aes(label = author_date),
                  color = "grey 5",
                  size = 3.5,
                  point.size = 2.5,
                  segment.linetype = "solid")

Relevant points to discuss

  • Most of the reports are incomplete. Even as diagnostic studies, they do not report values that allow construction of 2x2 table, and partially report metrics of diagnostic studies (sens, spec).
  • Only one study reports the prevalence of caries in the dataset.
  • No precise information about calibration, training and characteristics of the assessors.
  • Only two studies report the size of the validation set and no study to date has used external validation.
  • The results are too good, which calls for caution. They seem to reflect very well the ability of the algorithms to learn from the annotators and reproduce their labels, but not necessarily the ability to diagnose caries correctly. The problem is that we do not know how valid the annotators' labels are.