pacman::p_load(tidyverse, 
               gtsummary,
               ggcharts , # see https://thomas-neitmann.github.io/ggcharts/reference/dumbbell_chart.html
               dlookr, 
               scales, # for the % scales in ggplot2
               patchwork, # for multiple plots
               santoku, # to break variables
               DescTools, # for agreement
               irr, # for agreement
               janitor)

For wordcloud

pacman::p_load(wordcloud, # word-cloud generator 
               SnowballC, # for text stemming
               RColorBrewer, # palette
               wordcloud2, 
               tm) # for text mining
# Use the minimal ggplot2 theme for every plot in this notebook.
ggplot2::theme_set(ggplot2::theme_minimal())

Dataset

# Load the appraisal spreadsheet (published as CSV) and list its column names.
df <- readr::read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vRKPzFe2lbF87DNe9SBjuaIb5iMb4nCmgvfdyT4v-NjK-BZBR-HkUIUdgiML3t30EqQ1RCep0sExatK/pub?output=csv"
)
colnames(df)
##  [1] "Laika zīmogs"                                                                                                                                 
##  [2] "TITLE"                                                                                                                                        
##  [3] "Reviewer"                                                                                                                                     
##  [4] "AUTHORS"                                                                                                                                      
##  [5] "JOURNAL"                                                                                                                                      
##  [6] "YEAR published"                                                                                                                               
##  [7] "Number of patients"                                                                                                                           
##  [8] "Main focus of the report"                                                                                                                     
##  [9] "Copy and paste the abstract"                                                                                                                  
## [10] "Country of the correspondence author"                                                                                                         
## [11] "CARE Appraisal [Title 1 The diagnosis or intervention of primary focus followed by the words “case report”]"                                  
## [12] "CARE Appraisal [Key Words 2 2 to 5 key words that identify diagnoses or interventions in this case report, including \"case report\"]"        
## [13] "CARE Appraisal [Abstract 3a Introduction: What is unique about this case and what does it add to the scientific literature?]"                 
## [14] "CARE Appraisal [Abstract 3b Main symptoms and/or important clinical findings]"                                                                
## [15] "CARE Appraisal [Abstract 3c The main diagnoses, therapeutic interventions, and outcomes]"                                                     
## [16] "CARE Appraisal [Abstract 3d Conclusion—What is the main “take-away” lesson(s) from this case?]"                                               
## [17] "CARE Appraisal [Introduction 4 One or two paragraphs summarizing why this case is unique (may include references)]"                           
## [18] "CARE Appraisal [Patient Information 5a De-identified patient specific information.]"                                                          
## [19] "CARE Appraisal [5b Primary concerns and symptoms of the patient.]"                                                                            
## [20] "CARE Appraisal [5c Medical, family, and psycho-social history including relevant genetic information]"                                        
## [21] "CARE Appraisal [5d Relevant past interventions with outcomes]"                                                                                
## [22] "CARE Appraisal [Clinical Findings 6 Describe significant physical examination (PE) and important clinical findings.]"                         
## [23] "CARE Appraisal [Timeline 7 Historical and current information from this episode of care organized as a timeline]"                             
## [24] "CARE Appraisal [Diagnostic Assessment 8a Diagnostic testing (such as PE, laboratory testing, imaging, surveys).]"                             
## [25] "CARE Appraisal [Diagnostic Assessment 8b Diagnostic challenges (such as access to testing, financial, or cultural)]"                          
## [26] "CARE Appraisal [8c Diagnosis (including other diagnoses considered)]"                                                                         
## [27] "CARE Appraisal [Therapeutic Intervention  Diagnostic Assessment 8d Prognosis (such as staging in oncology) where applicable]"                 
## [28] "CARE Appraisal [Therapeutic Intervention  9a Types of therapeutic intervention (such as pharmacologic, surgical, preventive, self-care)]"     
## [29] "CARE Appraisal [Therapeutic Intervention  9b Administration of therapeutic intervention (such as dosage, strength, duration)]"                
## [30] "CARE Appraisal [Therapeutic Intervention  9c Changes in therapeutic intervention (with rationale)]"                                           
## [31] "CARE Appraisal [Follow-up and Outcomes 10a Clinician and patient-assessed outcomes (if available).]"                                          
## [32] "CARE Appraisal [Follow-up and Outcomes 10b Important follow-up diagnostic and other test results]"                                            
## [33] "CARE Appraisal [Follow-up and Outcomes 10c Intervention adherence and tolerability (How was this assessed?)]"                                 
## [34] "CARE Appraisal [Follow-up and Outcomes 10d Adverse and unanticipated events]"                                                                 
## [35] "CARE Appraisal [Discussion 11a A scientific discussion of the strengths AND limitations associated with this case report]"                    
## [36] "CARE Appraisal [Discussion 11b Discussion of the relevant medical literature with references.]"                                               
## [37] "CARE Appraisal [Discussion 11c The scientific rationale for any conclusions (including assessment of possible causes)]"                       
## [38] "CARE Appraisal [Discussion 11d The primary “take-away” lessons of this case report (without references) in a one paragraph conclusion]"       
## [39] "CARE Appraisal [Patient Perspective 12 The patient should share their perspective in one to two paragraphs on the treatment(s) they received]"
## [40] "CARE Appraisal [Informed Consent 13 Did the patient give informed consent? Please provide if requested]"                                      
## [41] "Comments"                                                                                                                                     
## [42] "Declared study design"                                                                                                                        
## [43] "Males reported"                                                                                                                               
## [44] "Females reported"                                                                                                                             
## [45] "Age"                                                                                                                                          
## [46] "Race (if reported)"                                                                                                                           
## [47] "ID"

For the wordcloud later

# Keep the abstracts for the word cloud now, because the abstract column is
# dropped from `df` later on.
# The raw sheet holds one row per paper *and rater* (IA and PJ each appraised
# the papers), so taking the whole column would feed most abstracts into the
# word cloud twice. Use only the IA/SU rows, i.e. the same rows that are kept
# for the rest of the analysis, so word frequencies are counted once per paper.
text <- df %>%
  filter(Reviewer %in% c("IA", "SU")) %>%
  pull(`Copy and paste the abstract`)
# Overview of types, missingness and uniqueness for every column.
dlookr::diagnose(df)
## # A tibble: 47 × 6
##    variables        types missing_count missing_percent unique_count unique_rate
##    <chr>            <chr>         <int>           <dbl>        <int>       <dbl>
##  1 Laika zīmogs     char…             0               0          204      1     
##  2 TITLE            char…             0               0          105      0.515 
##  3 Reviewer         char…             0               0            3      0.0147
##  4 AUTHORS          char…             0               0          128      0.627 
##  5 JOURNAL          char…             0               0           76      0.373 
##  6 YEAR published   nume…             0               0           38      0.186 
##  7 Number of patie… nume…             0               0            7      0.0343
##  8 Main focus of t… char…             0               0            4      0.0196
##  9 Copy and paste … char…             0               0          175      0.858 
## 10 Country of the … char…             0               0           23      0.113 
## # … with 37 more rows
# Harmonise journal names so that spelling variants are counted as one journal.
# The steps run in order inside a single mutate(); each one sees the result of
# the previous one.
df <- df %>%
  mutate(
    # consistent capitalisation
    JOURNAL = str_to_title(JOURNAL),
    # drop leading/trailing spaces
    JOURNAL = str_trim(JOURNAL, side = c("both")),
    # fix a misspelling
    JOURNAL = str_replace(JOURNAL, "Resaerch", "Research"),
    # unify the hyphen/en-dash variants, then map to the full journal name
    JOURNAL = str_replace(JOURNAL, "Otolaryngology-Head And Neck Surgery", "Otolaryngology–Head And Neck Surgery"),
    JOURNAL = str_replace(JOURNAL, "Otolaryngology–Head And Neck Surgery", "Archives Of Otolaryngology--Head & Neck Surgery"),
    # "&" vs "And" variant of the same journal
    JOURNAL = str_replace(JOURNAL, "The Journal Of Laryngology & Otology", "The Journal Of Laryngology And Otology")
  )

Check agreement

How many raters?

# Number (and share) of appraisals contributed by each reviewer.
janitor::tabyl(df, Reviewer)
##  Reviewer   n     percent
##        IA 101 0.495098039
##        PJ 101 0.495098039
##        SU   2 0.009803922

Kappa

# Inter-rater agreement (Fleiss' kappa) between reviewers IA and PJ.
#
# NOTE(review): the column range below runs from "Abstract 3b" back to
# "Key Words 2", so only three CARE items (2, 3a and 3b) enter the kappa.
# Confirm this is intended rather than the full set of CARE items.
df %>% 
  select(Reviewer, ID,  `CARE Appraisal [Abstract 3b Main symptoms and/or important clinical findings]`:`CARE Appraisal [Key Words 2 2 to 5 key words that identify diagnoses or interventions in this case report, including "case report"]`) %>% # leave only the relevant columns
  filter(Reviewer != "SU") %>%  # SU is not part of the IA/PJ comparison
  
  # long format: one row per reviewer, paper and CARE item
  pivot_longer(-c(Reviewer, ID), 
               names_to = "Care_item", 
               values_to = "Care_values") %>% 
  
  # one row per (paper, item) with one column per rater.
  # Keeping Care_item in the key pairs the two ratings by item explicitly,
  # instead of producing list-columns that have to be unnested by position.
  pivot_wider(id_cols = c(ID, Care_item), 
              names_from = Reviewer, 
              values_from = Care_values) %>% 
  # keep only the items that both raters scored
  filter(!is.na(IA), !is.na(PJ)) %>% 
  select(-c(ID, Care_item)) %>% 
  kappam.fleiss(., detail=TRUE)
##  Fleiss' Kappa for m Raters
## 
##  Subjects = 267 
##    Raters = 2 
##     Kappa = 0.715 
## 
##         z = 11.7 
##   p-value = 0 
## 
##      Kappa      z p.value
## No   0.715 11.681   0.000
## Yes  0.715 11.681   0.000

The kappa between the two raters is 0.715.

Since the two raters are comparable, I will keep only one of them (IA), plus SU.

# Keep the appraisals of reviewer IA plus those of SU.
df <- dplyr::filter(df, Reviewer %in% c("IA", "SU"))

add a continent column

# Country -> continent lookup table.
countries <- readr::read_csv(
  file = "https://raw.githubusercontent.com/dbouquin/IS_608/master/NanosatDB_munging/Countries-Continents.csv"
)
dplyr::glimpse(countries)
## Rows: 194
## Columns: 2
## $ Continent <chr> "Africa", "Africa", "Africa", "Africa", "Africa", "Africa", …
## $ Country   <chr> "Algeria", "Angola", "Benin", "Botswana", "Burkina", "Burund…
# Add the continent of the corresponding author's country.
#
# Resolution order:
#   1. the explicit country list below (same mapping as before),
#   2. the continent found in the `countries` lookup table,
#   3. "Europe" as the last-resort default for countries found in neither.
#
# Previously the continent obtained from the join was overwritten by the
# case_when(), so every country not listed explicitly was labelled "Europe",
# including "US", which is renamed below precisely so that it matches the
# lookup table.
df <- df %>% 
  # the lookup table spells the United States as "US"
  mutate(`Country of the correspondence author` = str_replace_all(`Country of the correspondence author`, "United States", "US")) %>% 
  left_join(countries, by = c("Country of the correspondence author" = "Country")) %>%
  mutate(Continent = case_when(
    `Country of the correspondence author` == "Taiwan" ~ "Asia", 
    `Country of the correspondence author` == "Australia" ~ "Oceania",
    `Country of the correspondence author` == "Canada" ~ "North America",
    `Country of the correspondence author` == "China" ~ "Asia",
    `Country of the correspondence author` == "India" ~ "Asia",
    `Country of the correspondence author` == "Japan" ~ "Asia",
    `Country of the correspondence author` == "Israel" ~ "Asia",
    `Country of the correspondence author` == "Kuwait" ~ "Asia",
    `Country of the correspondence author` == "Morocco" ~ "Africa",
    `Country of the correspondence author` == "Qatar" ~ "Asia",
    `Country of the correspondence author` == "Tunisia" ~ "Africa",
    # use the joined lookup value when the country was matched
    !is.na(Continent) ~ Continent,
    # unmatched and unlisted countries keep the previous default
    TRUE ~ "Europe"
  )) 
rm(countries)

EDA

How many papers

# Number of distinct paper titles.
length(unique(df$TITLE))
## [1] 103

From how many journals?

# Number of distinct journals.
length(unique(df$JOURNAL))
## [1] 68

List of journals with at least 3 articles

# Journals with at least 3 articles; all remaining journals are pooled into
# "Other". Sorted from most to least frequent.
df %>%
  count(JOURNAL = fct_lump_min(JOURNAL, min = 3), sort = TRUE)
## # A tibble: 9 × 2
##   JOURNAL                                             n
##   <fct>                                           <int>
## 1 Other                                              68
## 2 Diagnostic Cytopathology                            7
## 3 Journal Of Oral And Maxillofacial Surgery           6
## 4 The Journal Of Laryngology And Otology              6
## 5 British Medical Journal Case Report                 4
## 6 Archives Of Otolaryngology--Head & Neck Surgery     3
## 7 European Archives Of Oto-Rhino-Laryngology          3
## 8 Head And Neck                                       3
## 9 The Laryngoscope                                    3

Patients distribution by paper

# Histogram of the number of patients described per paper.
ggplot(df, aes(x = `Number of patients`)) +
  geom_histogram(bins = 6)

How many patients per paper

# Five-number summary (plus mean) of patients per paper.
summary(df[["Number of patients"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   1.000   1.262   1.000   7.000

How many patients in total

# Total number of patients across all papers.
sum(df[["Number of patients"]])
## [1] 130

Males and females

# Distribution of the number of male and female patients reported per paper,
# one facet per sex.
df %>% 
  pivot_longer(`Males reported`:`Females reported`, 
               names_to = "sex", 
               values_to = "sex_values") %>% 
  ggplot(aes(x = sex_values, 
             # map the fill to the `sex` variable; the quoted "sex" used
             # before mapped a constant string, so both facets got one colour
             fill = sex)) + 
  geom_histogram(bins = 6) + 
  facet_grid(sex ~ .) + 
  # the facets already label the sex, so the legend is redundant
  theme(legend.position="none")

Year of publication

# Histogram of publication years.
ggplot(df, aes(x = `YEAR published`)) +
  geom_histogram(bins = 10)

Age of the patients

# Histogram of the reported patient age.
ggplot(df, aes(x = Age)) +
  geom_histogram(bins = 10)

Age of the patients by sex

# Reported age against the number of patients of each sex, one facet per sex.
df %>%
  pivot_longer(
    cols = c(`Males reported`, `Females reported`),
    names_to = "sex",
    values_to = "sex_values"
  ) %>%
  ggplot(aes(x = Age, y = sex_values, color = sex)) +
  geom_jitter(alpha = .7) +
  facet_grid(rows = vars(sex)) +
  # facets already identify the sex
  theme(legend.position = "none")

# Age distribution by sex.
# After pivot_longer() every paper has one "Males reported" and one
# "Females reported" row carrying the same Age, so without a filter both
# boxes show exactly the same data. Keep a paper under a sex only when at
# least one patient of that sex was reported.
df %>% 
  pivot_longer(`Males reported`:`Females reported`, 
               names_to = "sex", 
               values_to = "sex_values") %>% 
  filter(sex_values > 0) %>% 
  ggplot(aes(y = Age, 
             x = sex, 
             color = sex)) + 
  geom_boxplot(alpha = .7) + 
  geom_jitter(alpha = .2, width = .2) +
  # the x axis already names the sex
  theme(legend.position="none")

Age by Continent

# Age by continent, continents ordered by their median age.
df %>%
  distinct(ID, .keep_all = TRUE) %>% # one row per paper
  mutate(Continent = fct_reorder(Continent, Age, .fun = median)) %>%
  ggplot(aes(x = Continent, y = Age)) +
  geom_boxplot() +
  geom_jitter(alpha = .1, width = 0.2) +
  # keep the axis title produced by the inline fct_reorder() expression
  labs(x = "fct_reorder(Continent, Age, .fun = median)")

Case reports by continent

# Patients per paper over time, one panel per continent (log-scaled y axis).
# Grouping the data has no effect on the plot, so it is not needed here.
df %>%
  distinct(ID, .keep_all = TRUE) %>% # one row per paper
  ggplot(aes(x = `YEAR published`,
             y = `Number of patients`,
             color = Continent)) +
  geom_jitter(alpha = .4) +
  scale_y_log10() +
  facet_grid(cols = vars(Continent))

CARE ANALYSIS

Convert the answers to points

Yes = 1

Unclear = .1

No = 0

# Reshape to long format (one row per paper and CARE item) and score the
# answers: "Yes" -> "1", "Unclear" -> "0.1", anything else (including missing)
# -> "0". The score is stored as text here and converted to numeric later.
df <- df %>%
  # drop free-text / bookkeeping columns that are no longer needed
  select(
    -TITLE, -Reviewer, -`Laika zīmogs`,
    -AUTHORS, -`Copy and paste the abstract`,
    -Comments
  ) %>%
  pivot_longer(
    cols = contains("CARE"),
    names_to = "CARE_item",
    values_to = "CARE_value"
  ) %>%
  mutate(
    CARE_value_num = case_when(
      CARE_value %in% "Yes" ~ "1",
      CARE_value %in% "Unclear" ~ "0.1",
      TRUE ~ "0"
    )
  )

TABLE 1 CARE items compliance

Compliance per CARE item

# Table 1: answers per CARE item (row percentages), items kept in the order
# in which they appear in the data.
# The answer columns are labelled No / Unclear / Yes, matching the alphabetical
# order of the CARE_value categories.
df %>%
  transmute(
    CARE_item = fct_inorder(CARE_item),
    CARE_value
  ) %>%
  gtsummary::tbl_summary(
    by = CARE_value,
    percent = "row"
  ) %>%
  modify_header(
    update = list(
      label ~ '**Characteristic**',
      stat_1 ~ '**No**',
      stat_2 ~ '**Unclear**',
      stat_3 ~ '**Yes**'
    )
  )
Characteristic No1 Unclear1 Yes1
CARE_item
CARE Appraisal [Title 1 The diagnosis or intervention of primary focus followed by the words “case report”] 66 (64%) 0 (0%) 37 (36%)
CARE Appraisal [Key Words 2 2 to 5 key words that identify diagnoses or interventions in this case report, including "case report"] 102 (99%) 0 (0%) 1 (1.0%)
CARE Appraisal [Abstract 3a Introduction: What is unique about this case and what does it add to the scientific literature?] 34 (33%) 0 (0%) 69 (67%)
CARE Appraisal [Abstract 3b Main symptoms and/or important clinical findings] 37 (36%) 1 (1.0%) 65 (63%)
CARE Appraisal [Abstract 3c The main diagnoses, therapeutic interventions, and outcomes] 55 (53%) 0 (0%) 48 (47%)
CARE Appraisal [Abstract 3d Conclusion—What is the main “take-away” lesson(s) from this case?] 45 (44%) 0 (0%) 58 (56%)
CARE Appraisal [Introduction 4 One or two paragraphs summarizing why this case is unique (may include references)] 13 (13%) 0 (0%) 90 (87%)
CARE Appraisal [Patient Information 5a De-identified patient specific information.] 1 (1.0%) 0 (0%) 102 (99%)
CARE Appraisal [5b Primary concerns and symptoms of the patient.] 3 (2.9%) 0 (0%) 100 (97%)
CARE Appraisal [5c Medical, family, and psycho-social history including relevant genetic information] 100 (97%) 0 (0%) 3 (2.9%)
CARE Appraisal [5d Relevant past interventions with outcomes] 34 (33%) 0 (0%) 69 (67%)
CARE Appraisal [Clinical Findings 6 Describe significant physical examination (PE) and important clinical findings.] 2 (1.9%) 0 (0%) 101 (98%)
CARE Appraisal [Timeline 7 Historical and current information from this episode of care organized as a timeline] 10 (9.7%) 0 (0%) 93 (90%)
CARE Appraisal [Diagnostic Assessment 8a Diagnostic testing (such as PE, laboratory testing, imaging, surveys).] 2 (1.9%) 0 (0%) 101 (98%)
CARE Appraisal [Diagnostic Assessment 8b Diagnostic challenges (such as access to testing, financial, or cultural)] 97 (94%) 0 (0%) 6 (5.8%)
CARE Appraisal [8c Diagnosis (including other diagnoses considered)] 66 (64%) 0 (0%) 37 (36%)
CARE Appraisal [Therapeutic Intervention Diagnostic Assessment 8d Prognosis (such as staging in oncology) where applicable] 71 (69%) 1 (1.0%) 31 (30%)
CARE Appraisal [Therapeutic Intervention 9a Types of therapeutic intervention (such as pharmacologic, surgical, preventive, self-care)] 4 (3.9%) 0 (0%) 99 (96%)
CARE Appraisal [Therapeutic Intervention 9b Administration of therapeutic intervention (such as dosage, strength, duration)] 72 (70%) 0 (0%) 31 (30%)
CARE Appraisal [Therapeutic Intervention 9c Changes in therapeutic intervention (with rationale)] 26 (25%) 0 (0%) 77 (75%)
CARE Appraisal [Follow-up and Outcomes 10a Clinician and patient-assessed outcomes (if available).] 35 (34%) 1 (1.0%) 67 (65%)
CARE Appraisal [Follow-up and Outcomes 10b Important follow-up diagnostic and other test results] 26 (25%) 1 (1.0%) 76 (74%)
CARE Appraisal [Follow-up and Outcomes 10c Intervention adherence and tolerability (How was this assessed?)] 81 (79%) 2 (1.9%) 20 (19%)
CARE Appraisal [Follow-up and Outcomes 10d Adverse and unanticipated events] 21 (20%) 0 (0%) 82 (80%)
CARE Appraisal [Discussion 11a A scientific discussion of the strengths AND limitations associated with this case report] 72 (70%) 0 (0%) 31 (30%)
CARE Appraisal [Discussion 11b Discussion of the relevant medical literature with references.] 3 (2.9%) 0 (0%) 100 (97%)
CARE Appraisal [Discussion 11c The scientific rationale for any conclusions (including assessment of possible causes)] 4 (3.9%) 0 (0%) 99 (96%)
CARE Appraisal [Discussion 11d The primary “take-away” lessons of this case report (without references) in a one paragraph conclusion] 30 (29%) 0 (0%) 73 (71%)
CARE Appraisal [Patient Perspective 12 The patient should share their perspective in one to two paragraphs on the treatment(s) they received] 103 (100%) 0 (0%) 0 (0%)
CARE Appraisal [Informed Consent 13 Did the patient give informed consent? Please provide if requested] 3 (2.9%) 90 (87%) 10 (9.7%)

1 n (%)

Calculate the average quality per paper

# Total CARE score per paper: the sum of the item scores of that paper.
# `df` is already in long format (one row per paper and CARE item), so a
# grouped sum gives the total directly; no wide reshape or row-wise pass is
# needed. The result has one row per ID with the columns ID and care_sum.
df_sum <- df %>%
  group_by(ID) %>%
  # the item score may still be stored as text at this point
  summarise(care_sum = sum(as.double(CARE_value_num)))

Now merge the df_sum

# Attach the per-paper CARE total to every row of that paper.
df <- df %>%
  left_join(df_sum, by = "ID")

Convert CARE_value_num to numeric

# Store the item score as a number instead of text.
df <- mutate(df, CARE_value_num = as.double(CARE_value_num))

Remove the df_sum

# The helper table has been merged into df and is no longer needed.
rm(list = "df_sum")

FIGURE 4 CARE score by year

Calculate the average quality per year

# Figure 4: distribution of the per-paper CARE score by publication decade.
df %>%
  mutate(Decade = floor(`YEAR published` / 10) * 10) %>%
  # mutate(Decade = santoku::chop(`YEAR published`, c(1969, 1979, 1989, 1999, 2009, 2019))) %>%
  # care_sum is constant within a paper, so one row per ID is enough; the
  # earlier wide reshape did not change which rows were kept and is dropped
  distinct(., ID, .keep_all = TRUE) %>%
  select(ID, Decade, care_sum) %>%
  ggplot(aes(x = as.factor(Decade),
             y = care_sum)) +
  geom_jitter(color = "grey90") +
  geom_boxplot(width = .2, color = "grey60") +
  geom_violin(width = .9, fill = NA) +
  labs(
    title = "Average CARE compliance per decade",
    # the reference line is drawn dashed, so describe it that way
    subtitle = "Median and 25%−75% Quartiles\nDashed line in 2013 marks the publication of the CARE guidelines",
    y = "CARE Compliance",
    x = "Decade"
  ) +
  # The x axis is discrete (one position per decade). 5.3 puts the line just
  # right of the 5th decade, which the subtitle describes as 2013.
  # NOTE(review): this assumes the 5th decade level is 2010 — confirm.
  geom_vline(
    aes(xintercept = 5.3), linetype = "dashed", colour = "red", size = 0.5)

Quality by continent

# CARE score by continent, continents ordered by their median score
# (fct_reorder() uses the median by default).
df %>%
  distinct(ID, .keep_all = TRUE) %>% # care_sum is repeated per item; keep one row per paper
  mutate(Continent = fct_reorder(Continent, care_sum)) %>%
  ggplot(aes(x = Continent, y = care_sum)) +
  geom_boxplot() +
  geom_jitter(alpha = .1) +
  labs(
    title = "CARE Compliance by Continent",
    x = "Continent",
    y = "CARE score"
  )

But keep in mind the n for each continent

# Number of papers and CARE score summary per continent, rounded to 1 decimal.
df %>%
  distinct(ID, .keep_all = TRUE) %>% # one row per paper
  group_by(Continent) %>%
  summarise(
    n = n(),
    Mean = mean(care_sum),
    sd = sd(care_sum),
    min = min(care_sum),
    max = max(care_sum)
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 1)))
## # A tibble: 5 × 6
##   Continent         n  Mean    sd   min   max
##   <chr>         <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Africa            2  18.6   2.1  17.1  20  
## 2 Asia             23  17.9   4.4   6.1  25  
## 3 Europe           75  17     3.6   7.1  23.1
## 4 North America     1  21.1  NA    21.1  21.1
## 5 Oceania           2  19.6   0.6  19.1  20

Papers published by Year

# Number of case reports per publication year (one row per paper).
df %>%
  distinct(., ID, .keep_all = TRUE) %>%   # filter unique IDs
  ggplot(aes(x = `YEAR published`)) + 
  geom_histogram(bins = 10, fill = "grey50") + 
  # title typo fixed: "publishes" -> "published"
  labs(title = "Case reports published by year", 
       x = "Year", 
       y = "Articles")

Papers published by year and continent

# Publications per year, one row of panels per continent.
df %>%
  distinct(ID, .keep_all = TRUE) %>% # one row per paper
  ggplot(aes(x = `YEAR published`, fill = Continent)) +
  geom_histogram(bins = 8) +
  facet_grid(rows = vars(Continent)) +
  labs(
    title = "Publications by Corresponding Author Country",
    x = "Year",
    y = "Publications"
  ) +
  # the panels already name the continent
  theme(legend.position = "none")

Create a simplified name for each CARE item

# Collapse the long CARE item labels into the 13 CARE sections.
# The rules are evaluated top to bottom and the first match wins, so the
# order matters: the single-digit patterns rely on the more specific rules
# above them having already claimed labels such as "Key Words 2 2 to 5 ...".
# Anything that matches none of the rules is assigned to section 13.
df <- df %>%
  mutate(
    CARE_name_simple = case_when(
      grepl("Title 1 The diagnosis", CARE_item, fixed = TRUE) ~ "01 Title",
      grepl("Key Words 2 2 to 5 key", CARE_item, fixed = TRUE) ~ "02 Keywords",
      grepl("Abstract 3", CARE_item, fixed = TRUE) ~ "03 Abstract",
      grepl("Introduction 4", CARE_item, fixed = TRUE) ~ "04 Introduction",
      grepl("5", CARE_item, fixed = TRUE) ~ "05 Patient Information",
      grepl("6", CARE_item, fixed = TRUE) ~ "06 Clinical Findings",
      grepl("7", CARE_item, fixed = TRUE) ~ "07 Timeline",
      grepl("8", CARE_item, fixed = TRUE) ~ "08 Diagnostic Assessment",
      grepl("9", CARE_item, fixed = TRUE) ~ "09 Therapeutic Intervention",
      grepl("10", CARE_item, fixed = TRUE) ~ "10 Follow-up and Outcomes",
      grepl("11", CARE_item, fixed = TRUE) ~ "11 Discussion",
      grepl("12", CARE_item, fixed = TRUE) ~ "12 Patient Perspective",
      TRUE ~ "13 Informed Consent"
    )
  )

Count CARE new items

# Answers per CARE section (row percentages), with the answer columns ordered
# Yes / Unclear / No.
df %>%
  select(CARE_name_simple, CARE_value) %>%
  mutate(CARE_value = fct_relevel(CARE_value, "Yes", "Unclear")) %>%
  group_by(CARE_name_simple, CARE_value) %>%
  gtsummary::tbl_summary(
    by = CARE_value,
    percent = "row"
  ) %>%
  modify_header(
    update = list(
      label ~ '**Characteristic**',
      stat_1 ~ '**Yes**',
      stat_2 ~ '**Unclear**',
      stat_3 ~ '**No**'
    )
  )
Characteristic Yes1 Unclear1 No1
CARE_name_simple
01 Title 37 (36%) 0 (0%) 66 (64%)
02 Keywords 1 (1.0%) 0 (0%) 102 (99%)
03 Abstract 240 (58%) 1 (0.2%) 171 (42%)
04 Introduction 90 (87%) 0 (0%) 13 (13%)
05 Patient Information 274 (67%) 0 (0%) 138 (33%)
06 Clinical Findings 101 (98%) 0 (0%) 2 (1.9%)
07 Timeline 93 (90%) 0 (0%) 10 (9.7%)
08 Diagnostic Assessment 175 (42%) 1 (0.2%) 236 (57%)
09 Therapeutic Intervention 207 (67%) 0 (0%) 102 (33%)
10 Follow-up and Outcomes 245 (59%) 4 (1.0%) 163 (40%)
11 Discussion 303 (74%) 0 (0%) 109 (26%)
12 Patient Perspective 0 (0%) 0 (0%) 103 (100%)
13 Informed Consent 10 (9.7%) 90 (87%) 3 (2.9%)

1 n (%)

Analysis before and after 2013

Create a new var indicating if before or after

# Flag each paper as published before 2013 or not.
# Papers from 2013 onwards, and papers without a year, are labelled "After".
df <- df %>%
  mutate(
    moment = if_else(
      coalesce(`YEAR published` < 2013, FALSE),
      "Before",
      "After"
    )
  )

Table 3

Now compare before and after

# Table 3: linear model of the per-paper CARE score on publication period,
# journal and continent.
df %>%
  # reference levels: Europe (largest group) and "Before"
  mutate(
    Continent = fct_relevel(Continent, "Europe"),
    moment = fct_relevel(moment, "Before")
  ) %>%
  distinct(ID, .keep_all = TRUE) %>% # one row per paper
  # pool continents and journals with fewer than 3 papers into "Other";
  # "Other" is the reference journal
  mutate(
    Continent = fct_lump_min(Continent, min = 3),
    JOURNAL = fct_relevel(fct_lump_min(JOURNAL, min = 3), "Other")
  ) %>%
  rename(Moment = moment) %>%
  with(lm(care_sum ~ Moment + JOURNAL + Continent)) %>%
  gtsummary::tbl_regression() %>%
  gtsummary::add_n(location = "level") %>%
  gtsummary::bold_labels()
Characteristic N Beta 95% CI1 p-value
Moment
Before 68
After 35 1.4 -0.29, 3.1 0.10
JOURNAL
Other 68
Archives Of Otolaryngology--Head & Neck Surgery 3 -2.3 -6.6, 2.0 0.3
British Medical Journal Case Report 4 2.7 -1.2, 6.6 0.2
Diagnostic Cytopathology 7 -0.20 -3.1, 2.6 0.9
European Archives Of Oto-Rhino-Laryngology 3 1.7 -2.6, 6.0 0.4
Head And Neck 3 -1.0 -5.3, 3.2 0.6
Journal Of Oral And Maxillofacial Surgery 6 0.36 -2.8, 3.5 0.8
The Journal Of Laryngology And Otology 6 -2.8 -6.0, 0.28 0.074
The Laryngoscope 3 -2.4 -6.7, 1.9 0.3
Continent
Europe 75
Asia 23 0.57 -1.2, 2.4 0.5
Other 5 1.2 -2.2, 4.6 0.5

1 CI = Confidence Interval

Analysis per item before/after

# Number of papers and mean / SD of the CARE score before and after 2013.
# care_sum and moment are constant within a paper, so one row per ID is all
# that is needed. The earlier wide reshape and the first group_by() (which
# was immediately replaced by the second) had no effect on the result.
df %>% 
  distinct(ID, .keep_all = TRUE) %>%  # one row per paper
  group_by(moment) %>% 
  summarise(n = n(), mean = mean(care_sum), sd = sd(care_sum)) %>% 
  mutate(across(where(is.numeric), ~ round(.x, 1)))
## # A tibble: 2 × 4
##   moment     n  mean    sd
##   <chr>  <dbl> <dbl> <dbl>
## 1 After     35  18.8   3.2
## 2 Before    68  16.6   3.8

Main table CARE compliance

# Main table: percentage of Yes / Unclear / No answers within each CARE
# section (each row sums to 100).
df %>%
  # keep the sections in their order of appearance
  mutate(CARE_name_simple = fct_inorder(CARE_name_simple)) %>%
  # counts per answer; the result stays grouped by section, so sum(n)
  # below is the section total
  group_by(CARE_name_simple) %>%
  count(CARE_value) %>%
  mutate(freq = n / sum(n) * 100) %>%
  select(-n) %>%
  # one column per answer; answers that never occur in a section count as 0
  pivot_wider(
    names_from = CARE_value,
    values_from = freq,
    values_fill = 0
  ) %>%
  mutate_if(is.numeric, round, 1) %>%
  relocate(Yes, Unclear, .after = CARE_name_simple)
## # A tibble: 13 × 4
## # Groups:   CARE_name_simple [13]
##    CARE_name_simple              Yes Unclear    No
##    <fct>                       <dbl>   <dbl> <dbl>
##  1 01 Title                     35.9     0    64.1
##  2 02 Keywords                   1       0    99  
##  3 03 Abstract                  58.3     0.2  41.5
##  4 04 Introduction              87.4     0    12.6
##  5 05 Patient Information       66.5     0    33.5
##  6 06 Clinical Findings         98.1     0     1.9
##  7 07 Timeline                  90.3     0     9.7
##  8 08 Diagnostic Assessment     42.5     0.2  57.3
##  9 09 Therapeutic Intervention  67       0    33  
## 10 10 Follow-up and Outcomes    59.5     1    39.6
## 11 11 Discussion                73.5     0    26.5
## 12 12 Patient Perspective        0       0   100  
## 13 13 Informed Consent           9.7    87.4   2.9

By raw scores

# Stacked bars of the answers per CARE section, built from the counts.
# NOTE(review): position = "fill" rescales every bar to 100 %, so this plot
# shows proportions rather than raw counts and looks the same as the
# percentage plot — confirm that is what is wanted for "Raw scores".
df %>%
  mutate(CARE_name_simple = fct_inorder(CARE_name_simple)) %>%
  group_by(CARE_name_simple) %>%
  count(CARE_value) %>%
  ggplot(aes(x = fct_rev(CARE_name_simple), y = n, fill = CARE_value)) +
  geom_col(position = "fill") +
  # colours are assigned to the answers in the order No, Unclear, Yes
  scale_fill_manual(values = c("#cc3232", "#e7b416", "#2dc937")) +
  scale_y_continuous(labels = label_percent()) +
  coord_flip() +
  labs(
    title = "CARE Items Compliance (Raw scores)",
    x = "CARE Item",
    y = "Percentage",
    fill = "Compliance"
  )

By percentages

# Stacked bars of the answers per CARE section, built from the within-section
# proportions.
df %>%
  mutate(CARE_name_simple = fct_inorder(CARE_name_simple)) %>%
  # the count stays grouped by section, so sum(n) is the section total
  group_by(CARE_name_simple) %>%
  count(CARE_value) %>%
  mutate(freq = n / sum(n)) %>%
  ggplot(aes(x = fct_rev(CARE_name_simple), y = freq, fill = CARE_value)) +
  geom_col(position = "fill") +
  # colours are assigned to the answers in the order No, Unclear, Yes
  scale_fill_manual(values = c("#cc3232", "#e7b416", "#2dc937")) +
  scale_y_continuous(labels = label_percent()) +
  coord_flip() +
  labs(
    title = "CARE Items Compliance (%)",
    x = "CARE Item",
    y = "Percentage",
    fill = "Compliance"
  )

Change before and after

# Compliance (share of "Yes" answers) per CARE section, before vs. after 2013.
#
# The percentage is computed *within each period*: Yes answers in that period
# divided by all answers in that period. The previous version divided by
# Before + After among the "Yes" answers only, which gives the share of all
# Yes answers that fall in each period. That figure mainly reflects how many
# papers each period contains, not how compliant they are.
df %>% 
  mutate(
    # keep the sections in their order of appearance
    CARE_name_simple = fct_inorder(CARE_name_simple),
    # show "Before" first in the legend
    moment = fct_relevel(moment, c("Before"))
  ) %>% 
  group_by(CARE_name_simple, moment) %>%
  # %in% treats a missing answer as "not Yes" instead of propagating NA
  summarise(moment_values = round(mean(CARE_value %in% "Yes") * 100, 1),
            .groups = "drop") %>% 

  ggplot(aes(x = fct_rev(CARE_name_simple), 
             y = moment_values, 
             color = moment)) + 
  geom_point() + 
  coord_flip() +
  labs(title = "Change in percentage of compliance\n(CARE = Yes) before and after 2013", 
       x = "CARE Items", 
       y = "Compliance percentage", 
       color = "Moment")  +
  theme(legend.position="top")

# Dumbbell plot: compliance (share of "Yes" answers) per CARE section before
# (green) and after (red) 2013, joined by a grey segment.
#
# The percentage is computed within each period (Yes answers / all answers of
# that period). Previously the denominator was the number of Yes answers over
# both periods, so the two points of a section always summed to 100 and mostly
# reflected the number of papers per period.
df %>%
  # order of appearance, reversed once so that the first section ends up at
  # the top after coord_flip(); all layers share this one factor
  mutate(CARE_name_simple = fct_rev(fct_inorder(CARE_name_simple))) %>%
  group_by(CARE_name_simple, moment) %>%
  # %in% treats a missing answer as "not Yes" instead of propagating NA
  summarise(freq = mean(CARE_value %in% "Yes") * 100, .groups = "drop") %>%
  # one row per section with a Before and an After column
  pivot_wider(names_from = moment, 
              values_from = freq) %>% 
  ggplot(aes(x = CARE_name_simple)) +
  geom_segment(aes(xend = CARE_name_simple,
                   y = Before, yend = After), color = "grey") +
  geom_point(aes(y = Before),
             color = "green", size = 3) +
  geom_point(aes(y = After), 
             color = "red", size = 3) +
  coord_flip() + 
  labs(title = "Change in percentage of compliance\n(CARE = Yes) before and after 2013", 
       x = "CARE Items",
       y = "Compliance percentage") 

Wordcloud

 # Build the corpus from the abstracts and clean it in one pipeline.
 # The steps are kept in their original order (some are applied twice);
 # changing the order could change which tokens survive.
 docs <- Corpus(VectorSource(text)) %>%
   tm_map(removeNumbers) %>%
   tm_map(removePunctuation) %>%
   tm_map(stripWhitespace) %>%
   # lower-case everything
   tm_map(content_transformer(tolower)) %>%
   tm_map(removeNumbers) %>%
   # drop common English stopwords, then the project-specific ones
   tm_map(removeWords, stopwords("english")) %>%
   tm_map(removeWords, c("keywords")) %>%
   tm_map(removePunctuation) %>%
   tm_map(stripWhitespace) %>%
   # reduce words to their stem
   tm_map(stemDocument)
 # Term frequencies over all documents, most frequent first.
 dtm <- TermDocumentMatrix(docs)
 m <- as.matrix(dtm)
 v <- sort(rowSums(m), decreasing = TRUE)
 d <- data.frame(word = names(v), freq = v)
 head(d, 10)
##                word freq
## parotid     parotid  523
## gland         gland  366
## case           case  316
## carcinoma carcinoma  254
## present     present  251
## metastat   metastat  223
## metastasi metastasi  213
## cell           cell  212
## patient     patient  206
## report       report  193
 # Fix the seed so the randomised word placement is reproducible.
 set.seed(1234)
 # Draw at most 200 stems that occur at least 35 times; 35 % of the words
 # are rotated.
 wordcloud(
   words = d[["word"]],
   freq = d[["freq"]],
   max.words = 200,
   min.freq = 35,
   rot.per = 0.35,
   random.order = TRUE,
   colors = brewer.pal(8, "Dark2")
 )

Which terms are correlated?

# findAssocs(dtm, terms = "primary", corlimit = 0.3)

Ingus 1 march 2022

I: Agreement between Ingus and Peteris

0.75

S: what is the quality of the reports?

I: what is the quality before and after 2013

I: difference between journals