pacman::p_load(tidyverse,
gtsummary,
ggcharts , # see https://thomas-neitmann.github.io/ggcharts/reference/dumbbell_chart.html
dlookr,
scales, # for the % scales in ggplot2
patchwork, # for multiple plots
santoku, # to break variables
DescTools, # for agreement
irr, # for agreement
janitor)
Packages for the word cloud
pacman::p_load(wordcloud, # word-cloud generator
SnowballC, # for text stemming
RColorBrewer, # palette
wordcloud2,
tm) # for text mining
theme_set(theme_minimal())
Dataset
# Read the appraisal sheet (published as CSV) and list its column names.
df <- readr::read_csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vRKPzFe2lbF87DNe9SBjuaIb5iMb4nCmgvfdyT4v-NjK-BZBR-HkUIUdgiML3t30EqQ1RCep0sExatK/pub?output=csv"
)
colnames(df)
## [1] "Laika zīmogs"
## [2] "TITLE"
## [3] "Reviewer"
## [4] "AUTHORS"
## [5] "JOURNAL"
## [6] "YEAR published"
## [7] "Number of patients"
## [8] "Main focus of the report"
## [9] "Copy and paste the abstract"
## [10] "Country of the correspondence author"
## [11] "CARE Appraisal [Title 1 The diagnosis or intervention of primary focus followed by the words “case report”]"
## [12] "CARE Appraisal [Key Words 2 2 to 5 key words that identify diagnoses or interventions in this case report, including \"case report\"]"
## [13] "CARE Appraisal [Abstract 3a Introduction: What is unique about this case and what does it add to the scientific literature?]"
## [14] "CARE Appraisal [Abstract 3b Main symptoms and/or important clinical findings]"
## [15] "CARE Appraisal [Abstract 3c The main diagnoses, therapeutic interventions, and outcomes]"
## [16] "CARE Appraisal [Abstract 3d Conclusion—What is the main “take-away” lesson(s) from this case?]"
## [17] "CARE Appraisal [Introduction 4 One or two paragraphs summarizing why this case is unique (may include references)]"
## [18] "CARE Appraisal [Patient Information 5a De-identified patient specific information.]"
## [19] "CARE Appraisal [5b Primary concerns and symptoms of the patient.]"
## [20] "CARE Appraisal [5c Medical, family, and psycho-social history including relevant genetic information]"
## [21] "CARE Appraisal [5d Relevant past interventions with outcomes]"
## [22] "CARE Appraisal [Clinical Findings 6 Describe significant physical examination (PE) and important clinical findings.]"
## [23] "CARE Appraisal [Timeline 7 Historical and current information from this episode of care organized as a timeline]"
## [24] "CARE Appraisal [Diagnostic Assessment 8a Diagnostic testing (such as PE, laboratory testing, imaging, surveys).]"
## [25] "CARE Appraisal [Diagnostic Assessment 8b Diagnostic challenges (such as access to testing, financial, or cultural)]"
## [26] "CARE Appraisal [8c Diagnosis (including other diagnoses considered)]"
## [27] "CARE Appraisal [Therapeutic Intervention Diagnostic Assessment 8d Prognosis (such as staging in oncology) where applicable]"
## [28] "CARE Appraisal [Therapeutic Intervention 9a Types of therapeutic intervention (such as pharmacologic, surgical, preventive, self-care)]"
## [29] "CARE Appraisal [Therapeutic Intervention 9b Administration of therapeutic intervention (such as dosage, strength, duration)]"
## [30] "CARE Appraisal [Therapeutic Intervention 9c Changes in therapeutic intervention (with rationale)]"
## [31] "CARE Appraisal [Follow-up and Outcomes 10a Clinician and patient-assessed outcomes (if available).]"
## [32] "CARE Appraisal [Follow-up and Outcomes 10b Important follow-up diagnostic and other test results]"
## [33] "CARE Appraisal [Follow-up and Outcomes 10c Intervention adherence and tolerability (How was this assessed?)]"
## [34] "CARE Appraisal [Follow-up and Outcomes 10d Adverse and unanticipated events]"
## [35] "CARE Appraisal [Discussion 11a A scientific discussion of the strengths AND limitations associated with this case report]"
## [36] "CARE Appraisal [Discussion 11b Discussion of the relevant medical literature with references.]"
## [37] "CARE Appraisal [Discussion 11c The scientific rationale for any conclusions (including assessment of possible causes)]"
## [38] "CARE Appraisal [Discussion 11d The primary “take-away” lessons of this case report (without references) in a one paragraph conclusion]"
## [39] "CARE Appraisal [Patient Perspective 12 The patient should share their perspective in one to two paragraphs on the treatment(s) they received]"
## [40] "CARE Appraisal [Informed Consent 13 Did the patient give informed consent? Please provide if requested]"
## [41] "Comments"
## [42] "Declared study design"
## [43] "Males reported"
## [44] "Females reported"
## [45] "Age"
## [46] "Race (if reported)"
## [47] "ID"
For the wordcloud later
# Abstracts for the word cloud, one per paper ID. At this point `df` still
# holds the rows of every reviewer, so taking the raw column would count an
# abstract once per reviewer and inflate the word frequencies.
text <- df %>%
  distinct(ID, .keep_all = TRUE) %>%
  pull(`Copy and paste the abstract`)

# Per-column overview: type, missing values and unique counts.
dlookr::diagnose(df)
## # A tibble: 47 × 6
## variables types missing_count missing_percent unique_count unique_rate
## <chr> <chr> <int> <dbl> <int> <dbl>
## 1 Laika zīmogs char… 0 0 204 1
## 2 TITLE char… 0 0 105 0.515
## 3 Reviewer char… 0 0 3 0.0147
## 4 AUTHORS char… 0 0 128 0.627
## 5 JOURNAL char… 0 0 76 0.373
## 6 YEAR published nume… 0 0 38 0.186
## 7 Number of patie… nume… 0 0 7 0.0343
## 8 Main focus of t… char… 0 0 4 0.0196
## 9 Copy and paste … char… 0 0 175 0.858
## 10 Country of the … char… 0 0 23 0.113
## # … with 37 more rows
# Harmonise journal names in a single pass: title case, trimmed whitespace,
# one typo fix and the merging of known spelling variants.
df <- df %>%
  mutate(
    JOURNAL = JOURNAL %>%
      str_to_title() %>%
      str_trim(side = "both") %>%
      str_replace("Resaerch", "Research") %>%
      str_replace("Otolaryngology-Head And Neck Surgery", "Otolaryngology–Head And Neck Surgery") %>%
      str_replace("Otolaryngology–Head And Neck Surgery", "Archives Of Otolaryngology--Head & Neck Surgery") %>%
      str_replace("The Journal Of Laryngology & Otology", "The Journal Of Laryngology And Otology")
  )
Check agreement
How many raters?
# Number (and share) of appraisals contributed by each reviewer.
janitor::tabyl(df, Reviewer)
## Reviewer n percent
## IA 101 0.495098039
## PJ 101 0.495098039
## SU 2 0.009803922
Kappa
# Inter-rater agreement (Fleiss' kappa) between reviewers IA and PJ.
# NOTE(review): the previous column range ran from item 3b back to
# "Key Words 2" and therefore rated only 3 of the 30 CARE items; all CARE
# columns are used here, so the printed kappa will differ from the old output.
df %>%
  filter(Reviewer != "SU") %>% # SU is excluded from the agreement analysis
  select(Reviewer, ID, contains("CARE Appraisal")) %>%
  # long format: one row per reviewer x paper x CARE item
  pivot_longer(-c(Reviewer, ID),
    names_to = "Care_item",
    values_to = "Care_values"
  ) %>%
  # one row per paper x item with one column per reviewer; keeping Care_item
  # as an id pairs the two ratings by item instead of by list position
  pivot_wider(
    id_cols = c(ID, Care_item),
    names_from = Reviewer,
    values_from = Care_values,
    values_fn = list
  ) %>%
  tidyr::unnest(cols = c(IA, PJ)) %>%
  # keep only the items rated by both reviewers
  filter(!is.na(IA), !is.na(PJ)) %>%
  select(IA, PJ) %>%
  kappam.fleiss(detail = TRUE)
## Fleiss' Kappa for m Raters
##
## Subjects = 267
## Raters = 2
## Kappa = 0.715
##
## z = 11.7
## p-value = 0
##
## Kappa z p.value
## No 0.715 11.681 0.000
## Yes 0.715 11.681 0.000
The kappa between the two raters is 0.715
Since the two raters are comparable, I will keep the appraisals of only one of them (IA), plus those of SU
# Keep only the appraisals made by reviewers IA and SU.
df <- filter(df, Reviewer == "IA" | Reviewer == "SU")
Add a continent column
# Country -> continent lookup table, followed by a quick look at its structure.
countries <- readr::read_csv(
  file = "https://raw.githubusercontent.com/dbouquin/IS_608/master/NanosatDB_munging/Countries-Continents.csv"
)
dplyr::glimpse(countries)
## Rows: 194
## Columns: 2
## $ Continent <chr> "Africa", "Africa", "Africa", "Africa", "Africa", "Africa", …
## $ Country <chr> "Algeria", "Angola", "Benin", "Botswana", "Burkina", "Burund…
# Attach the continent of the corresponding author's country.
# Resolution order:
#   1. the explicit overrides below (unchanged from the previous version),
#   2. the continent found in the `countries` lookup,
#   3. "Europe" as the historical fallback for countries not found anywhere.
# Previously step 2 was missing: the joined value was overwritten and every
# unlisted country (including the renamed "US") was labelled "Europe".
df <- df %>%
  # the lookup is joined on "US", hence the rename
  mutate(`Country of the correspondence author` = str_replace_all(
    `Country of the correspondence author`, "United States", "US"
  )) %>%
  left_join(countries, by = c("Country of the correspondence author" = "Country")) %>%
  mutate(Continent = case_when(
    `Country of the correspondence author` == "Taiwan" ~ "Asia",
    `Country of the correspondence author` == "Australia" ~ "Oceania",
    `Country of the correspondence author` == "Canada" ~ "North America",
    `Country of the correspondence author` == "China" ~ "Asia",
    `Country of the correspondence author` == "India" ~ "Asia",
    `Country of the correspondence author` == "Japan" ~ "Asia",
    `Country of the correspondence author` == "Israel" ~ "Asia",
    `Country of the correspondence author` == "Kuwait" ~ "Asia",
    `Country of the correspondence author` == "Morocco" ~ "Africa",
    `Country of the correspondence author` == "Qatar" ~ "Asia",
    `Country of the correspondence author` == "Tunisia" ~ "Africa",
    !is.na(Continent) ~ Continent, # value supplied by the lookup join
    TRUE ~ "Europe"
  ))

# The lookup table is no longer needed.
rm(countries)
EDA
How many papers
# Number of distinct paper titles.
length(unique(df[["TITLE"]]))
## [1] 103
From how many journals?
# Number of distinct (cleaned) journal names.
length(unique(df[["JOURNAL"]]))
## [1] 68
List of journals with at least 3 articles
# Papers per journal, most frequent first; journals with fewer than
# 3 papers are pooled into "Other".
df %>%
  mutate(JOURNAL = fct_lump_min(JOURNAL, min = 3)) %>%
  count(JOURNAL, sort = TRUE)
## # A tibble: 9 × 2
## JOURNAL n
## <fct> <int>
## 1 Other 68
## 2 Diagnostic Cytopathology 7
## 3 Journal Of Oral And Maxillofacial Surgery 6
## 4 The Journal Of Laryngology And Otology 6
## 5 British Medical Journal Case Report 4
## 6 Archives Of Otolaryngology--Head & Neck Surgery 3
## 7 European Archives Of Oto-Rhino-Laryngology 3
## 8 Head And Neck 3
## 9 The Laryngoscope 3
Patients distribution by paper
# Histogram of the number of patients described per paper.
ggplot(df, aes(x = `Number of patients`)) +
  geom_histogram(bins = 6)

How many patients per paper
# Five-number summary and mean of patients per paper.
summary(df[["Number of patients"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 1.000 1.262 1.000 7.000
How many patients in total
# Total number of patients across all papers.
sum(df[["Number of patients"]])
## [1] 130
Males and females
# Distribution of the number of males / females reported per paper,
# one panel per sex. `fill` is mapped to the `sex` variable (it was the
# quoted constant "sex" before, which gave both panels the same colour).
df %>%
  pivot_longer(`Males reported`:`Females reported`,
    names_to = "sex",
    values_to = "sex_values"
  ) %>%
  ggplot(aes(x = sex_values, fill = sex)) +
  geom_histogram(bins = 6) +
  facet_grid(sex ~ .) +
  theme(legend.position = "none")

Year of publication
# Histogram of publication years.
ggplot(df, aes(x = `YEAR published`)) +
  geom_histogram(bins = 10)

Age of the patients
# Histogram of patient age.
ggplot(df, aes(x = Age)) +
  geom_histogram(bins = 10)

Age of the patients by sex
# Reported count of males / females against patient age, one panel per sex.
df %>%
  pivot_longer(
    cols = `Males reported`:`Females reported`,
    names_to = "sex",
    values_to = "sex_values"
  ) %>%
  ggplot(aes(x = Age, y = sex_values, color = sex)) +
  geom_jitter(alpha = .7) +
  facet_grid(rows = vars(sex)) +
  theme(legend.position = "none")

# Age by sex. After reshaping, every paper has one row per sex, so only the
# rows with at least one patient of that sex are kept; without the filter
# both boxes would show exactly the same ages.
df %>%
  pivot_longer(`Males reported`:`Females reported`,
    names_to = "sex",
    values_to = "sex_values"
  ) %>%
  filter(sex_values > 0) %>%
  ggplot(aes(x = sex, y = Age, color = sex)) +
  geom_boxplot(alpha = .7) +
  geom_jitter(alpha = .2, width = .2) +
  theme(legend.position = "none")

Age by Continent
# Age by continent, continents ordered by median age.
# `na.rm = TRUE` is forwarded to median() so a missing age does not make a
# continent's median (and hence its position) undefined.
df %>%
  distinct(ID, .keep_all = TRUE) %>% # one row per paper
  mutate(Continent = fct_reorder(Continent, Age, .fun = median, na.rm = TRUE)) %>%
  ggplot(aes(x = Continent, y = Age)) +
  geom_boxplot() +
  geom_jitter(alpha = .1, width = 0.2)

Case reports by continent
# Patients per paper over time (log scale), one panel per continent.
df %>%
  distinct(ID, .keep_all = TRUE) %>% # one row per paper
  ggplot(aes(
    x = `YEAR published`,
    y = `Number of patients`,
    color = Continent
  )) +
  geom_jitter(alpha = .4) +
  scale_y_log10() +
  facet_grid(cols = vars(Continent))

CARE ANALYSIS
Convert the answers to points
Yes = 1
Unclear = .1
No = 0
# Drop the free-text columns, stack the CARE items into long format
# (one row per paper x item) and score each answer as text:
# "Yes" -> "1", "Unclear" -> "0.1", anything else -> "0".
df <- df %>%
  select(-all_of(c(
    "TITLE", "Reviewer", "Laika zīmogs",
    "AUTHORS", "Copy and paste the abstract",
    "Comments"
  ))) %>%
  pivot_longer(
    cols = contains("CARE"),
    names_to = "CARE_item",
    values_to = "CARE_value"
  ) %>%
  mutate(CARE_value_num = case_when(
    CARE_value %in% "Yes" ~ "1",
    CARE_value %in% "Unclear" ~ "0.1",
    TRUE ~ "0"
  ))
TABLE 1 CARE items compliance
Compliance per CARE item
# Table 1: answers per CARE item (row percentages), items kept in the
# order in which they first appear.
df %>%
  transmute(
    CARE_item = fct_inorder(CARE_item),
    CARE_value
  ) %>%
  gtsummary::tbl_summary(
    by = CARE_value,
    percent = "row"
  ) %>%
  modify_header(update = list(
    label ~ '**Characteristic**',
    stat_1 ~ '**No**',
    stat_2 ~ '**Unclear**',
    stat_3 ~ '**Yes**'
  ))
Characteristic |
No |
Unclear |
Yes |
CARE_item |
|
|
|
CARE Appraisal [Title 1 The diagnosis or intervention of primary focus followed by the words “case report”] |
66 (64%) |
0 (0%) |
37 (36%) |
CARE Appraisal [Key Words 2 2 to 5 key words that identify diagnoses or interventions in this case report, including "case report"] |
102 (99%) |
0 (0%) |
1 (1.0%) |
CARE Appraisal [Abstract 3a Introduction: What is unique about this case and what does it add to the scientific literature?] |
34 (33%) |
0 (0%) |
69 (67%) |
CARE Appraisal [Abstract 3b Main symptoms and/or important clinical findings] |
37 (36%) |
1 (1.0%) |
65 (63%) |
CARE Appraisal [Abstract 3c The main diagnoses, therapeutic interventions, and outcomes] |
55 (53%) |
0 (0%) |
48 (47%) |
CARE Appraisal [Abstract 3d Conclusion—What is the main “take-away” lesson(s) from this case?] |
45 (44%) |
0 (0%) |
58 (56%) |
CARE Appraisal [Introduction 4 One or two paragraphs summarizing why this case is unique (may include references)] |
13 (13%) |
0 (0%) |
90 (87%) |
CARE Appraisal [Patient Information 5a De-identified patient specific information.] |
1 (1.0%) |
0 (0%) |
102 (99%) |
CARE Appraisal [5b Primary concerns and symptoms of the patient.] |
3 (2.9%) |
0 (0%) |
100 (97%) |
CARE Appraisal [5c Medical, family, and psycho-social history including relevant genetic information] |
100 (97%) |
0 (0%) |
3 (2.9%) |
CARE Appraisal [5d Relevant past interventions with outcomes] |
34 (33%) |
0 (0%) |
69 (67%) |
CARE Appraisal [Clinical Findings 6 Describe significant physical examination (PE) and important clinical findings.] |
2 (1.9%) |
0 (0%) |
101 (98%) |
CARE Appraisal [Timeline 7 Historical and current information from this episode of care organized as a timeline] |
10 (9.7%) |
0 (0%) |
93 (90%) |
CARE Appraisal [Diagnostic Assessment 8a Diagnostic testing (such as PE, laboratory testing, imaging, surveys).] |
2 (1.9%) |
0 (0%) |
101 (98%) |
CARE Appraisal [Diagnostic Assessment 8b Diagnostic challenges (such as access to testing, financial, or cultural)] |
97 (94%) |
0 (0%) |
6 (5.8%) |
CARE Appraisal [8c Diagnosis (including other diagnoses considered)] |
66 (64%) |
0 (0%) |
37 (36%) |
CARE Appraisal [Therapeutic Intervention Diagnostic Assessment 8d Prognosis (such as staging in oncology) where applicable] |
71 (69%) |
1 (1.0%) |
31 (30%) |
CARE Appraisal [Therapeutic Intervention 9a Types of therapeutic intervention (such as pharmacologic, surgical, preventive, self-care)] |
4 (3.9%) |
0 (0%) |
99 (96%) |
CARE Appraisal [Therapeutic Intervention 9b Administration of therapeutic intervention (such as dosage, strength, duration)] |
72 (70%) |
0 (0%) |
31 (30%) |
CARE Appraisal [Therapeutic Intervention 9c Changes in therapeutic intervention (with rationale)] |
26 (25%) |
0 (0%) |
77 (75%) |
CARE Appraisal [Follow-up and Outcomes 10a Clinician and patient-assessed outcomes (if available).] |
35 (34%) |
1 (1.0%) |
67 (65%) |
CARE Appraisal [Follow-up and Outcomes 10b Important follow-up diagnostic and other test results] |
26 (25%) |
1 (1.0%) |
76 (74%) |
CARE Appraisal [Follow-up and Outcomes 10c Intervention adherence and tolerability (How was this assessed?)] |
81 (79%) |
2 (1.9%) |
20 (19%) |
CARE Appraisal [Follow-up and Outcomes 10d Adverse and unanticipated events] |
21 (20%) |
0 (0%) |
82 (80%) |
CARE Appraisal [Discussion 11a A scientific discussion of the strengths AND limitations associated with this case report] |
72 (70%) |
0 (0%) |
31 (30%) |
CARE Appraisal [Discussion 11b Discussion of the relevant medical literature with references.] |
3 (2.9%) |
0 (0%) |
100 (97%) |
CARE Appraisal [Discussion 11c The scientific rationale for any conclusions (including assessment of possible causes)] |
4 (3.9%) |
0 (0%) |
99 (96%) |
CARE Appraisal [Discussion 11d The primary “take-away” lessons of this case report (without references) in a one paragraph conclusion] |
30 (29%) |
0 (0%) |
73 (71%) |
CARE Appraisal [Patient Perspective 12 The patient should share their perspective in one to two paragraphs on the treatment(s) they received] |
103 (100%) |
0 (0%) |
0 (0%) |
CARE Appraisal [Informed Consent 13 Did the patient give informed consent? Please provide if requested] |
3 (2.9%) |
90 (87%) |
10 (9.7%) |
Calculate the average quality per paper
# Total CARE score per paper: the sum of the numeric item scores.
# The scores are stored as text at this point, hence the conversion to double.
# A grouped sum replaces the former pivot_wider + rowwise + c_across round
# trip and returns the same two columns (ID, care_sum).
df_sum <- df %>%
  group_by(ID) %>%
  summarise(care_sum = sum(as.double(CARE_value_num)))
Now merge the df_sum
# Attach each paper's total CARE score to its item rows.
df <- df %>%
  left_join(df_sum, by = "ID")
Convert CARE_value_num to numeric
# Store the item score as a number instead of text.
df <- mutate(df, CARE_value_num = as.double(CARE_value_num))
Remove the df_sum
# The per-paper scores now live in `df`; drop the helper table.
rm(list = "df_sum")
FIGURE 4 CARE score by year
Calculate the average quality per year
# Figure 4: distribution of the per-paper CARE score by decade of publication.
# One row per paper is enough (care_sum is constant within ID), so the former
# pivot_wider step is dropped.
df %>%
  distinct(ID, .keep_all = TRUE) %>% # one row per paper
  mutate(Decade = floor(`YEAR published` / 10) * 10) %>%
  # alternative binning:
  # mutate(Decade = santoku::chop(`YEAR published`, c(1969, 1979, 1989, 1999, 2009, 2019))) %>%
  select(ID, Decade, care_sum) %>%
  ggplot(aes(
    x = as.factor(Decade),
    y = care_sum
  )) +
  geom_jitter(color = "grey90") +
  geom_boxplot(width = .2, color = "grey60") +
  geom_violin(width = .9, fill = NA) +
  labs(
    title = "Average CARE compliance per decade",
    subtitle = "Median and 25%−75% Quartiles\nDotted line in 2013 marks the publication of the CARE guidelines",
    y = "CARE Compliance",
    x = "Decade"
  ) +
  # Position 5.3 is on the discrete decade axis (slightly right of the 5th
  # level). It is set outside aes() so a single line is drawn rather than one
  # per data row.
  geom_vline(xintercept = 5.3, linetype = "dashed", colour = "red", size = 0.5)

Quality by continent
# CARE score by continent, continents ordered by their median score.
df %>%
  distinct(ID, .keep_all = TRUE) %>% # care_sum is repeated on every item row
  mutate(Continent = fct_reorder(Continent, care_sum)) %>%
  ggplot(aes(x = Continent, y = care_sum)) +
  geom_boxplot() +
  geom_jitter(alpha = .1) +
  labs(
    title = "CARE Compliance by Continent",
    x = "Continent",
    y = "CARE score"
  )

But keep in mind the n for each continent
# Number of papers and CARE score summary per continent, rounded to 1 decimal.
df %>%
  distinct(ID, .keep_all = TRUE) %>% # care_sum is repeated on every item row
  group_by(Continent) %>%
  summarise(
    n = n(),
    "Mean" = mean(care_sum),
    "sd" = sd(care_sum),
    min = min(care_sum),
    max = max(care_sum)
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 1)))
## # A tibble: 5 × 6
## Continent n Mean sd min max
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Africa 2 18.6 2.1 17.1 20
## 2 Asia 23 17.9 4.4 6.1 25
## 3 Europe 75 17 3.6 7.1 23.1
## 4 North America 1 21.1 NA 21.1 21.1
## 5 Oceania 2 19.6 0.6 19.1 20
Papers published by Year
# Number of case reports per publication year (one row per paper).
# The plot title typo ("publishes") is corrected.
df %>%
  distinct(ID, .keep_all = TRUE) %>%
  ggplot(aes(x = `YEAR published`)) +
  geom_histogram(bins = 10, fill = "grey50") +
  labs(
    title = "Case reports published by year",
    x = "Year",
    y = "Articles"
  )

Papers published by year and continent
# Publications per year, one panel per continent of the corresponding author.
df %>%
  distinct(ID, .keep_all = TRUE) %>% # one row per paper
  ggplot(aes(x = `YEAR published`, fill = Continent)) +
  geom_histogram(bins = 8) +
  facet_grid(rows = vars(Continent)) +
  labs(
    title = "Publications by Corresponding Author Country",
    x = "Year",
    y = "Publications"
  ) +
  theme(legend.position = "none")

Create simplified names for the CARE items
# Map each full CARE item label to its numbered section name.
# The rules are tried top to bottom and the first match wins, so the order
# matters: the bare-digit patterns only work because earlier rules have
# already claimed the labels that contain those digits.
df$CARE_name_simple <- local({
  mentions <- function(pattern) str_detect(df$CARE_item, pattern)
  case_when(
    mentions("Title 1 The diagnosis") ~ "01 Title",
    mentions("Key Words 2 2 to 5 key") ~ "02 Keywords",
    mentions("Abstract 3") ~ "03 Abstract",
    mentions("Introduction 4") ~ "04 Introduction",
    mentions("5") ~ "05 Patient Information",
    mentions("6") ~ "06 Clinical Findings",
    mentions("7") ~ "07 Timeline",
    mentions("8") ~ "08 Diagnostic Assessment",
    mentions("9") ~ "09 Therapeutic Intervention",
    mentions("10") ~ "10 Follow-up and Outcomes",
    mentions("11") ~ "11 Discussion",
    mentions("12") ~ "12 Patient Perspective",
    TRUE ~ "13 Informed Consent"
  )
})
Count CARE new items
# Answers per simplified CARE section (row percentages).
# The answer levels are ordered Yes, Unclear, No so that stat_1..stat_3 match
# the headers below. tbl_summary() does its own tabulation, so no group_by()
# is needed before it.
df %>%
  mutate(CARE_value = fct_relevel(CARE_value, "Yes", "Unclear")) %>%
  select(CARE_name_simple, CARE_value) %>%
  gtsummary::tbl_summary(
    by = CARE_value,
    percent = "row"
  ) %>%
  modify_header(update = list(
    label ~ '**Characteristic**',
    stat_1 ~ '**Yes**',
    stat_2 ~ '**Unclear**',
    stat_3 ~ '**No**'
  ))
Characteristic |
Yes |
Unclear |
No |
CARE_name_simple |
|
|
|
01 Title |
37 (36%) |
0 (0%) |
66 (64%) |
02 Keywords |
1 (1.0%) |
0 (0%) |
102 (99%) |
03 Abstract |
240 (58%) |
1 (0.2%) |
171 (42%) |
04 Introduction |
90 (87%) |
0 (0%) |
13 (13%) |
05 Patient Information |
274 (67%) |
0 (0%) |
138 (33%) |
06 Clinical Findings |
101 (98%) |
0 (0%) |
2 (1.9%) |
07 Timeline |
93 (90%) |
0 (0%) |
10 (9.7%) |
08 Diagnostic Assessment |
175 (42%) |
1 (0.2%) |
236 (57%) |
09 Therapeutic Intervention |
207 (67%) |
0 (0%) |
102 (33%) |
10 Follow-up and Outcomes |
245 (59%) |
4 (1.0%) |
163 (40%) |
11 Discussion |
303 (74%) |
0 (0%) |
109 (26%) |
12 Patient Perspective |
0 (0%) |
0 (0%) |
103 (100%) |
13 Informed Consent |
10 (9.7%) |
90 (87%) |
3 (2.9%) |
Analysis before and after 2013
Create a new var indicating if before or after
# Flag each row as published before 2013 or from 2013 onwards
# (a missing year falls into "After", as in the catch-all branch).
df <- df %>%
  mutate(moment = if_else(
    `YEAR published` < 2013,
    "Before",
    "After",
    missing = "After"
  ))
Table 3
Now compare before and after
# Table 3: linear model of the per-paper CARE score on publication period,
# journal and continent. Baselines: "Before", "Other" journals and "Europe".
# Journals and continents with fewer than 3 papers are pooled into "Other".
df %>%
  distinct(ID, .keep_all = TRUE) %>% # one row per paper
  mutate(
    Continent = fct_lump_min(fct_relevel(Continent, "Europe"), min = 3),
    moment = fct_relevel(moment, "Before"),
    JOURNAL = fct_relevel(fct_lump_min(JOURNAL, min = 3), "Other")
  ) %>%
  rename("Moment" = "moment") %>%
  lm(care_sum ~ Moment + JOURNAL + Continent, data = .) %>%
  gtsummary::tbl_regression() %>%
  gtsummary::add_n(location = "level") %>%
  gtsummary::bold_labels()
Characteristic |
N |
Beta |
95% CI |
p-value |
Moment |
|
|
|
|
Before |
68 |
— |
— |
|
After |
35 |
1.4 |
-0.29, 3.1 |
0.10 |
JOURNAL |
|
|
|
|
Other |
68 |
— |
— |
|
Archives Of Otolaryngology--Head & Neck Surgery |
3 |
-2.3 |
-6.6, 2.0 |
0.3 |
British Medical Journal Case Report |
4 |
2.7 |
-1.2, 6.6 |
0.2 |
Diagnostic Cytopathology |
7 |
-0.20 |
-3.1, 2.6 |
0.9 |
European Archives Of Oto-Rhino-Laryngology |
3 |
1.7 |
-2.6, 6.0 |
0.4 |
Head And Neck |
3 |
-1.0 |
-5.3, 3.2 |
0.6 |
Journal Of Oral And Maxillofacial Surgery |
6 |
0.36 |
-2.8, 3.5 |
0.8 |
The Journal Of Laryngology And Otology |
6 |
-2.8 |
-6.0, 0.28 |
0.074 |
The Laryngoscope |
3 |
-2.4 |
-6.7, 1.9 |
0.3 |
Continent |
|
|
|
|
Europe |
75 |
— |
— |
|
Asia |
23 |
0.57 |
-1.2, 2.4 |
0.5 |
Other |
5 |
1.2 |
-2.2, 4.6 |
0.5 |
Analysis per item before/after
# Number of papers and mean / sd of the per-paper CARE score, before vs
# after 2013. One row per paper is taken directly; the former pivot_wider
# and the group_by(ID, moment) that was immediately overridden had no effect
# on the result and are removed.
df %>%
  distinct(ID, .keep_all = TRUE) %>%
  group_by(moment) %>%
  summarise(n = n(), mean = mean(care_sum), sd = sd(care_sum)) %>%
  mutate(across(where(is.numeric), ~ round(.x, 1)))
## # A tibble: 2 × 4
## moment n mean sd
## <chr> <dbl> <dbl> <dbl>
## 1 After 35 18.8 3.2
## 2 Before 68 16.6 3.8
Main table CARE compliance
# Main table: percentage of Yes / Unclear / No answers within each
# simplified CARE section, rounded to one decimal.
df %>%
  mutate(CARE_name_simple = fct_inorder(CARE_name_simple)) %>% # keep 01..13 order
  select(ID, moment, CARE_name_simple, CARE_value) %>%
  group_by(CARE_name_simple, CARE_value) %>%
  # counts per section x answer; the result stays grouped by section
  summarise(freq = n(), .groups = "drop_last") %>%
  # share of each answer within its section, in percent
  mutate(freq = freq / sum(freq) * 100) %>%
  pivot_wider(
    names_from = CARE_value,
    values_from = freq,
    values_fill = 0
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 1))) %>%
  relocate("Yes", "Unclear", .after = CARE_name_simple)
## # A tibble: 13 × 4
## # Groups: CARE_name_simple [13]
## CARE_name_simple Yes Unclear No
## <fct> <dbl> <dbl> <dbl>
## 1 01 Title 35.9 0 64.1
## 2 02 Keywords 1 0 99
## 3 03 Abstract 58.3 0.2 41.5
## 4 04 Introduction 87.4 0 12.6
## 5 05 Patient Information 66.5 0 33.5
## 6 06 Clinical Findings 98.1 0 1.9
## 7 07 Timeline 90.3 0 9.7
## 8 08 Diagnostic Assessment 42.5 0.2 57.3
## 9 09 Therapeutic Intervention 67 0 33
## 10 10 Follow-up and Outcomes 59.5 1 39.6
## 11 11 Discussion 73.5 0 26.5
## 12 12 Patient Perspective 0 0 100
## 13 13 Informed Consent 9.7 87.4 2.9
By raw scores
# Stacked bar chart of the raw number of Yes / Unclear / No answers per
# simplified CARE section. Bars are stacked as counts: the previous
# position = "fill" with a percent axis rescaled them to proportions and made
# this chart identical to the percentage chart. Colours are named so each
# answer keeps its colour even if a level is absent.
df %>%
  mutate(CARE_name_simple = fct_inorder(CARE_name_simple)) %>%
  count(CARE_name_simple, CARE_value) %>%
  ggplot(aes(
    x = fct_rev(CARE_name_simple),
    y = n,
    fill = CARE_value
  )) +
  geom_col(position = "stack") +
  scale_fill_manual(values = c(
    "No" = "#cc3232",
    "Unclear" = "#e7b416",
    "Yes" = "#2dc937"
  )) +
  coord_flip() +
  labs(
    title = "CARE Items Compliance (Raw scores)",
    x = "CARE Item",
    y = "Number of answers",
    fill = "Compliance"
  )

By percentages
# Share of Yes / Unclear / No answers within each simplified CARE section,
# drawn as full-width (100 %) horizontal bars.
df %>%
  mutate(CARE_name_simple = fct_inorder(CARE_name_simple)) %>%
  group_by(CARE_name_simple, CARE_value) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n)) %>% # proportion within the section
  ggplot(aes(
    x = fct_rev(CARE_name_simple),
    y = freq,
    fill = CARE_value
  )) +
  geom_col(position = "fill") +
  scale_fill_manual(values = c("#cc3232", "#e7b416", "#2dc937")) +
  scale_y_continuous(labels = label_percent()) +
  coord_flip() +
  labs(
    title = "CARE Items Compliance (%)",
    x = "CARE Item",
    y = "Percentage",
    fill = "Compliance"
  )

Change before and after
# Compliance rate (share of "Yes" answers) per simplified CARE section,
# computed separately for papers published before and after 2013.
# The rate is taken WITHIN each period. The previous version divided the
# "Yes" count of a period by the "Yes" count of both periods together, so
# Before + After always added up to 100 and mostly reflected how many papers
# each period contains.
df %>%
  mutate(
    CARE_name_simple = fct_inorder(CARE_name_simple), # keep 01..13 order
    moment = fct_relevel(moment, "Before")
  ) %>%
  group_by(CARE_name_simple, moment) %>%
  # %in% never yields NA, so a missing answer counts as "not Yes"
  summarise(
    moment_values = round(mean(CARE_value %in% "Yes") * 100, 1),
    .groups = "drop"
  ) %>%
  ggplot(aes(
    x = fct_rev(CARE_name_simple),
    y = moment_values,
    color = moment
  )) +
  geom_point() +
  coord_flip() +
  labs(
    title = "Change in percentage of compliance\n(CARE = Yes) before and after 2013",
    x = "CARE Items",
    y = "Compliance percentage",
    color = "Moment"
  ) +
  theme(legend.position = "top")

# Dumbbell chart of the compliance rate (share of "Yes" answers) per
# simplified CARE section, before vs after 2013.
# The rate is computed within each period; the previous grouping normalised
# across the two periods instead, so the two points always summed to 100.
# A single coord_flip() is used and the point colours are explained in the
# subtitle.
df %>%
  mutate(CARE_name_simple = fct_inorder(CARE_name_simple)) %>% # keep 01..13 order
  group_by(CARE_name_simple, moment) %>%
  # %in% never yields NA, so a missing answer counts as "not Yes"
  summarise(freq = mean(CARE_value %in% "Yes") * 100, .groups = "drop") %>%
  pivot_wider(
    names_from = moment,
    values_from = freq
  ) %>%
  relocate(Before, .after = CARE_name_simple) %>%
  # reverse once so that section 01 ends up at the top after the flip
  mutate(CARE_name_simple = fct_rev(CARE_name_simple)) %>%
  ggplot(aes(x = CARE_name_simple)) +
  geom_segment(
    aes(xend = CARE_name_simple, y = Before, yend = After),
    color = "grey"
  ) +
  geom_point(aes(y = Before), color = "green", size = 3) +
  geom_point(aes(y = After), color = "red", size = 3) +
  coord_flip() +
  labs(
    title = "Change in percentage of compliance\n(CARE = Yes) before and after 2013",
    subtitle = "Green: before 2013. Red: 2013 onwards",
    x = "CARE Items",
    y = "Compliance percentage"
  )

Wordcloud
# Build the corpus from the abstracts and clean it in a single pass.
# Stop words are removed BEFORE punctuation is stripped, so contractions are
# still recognised as stop words; the earlier extra pass removed punctuation
# first and repeated the number / punctuation / whitespace steps.
docs <- Corpus(VectorSource(text)) %>%
  tm_map(content_transformer(tolower)) %>% # lower case
  tm_map(removeNumbers) %>%
  tm_map(removeWords, stopwords("english")) %>% # common English stop words
  tm_map(removeWords, c("keywords")) %>% # custom stop words
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(stemDocument) # reduce words to their stem

# Term frequencies over the whole corpus, most frequent first.
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
head(d, 10)
## word freq
## parotid parotid 523
## gland gland 366
## case case 316
## carcinoma carcinoma 254
## present present 251
## metastat metastat 223
## metastasi metastasi 213
## cell cell 212
## patient patient 206
## report report 193
# Draw the word cloud; the seed fixes the random layout.
set.seed(1234)
with(
  d,
  wordcloud(
    words = word,
    freq = freq,
    min.freq = 35,
    max.words = 200,
    random.order = TRUE,
    rot.per = 0.35,
    colors = brewer.pal(8, "Dark2")
  )
)

Which terms are correlated?
# findAssocs(dtm, terms = "primary", corlimit = 0.3)
Ingus 1 march 2022
I: Agreement between Ingus and Peteris
0.75
S: what is the quality of the reports?
I: what is the quality before and after 2013
I: difference between journals