---
title: "Use of ChatGPT in Dental Scientific Writing"
author: "Sergio Uribe"
date-modified: last-modified
format:
  html:
    toc: true
    toc-expand: 3
    code-fold: true
    code-tools: true
editor: visual
execute:
  echo: false
  cache: false
  warning: false
  message: false
---

```{r}
# Load required libraries with pacman; installs them if not already installed
pacman::p_load(
  tidyverse, # tools for data science
  janitor,   # for data cleaning and tables
  here,      # for reproducible research
  scales,
  lubridate
)
```

```{r}
theme_set(theme_minimal())
```

# Dataset

```{r}
#| echo: false
# Read the published Google Sheet; "#N/A" cells are treated as missing and
# fully empty rows/columns are dropped.
df <- read_csv(
  "https://docs.google.com/spreadsheets/d/e/2PACX-1vRpbTAtwkQIs9aJ5_jajV2L-KL3qkV8IDfS1CeT4md1DNQjBH-QTVBV87XHpBKArkTQxGcV4NCHuUzt/pub?gid=1581422773&single=true&output=csv",
  na = c("#N/A")
) |>
  janitor::remove_empty(which = c("rows", "cols"))
```

```{r}
# glimpse(df)
```

## EDA

```{r}
#| echo: false
# Keep only the year column through the last word-count column.
df <- df |>
  select(Year...1:revolutionize...9)
```

```{r}
# glimpse(df)
```

```{r}
#' Convert yearly word counts into long-format rates per `per` dental papers.
#'
#' @param data Data frame with the columns `Year...1`, `Dental papers`,
#'   `Delve`, `Realm`, `Intricacies`, `underscore`, `Spearhead...7`,
#'   `showcasing...8` and `revolutionize...9`.
#'   NOTE(review): `Dental papers` is presumably the yearly total of dental
#'   papers; confirm against the spreadsheet.
#' @param per Scaling factor applied to each count / `Dental papers` ratio.
#' @return A tibble with columns `Year`, `name` (the word, upper camel case,
#'   e.g. "Delve") and `value` (the scaled rate).
word_rates_long <- function(data, per = 10000) {
  data |>
    rename(Year = Year...1) |>
    mutate(
      # Calculate new variables
      Delve_ratio = (Delve / `Dental papers`) * per,
      Realm_ratio = (Realm / `Dental papers`) * per,
      Intricacies_ratio = (Intricacies / `Dental papers`) * per,
      underscore_ratio = (underscore / `Dental papers`) * per,
      Spearhead_ratio = (`Spearhead...7` / `Dental papers`) * per,
      showcasing_ratio = (`showcasing...8` / `Dental papers`) * per,
      revolutionize_ratio = (`revolutionize...9` / `Dental papers`) * per
    ) |>
    # Keep the year and the freshly computed ratio columns only
    select(Year, Delve_ratio:revolutionize_ratio) |>
    janitor::clean_names(case = "upper_camel") |>
    # One row per year and word
    pivot_longer(-Year) |>
    # clean_names() turned e.g. `Delve_ratio` into `DelveRatio`;
    # strip the suffix so only the word remains
    mutate(name = str_replace(name, "Ratio", ""))
}

word_rates <- word_rates_long(df)

# Base trend plot shared by the two faceted views below.
trend_plot <- word_rates |>
  # Restrict the plot to 2014-2024
  filter(Year >= 2014, Year <= 2024) |>
  ggplot(aes(x = Year, y = value, color = name, group = name)) +
  geom_line() +
  # Dashed vertical line marking the year 2022
  geom_vline(xintercept = 2022, color = "#3b528b", linetype = "dashed") +
  labs(
    title = "Normalized potential ChatGPT fingerprints in abstracts \n per 10 000 dental articles",
    subtitle = "PubMed Abstracts, Jan 2014 - March 2024",
    y = "Abstracts per 10 000 Articles (log 10)",
    color = "Word"
  ) +
  scale_y_log10()
```

```{r}
# One panel per word, side by side
trend_plot +
  facet_grid(. ~ name)
```

```{r}
# One panel per word, stacked vertically
trend_plot +
  facet_grid(name ~ .)
```

```{r}
# Rates from 2004 onwards, used for the before/after 2022 comparison
df_mini <- word_rates |>
  filter(Year >= 2004)

# Mean rate per word within each period. The "a"/"b" prefixes make the
# period labels sort chronologically.
period_means <- df_mini |>
  # Create a new category based on the year
  mutate(period = case_when(
    Year < 2022 ~ "a 2004 to 2021",
    Year >= 2022 ~ "b 2022 to 2024"
  )) |>
  # Group by word and period, then average within each group
  group_by(name, period) |>
  summarise(average_value = mean(value, na.rm = TRUE), .groups = "drop")

# One row per word, one column per period
period_wide <- period_means |>
  pivot_wider(names_from = period, values_from = average_value)
```

### Change in ratio before and after

```{r}
period_means |>
  ggplot(aes(x = period, y = average_value, color = name, group = name)) +
  geom_point() +
  geom_line() +
  labs(
    title = "Change in the use of fingerprint words since 2022 per 10 000 dental abstracts",
    # df_mini starts in 2004, so the subtitle reflects that range
    subtitle = "PubMed, Jan 2004 - March 2024",
    y = "Abstracts per 10 000 Dental Articles",
    color = "Word"
  )
```

### Calculate the change before 2022

#### Percentage of change

```{r}
# Relative change of the 2022-2024 mean versus the 2004-2021 mean, in percent
percent_change <- period_wide |>
  mutate(
    change_percent =
      (`b 2022 to 2024` - `a 2004 to 2021`) / `a 2004 to 2021` * 100
  )

percent_change |>
  knitr::kable()
```

```{r}
percent_change |>
  ggplot(aes(x = fct_reorder(name, change_percent), y = change_percent)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Percent Change in Variable Before and After 2022",
    y = "Percent",
    x = ""
  ) +
  # Values are already in percent units, so only append the "%" sign
  scale_y_continuous(labels = scales::percent_format(scale = 1))
```

#### Ratio of change

```{r}
# 2022-2024 mean as a percentage of the 2004-2021 mean (100 = no change)
ratio_change <- period_wide |>
  mutate(change = (`b 2022 to 2024` / `a 2004 to 2021`) * 100)

ratio_change |>
  knitr::kable()
```

```{r}
ratio_change |>
  ggplot(aes(x = fct_reorder(name, change), y = change)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Ratio Change in Variable Before and After 2022",
    y = "Ratio",
    x = ""
  )
```

###