Quiz: Researcher Impact Analysis

Author

EFE SAHIN

Load Libraries

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.1     ✔ stringr   1.5.2
✔ ggplot2   4.0.0     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Import the InCites researcher export from the working directory.
# The sheet carries 13 unnamed trailing columns, which readxl auto-names
# `...19` through `...31` (hence the "New names" messages).
data <- readxl::read_excel(path = "Incites Researchers.xlsx")
New names:
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
• `` -> `...27`
• `` -> `...28`
• `` -> `...29`
• `` -> `...30`
• `` -> `...31`
# `data` already holds the workbook from the read_excel() call above, so the
# file is not read a second time here (a repeat read only redoes the I/O and
# prints the same name-repair messages again).
# Compact overview: row/column counts plus each column's type and first values.
data %>% glimpse()
Rows: 820
Columns: 31
$ percent <dbl> 100.00, 20.00, 100.00, 77.78, 91.67, 100.00, 100.00, 100.00, 1…
$ wos     <dbl> 1, 5, 3, 9, 12, 1, 1, 1, 2, 3, 1, 2, 2, 3, 1, 10, 1, 1, 3, 2, …
$ cnci    <dbl> 6.034700, 13.880000, 3.238100, 12.380133, 5.783275, 4.037300, …
$ rank    <dbl> 1, 2, 3, 4, 5, 6, 7, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 17,…
$ cites   <dbl> 1, 1, 10, 381, 2293, 1, 1, 1, 15, 56, 8, 2, 12, 5, 5, 35, 8, 8…
$ aff1    <chr> "Bogazici University", "Izmir Ekonomi Universitesi", "Izmir Ek…
$ aff2    <chr> "Izmir Ekonomi Universitesi", "Izmir Univ Econ", "AstraZeneca"…
$ aff3    <chr> "Anadolu University", "University of Southern Denmark", "Bogaz…
$ aff4    <chr> "University of Sheffield", NA, "Solar Biyoteknol Ltd SolarBiot…
$ aff5    <chr> "Gulhane Training & Research Hospital", NA, "Dokuz Eylul Unive…
$ aff6    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Kilis…
$ aff7    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Inst …
$ aff8    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ aff9    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ aff10   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ id      <chr> "EPD-7110-2022", "PHK-4697-2026", "AHD-9641-2022", "HTR-4086-2…
$ impact  <dbl> 56.045800, 33.080000, 20.084800, 12.858500, 11.324900, 9.74170…
$ ORCID   <chr> NA, NA, "0000-0002-8150-546X", "0000-0002-0276-4886", "0000-00…
$ ...19   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...20   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...21   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...22   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...23   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...24   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...25   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...26   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...27   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...28   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...29   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...30   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...31   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
# List every column name, including the auto-generated `...N` placeholders.
colnames(data)
 [1] "percent" "wos"     "cnci"    "rank"    "cites"   "aff1"    "aff2"   
 [8] "aff3"    "aff4"    "aff5"    "aff6"    "aff7"    "aff8"    "aff9"   
[15] "aff10"   "id"      "impact"  "ORCID"   "...19"   "...20"   "...21"  
[22] "...22"   "...23"   "...24"   "...25"   "...26"   "...27"   "...28"  
[29] "...29"   "...30"   "...31"  
# Keep the researchers whose first-listed affiliation contains "Izmir Ekonomi".
# Rows with a missing `aff1` are dropped, because filter() discards NA matches.
# NOTE(review): only `aff1` and this one spelling are checked; the data also
# shows the variant "Izmir Univ Econ" and Izmir affiliations in `aff2` onward.
# Confirm that excluding those rows is intended.
izmir_data <- filter(data, str_detect(aff1, "Izmir Ekonomi"))

# Show the filtered subset.
izmir_data
# A tibble: 630 × 31
   percent   wos  cnci  rank cites aff1      aff2  aff3  aff4  aff5  aff6  aff7 
     <dbl> <dbl> <dbl> <dbl> <dbl> <chr>     <chr> <chr> <chr> <chr> <chr> <chr>
 1    20       5 13.9      2     1 Izmir Ek… Izmi… Univ… <NA>  <NA>  <NA>  <NA> 
 2   100       3  3.24     3    10 Izmir Ek… Astr… Boga… Sola… Doku… <NA>  <NA> 
 3    77.8     9 12.4      4   381 Izmir Ek… <NA>  <NA>  <NA>  <NA>  <NA>  <NA> 
 4   100       1  4.04     6     1 Izmir Ek… <NA>  <NA>  <NA>  <NA>  <NA>  <NA> 
 5   100       1  9.17     7     1 Izmir Ek… Medi… Hakk… <NA>  <NA>  <NA>  <NA> 
 6   100       2  5.66     9    15 Izmir Ek… Univ… Utah… Izmi… <NA>  <NA>  <NA> 
 7   100       3  4.62    10    56 Izmir Ek… <NA>  <NA>  <NA>  <NA>  <NA>  <NA> 
 8   100       1  6.91    11     8 Izmir Ek… <NA>  <NA>  <NA>  <NA>  <NA>  <NA> 
 9   100       2  5.92    12     2 Izmir Ek… <NA>  <NA>  <NA>  <NA>  <NA>  <NA> 
10   100       2  5.54    13    12 Izmir Ek… <NA>  <NA>  <NA>  <NA>  <NA>  <NA> 
# ℹ 620 more rows
# ℹ 19 more variables: aff8 <chr>, aff9 <chr>, aff10 <chr>, id <chr>,
#   impact <dbl>, ORCID <chr>, ...19 <lgl>, ...20 <chr>, ...21 <chr>,
#   ...22 <chr>, ...23 <chr>, ...24 <chr>, ...25 <chr>, ...26 <chr>,
#   ...27 <chr>, ...28 <chr>, ...29 <chr>, ...30 <chr>, ...31 <chr>
# Distribution of CNCI across the filtered researchers, in 10 bins.
# NOTE(review): the data also has a separate `impact` column; confirm that
# `cnci` is the measure this "Impact" plot is meant to show.
ggplot(izmir_data) +
  geom_histogram(aes(x = cnci), bins = 10) +
  ggtitle("Histogram of Impact") +
  xlab("Impact (CNCI)") +
  ylab("Frequency")

# Single boxplot of CNCI for the filtered researchers.
ggplot(izmir_data) +
  geom_boxplot(aes(y = cnci)) +
  ggtitle("Boxplot of Impact") +
  ylab("Impact (CNCI)")

# Summary of CNCI: min, quartiles, median, mean and max.
summary(izmir_data[["cnci"]])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.0000  0.2539  0.6258  0.8164 13.8800 
# Sample standard deviation of CNCI, ignoring any missing values.
izmir_data %>%
  pull(cnci) %>%
  sd(na.rm = TRUE)
[1] 1.212516