# Load the required packages
library(tidyverse)
library(readxl)
# Import the researcher spreadsheet that lives in the data/ folder
researchers <- "data/Incites Researchers.xlsx" |>
  read_excel()
Week 4-5 Assignment: EDA of Researcher Impact
Research Question
What does the distribution of research impact look like among researchers whose first affiliation is Izmir Ekonomi Universitesi?
Step 1: Load Packages and Read the Data
Step 2: Create a Tibble and Glimpse the Data
# Make sure the data is stored as a tibble, which prints compactly
researchers <- researchers |>
  as_tibble()
# Inspect the structure: row and column counts, column names,
# column types, and the leading values of every column
researchers |>
  glimpse()
Rows: 820
Columns: 31
$ percent <dbl> 100.00, 20.00, 100.00, 77.78, 91.67, 100.00, 100.00, 100.00, 1…
$ wos <dbl> 1, 5, 3, 9, 12, 1, 1, 1, 2, 3, 1, 2, 2, 3, 1, 10, 1, 1, 3, 2, …
$ cnci <dbl> 6.034700, 13.880000, 3.238100, 12.380133, 5.783275, 4.037300, …
$ rank <dbl> 1, 2, 3, 4, 5, 6, 7, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 17,…
$ cites <dbl> 1, 1, 10, 381, 2293, 1, 1, 1, 15, 56, 8, 2, 12, 5, 5, 35, 8, 8…
$ aff1 <chr> "Bogazici University", "Izmir Ekonomi Universitesi", "Izmir Ek…
$ aff2 <chr> "Izmir Ekonomi Universitesi", "Izmir Univ Econ", "AstraZeneca"…
$ aff3 <chr> "Anadolu University", "University of Southern Denmark", "Bogaz…
$ aff4 <chr> "University of Sheffield", NA, "Solar Biyoteknol Ltd SolarBiot…
$ aff5 <chr> "Gulhane Training & Research Hospital", NA, "Dokuz Eylul Unive…
$ aff6 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Kilis…
$ aff7 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Inst …
$ aff8 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ aff9 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ aff10 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ id <chr> "EPD-7110-2022", "PHK-4697-2026", "AHD-9641-2022", "HTR-4086-2…
$ impact <dbl> 56.045800, 33.080000, 20.084800, 12.858500, 11.324900, 9.74170…
$ ORCID <chr> NA, NA, "0000-0002-8150-546X", "0000-0002-0276-4886", "0000-00…
$ ...19 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...20 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...21 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...22 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...23 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...24 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...25 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...26 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...27 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...28 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...29 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...30 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ ...31 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
Step 3: Filter for Izmir Ekonomi Universitesi
# Restrict the data to researchers whose first affiliation (aff1)
# contains the text "Izmir Ekonomi Universitesi"
# filter() keeps only rows where str_detect() returns TRUE, so any row
# with a missing aff1 is dropped as well
ieu <- filter(
  researchers,
  str_detect(aff1, "Izmir Ekonomi Universitesi")
)
# Count the researchers that remain after filtering
nrow(ieu)
[1] 630
Step 4: Histogram of Impact
# A histogram shows the shape of the distribution
# Each bar counts the researchers whose impact falls in one range of values
# binwidth = 1 means each bar covers 1 unit of impact
# boundary = 0 lines the bars up as 0-1, 1-2, ...; without it ggplot2
# centers the bins on multiples of the binwidth, so the first bar would
# span -0.5 to 0.5 even though the smallest impact score is 0
ieu |>
  ggplot(aes(x = impact)) +
  geom_histogram(
    binwidth = 1,
    boundary = 0,
    fill = "steelblue",
    color = "white"
  ) +
  labs(
    title = "Distribution of Research Impact",
    subtitle = "First affiliation: Izmir Ekonomi Universitesi",
    x = "Impact Score",
    y = "Number of Researchers"
  ) +
  theme_minimal()
Step 5: Boxplot of Impact
# A boxplot summarizes the distribution in one picture:
# the box runs from Q1 to Q3 and the line inside it marks the median
# With ggplot2's defaults the whiskers reach the most extreme values
# within 1.5 x IQR of the box; points beyond them are drawn as
# individual dots (outliers)
ggplot(ieu, aes(y = impact)) +
  geom_boxplot(fill = "steelblue", color = "black", alpha = 0.7) +
  labs(
    title = "Boxplot of Research Impact",
    subtitle = "First affiliation: Izmir Ekonomi Universitesi",
    y = "Impact Score"
  ) +
  theme_minimal()
Step 6: Summary Statistics
# Numeric summary of impact to back up what the plots show:
# row count, center (mean, median), spread (sd), and range (min, max)
# na.rm = TRUE leaves missing impact values out of each statistic;
# n counts every row of ieu
summarize(
  ieu,
  n = n(),
  mean_impact = mean(impact, na.rm = TRUE),
  median_impact = median(impact, na.rm = TRUE),
  sd_impact = sd(impact, na.rm = TRUE),
  min_impact = min(impact, na.rm = TRUE),
  max_impact = max(impact, na.rm = TRUE)
)
# A tibble: 1 × 6
n mean_impact median_impact sd_impact min_impact max_impact
<int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 630 0.689 0.241 1.87 0 33.1
Step 7: Interpretation
Your answer: The distribution of research impact among Izmir Ekonomi Universitesi researchers is strongly right-skewed. The histogram shows that most researchers are concentrated at the lower end of the impact scale, with a long tail stretching toward higher values. This means that a small number of researchers have very high impact scores while the majority have modest ones. The boxplot confirms this, and so do the summary statistics: the mean (0.689) is nearly three times the median (0.241), and the maximum (33.1) lies far above both.