1. R Markdown

Что такое наука

Процесс добычи научного знания

# Read the JCR spreadsheet, then drop four columns by name in a single step.
data <- readxl::read_excel("RUSSIA_JCR_2024.xlsx")
data <- data[
  !(names(data) %in% c("JCR Abbreviation", "ISSN", "eISSN", "Edition"))
]

2. Считаем среднее по Category

# Step 1: bring the data to the required type.
# The JIF column is converted to numeric; entries that cannot be parsed as
# numbers become NA, which as.numeric() reports with a coercion warning.
data_clean <- mutate(data, `2023 JIF` = as.numeric(`2023 JIF`))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `2023 JIF = as.numeric(`2023 JIF`)`.
## Caused by warning:
## ! в результате преобразования созданы NA
# Step 2: summary statistics per category.
# Rows whose JIF is NA (non-numeric in the source) are discarded first; each
# category then gets its mean JIF rounded to 3 decimals and its row count.
# The result is ordered from the highest mean JIF to the lowest.
result_1 <- data_clean %>%
  filter(!is.na(`2023 JIF`)) %>%
  group_by(Category) %>%
  summarise(Mean_JIF = round(mean(`2023 JIF`), digits = 3), Count = n()) %>%
  arrange(desc(Mean_JIF))

Средние значения JIF по журналам в одной категории

# Render the per-category summary as a table with Russian column headers.
kable(
  x = result_1,
  col.names = c("Категория", "Средний JIF", "Журналов")
)
Категория Средний JIF Журналов
CHEMISTRY, MULTIDISCIPLINARY 1.400 11
SOIL SCIENCE 1.400 1
FOOD SCIENCE & TECHNOLOGY 1.300 1
CHEMISTRY, INORGANIC & NUCLEAR 1.250 4
BIODIVERSITY CONSERVATION 1.200 1
POLYMER SCIENCE 1.200 3
MINING & MINERAL PROCESSING 1.167 3
BIOCHEMISTRY & MOLECULAR BIOLOGY 1.150 6
METEOROLOGY & ATMOSPHERIC SCIENCES 1.150 2
MICROBIOLOGY 1.150 2
CHEMISTRY, PHYSICAL 1.129 7
ELECTROCHEMISTRY 1.100 1
OCEANOGRAPHY 1.100 2
CELL BIOLOGY 1.067 3
CHEMISTRY, ORGANIC 1.067 3
BIOTECHNOLOGY & APPLIED MICROBIOLOGY 1.000 1
BUSINESS, FINANCE 1.000 1
CHEMISTRY, ANALYTICAL 1.000 1
LINGUISTICS 1.000 1
MATERIALS SCIENCE, MULTIDISCIPLINARY 1.000 7
MEDICINE, RESEARCH & EXPERIMENTAL 1.000 2
ASTRONOMY & ASTROPHYSICS 0.983 6
PHYSICS, MATHEMATICAL 0.980 5
PHYSICS, MULTIDISCIPLINARY 0.942 12
ACOUSTICS 0.900 1
AGRICULTURE, MULTIDISCIPLINARY 0.900 1
ENERGY & FUELS 0.900 5
ENGINEERING, PETROLEUM 0.900 3
MATERIALS SCIENCE, CHARACTERIZATION & TESTING 0.900 1
THERMODYNAMICS 0.900 4
WATER RESOURCES 0.900 1
PALEONTOLOGY 0.850 2
PHYSICS, FLUIDS & PLASMAS 0.850 2
PHYSICS, APPLIED 0.840 5
MINERALOGY 0.833 3
PLANT SCIENCES 0.833 3
EDUCATION & EDUCATIONAL RESEARCH 0.825 4
BIOPHYSICS 0.800 1
CHEMISTRY, MEDICINAL 0.800 1
ENGINEERING, CIVIL 0.800 1
GENETICS & HEREDITY 0.800 2
SPECTROSCOPY 0.800 1
GEOCHEMISTRY & GEOPHYSICS 0.770 10
NANOSCIENCE & NANOTECHNOLOGY 0.760 5
ENGINEERING, CHEMICAL 0.757 7
OPTICS 0.757 7
COMPUTER SCIENCE, INTERDISCIPLINARY APPLICATIONS 0.700 1
COMPUTER SCIENCE, SOFTWARE ENGINEERING 0.700 1
EDUCATION, SCIENTIFIC DISCIPLINES 0.700 1
ENDOCRINOLOGY & METABOLISM 0.700 1
ENGINEERING, GEOLOGICAL 0.700 2
ENTOMOLOGY 0.700 1
GEOLOGY 0.700 4
METALLURGY & METALLURGICAL ENGINEERING 0.700 8
SOCIAL SCIENCES, INTERDISCIPLINARY 0.700 3
ZOOLOGY 0.683 6
MARINE & FRESHWATER BIOLOGY 0.650 2
PHARMACOLOGY & PHARMACY 0.650 2
ENGINEERING, MECHANICAL 0.633 3
PHYSICS, ATOMIC, MOLECULAR & CHEMICAL 0.633 3
MECHANICS 0.613 8
GEOSCIENCES, MULTIDISCIPLINARY 0.612 8
BUSINESS 0.600 1
CHEMISTRY, APPLIED 0.600 1
EVOLUTIONARY BIOLOGY 0.600 1
GERIATRICS & GERONTOLOGY 0.600 1
MATERIALS SCIENCE, CERAMICS 0.600 3
MATHEMATICS 0.600 23
ECOLOGY 0.583 6
ENGINEERING, AEROSPACE 0.550 2
PHYSICS, CONDENSED MATTER 0.550 4
ECONOMICS 0.540 10
PSYCHOLOGY, MULTIDISCIPLINARY 0.537 8
MANAGEMENT 0.533 3
COMPUTER SCIENCE, ARTIFICIAL INTELLIGENCE 0.500 1
COMPUTER SCIENCE, CYBERNETICS 0.500 1
COMPUTER SCIENCE, INFORMATION SYSTEMS 0.500 1
COMPUTER SCIENCE, THEORY & METHODS 0.500 2
ENGINEERING, ELECTRICAL & ELECTRONIC 0.500 3
FISHERIES 0.500 1
INSTRUMENTS & INSTRUMENTATION 0.500 3
MATERIALS SCIENCE, TEXTILES 0.500 1
POLITICAL SCIENCE 0.500 2
COMMUNICATION 0.467 3
ENGINEERING, MULTIDISCIPLINARY 0.467 3
CRYSTALLOGRAPHY 0.450 2
MATHEMATICS, APPLIED 0.450 12
PSYCHOLOGY, APPLIED 0.450 2
STATISTICS & PROBABILITY 0.450 2
PHYSICS, PARTICLES & FIELDS 0.433 3
LANGUAGE & LINGUISTICS 0.411 9
AUTOMATION & CONTROL SYSTEMS 0.400 2
CARDIAC & CARDIOVASCULAR SYSTEMS 0.400 2
LOGIC 0.400 1
MATHEMATICS, INTERDISCIPLINARY APPLICATIONS 0.400 2
MUSIC 0.400 1
NUCLEAR SCIENCE & TECHNOLOGY 0.400 1
PHYSIOLOGY 0.400 2
PSYCHOLOGY, CLINICAL 0.400 2
PSYCHOLOGY, EXPERIMENTAL 0.400 1
TELECOMMUNICATIONS 0.400 1
AREA STUDIES 0.371 7
BIOLOGY 0.350 2
NEUROSCIENCES 0.350 2
INTERNATIONAL RELATIONS 0.333 3
SOCIOLOGY 0.333 6
GEOGRAPHY 0.300 1
HISTORY & PHILOSOPHY OF SCIENCE 0.300 2
INFORMATION SCIENCE & LIBRARY SCIENCE 0.300 2
LITERATURE, SLAVIC 0.300 3
MULTIDISCIPLINARY SCIENCES 0.300 3
ORTHOPEDICS 0.300 1
PHYSICS, NUCLEAR 0.300 1
SOCIAL ISSUES 0.300 2
ANTHROPOLOGY 0.250 2
HUMANITIES, MULTIDISCIPLINARY 0.250 14
LITERARY THEORY & CRITICISM 0.250 2
MEDICINE, GENERAL & INTERNAL 0.225 4
ENGINEERING, MARINE 0.200 1
FILM, RADIO, TELEVISION 0.200 1
FORESTRY 0.200 1
HEMATOLOGY 0.200 1
INFECTIOUS DISEASES 0.200 1
MEDIEVAL & RENAISSANCE STUDIES 0.200 1
PSYCHOLOGY, EDUCATIONAL 0.200 1
PUBLIC ADMINISTRATION 0.200 1
SPORT SCIENCES 0.200 1
TRANSPLANTATION 0.200 1
HISTORY 0.179 14
LAW 0.167 6
PHILOSOPHY 0.157 7
ARCHAEOLOGY 0.150 2
LITERATURE 0.150 2
CLASSICS 0.100 1
CONSTRUCTION & BUILDING TECHNOLOGY 0.100 1
CRIMINOLOGY & PENOLOGY 0.100 1
ONCOLOGY 0.100 1
RELIGION 0.100 1

3. Считаем среднее по JIF Quartile

# Step 3: summary statistics per JIF quartile.
# Rows whose JIF is NA (non-numeric in the source) are discarded first; each
# quartile then gets its mean JIF rounded to 3 decimals and its row count.
# The result is ordered from the highest mean JIF to the lowest.
result_2 <- data_clean %>%
  filter(!is.na(`2023 JIF`)) %>%
  group_by(`JIF Quartile`) %>%
  summarise(Mean_JIF = round(mean(`2023 JIF`), digits = 3), Count = n()) %>%
  arrange(desc(Mean_JIF))

Средние значения JIF по журналам в одном квартиле

# Render the per-quartile summary as a table with Russian column headers.
kable(
  x = result_2,
  col.names = c("Квартиль", "Средний JIF", "Журналов")
)
Квартиль Средний JIF Журналов
Q1 3.000 4
Q3 0.878 72
Q2 0.811 18
Q4 0.584 304
N/A 0.257 44
# Scatter plot of the per-category summary: row count on the x axis, mean JIF
# on the y axis, with a dashed linear trend line drawn on top of the points.
ggplot(data = result_1, mapping = aes(x = Count, y = Mean_JIF)) +
  geom_point(size = 3, alpha = 0.7, color = "steelblue") +
  # Linear fit without the confidence band.
  geom_smooth(
    method = "lm",
    se = FALSE,
    linetype = "dashed",
    color = "darkred"
  ) +
  ggtitle("Зависимость среднего JIF от количества журналов в категории") +
  xlab("Количество журналов в категории") +
  ylab("Средний импакт-фактор (2023 JIF)") +
  theme_minimal() +
  # Centre the title and make it bold; the subtitle styling has no visible
  # effect here because this plot sets no subtitle.
  theme(
    plot.subtitle = element_text(hjust = 0.5, color = "gray40"),
    plot.title = element_text(face = "bold", hjust = 0.5)
  )
## `geom_smooth()` using formula = 'y ~ x'