В этом отчёте рассчитывается оценка суммарной рождаемости (TFR) для каждой национальности на 2021 год. Подход: взвешиваем региональные TFR по распределению соответствующей национальности по регионам (предполагая одинаковую по полу‑возрастную структуру у национальностей и общее отсутствие различий в возрастном профиле фертильности внутри региона).
Входные файлы (локальные пути можно изменить):
Категоризация народов России.xlsx — справочник
национальностей с колонками Nationality,
Status, Ethnicity.Ethnicity_regions.xlsx (лист
Население 2021) — численность по национальностям по
регионам (строки — национальности, столбцы — регионы).tfr_regions_2021.csv — возрастные коэффициенты
рождаемости женщин по регионам (строки: регион × 5‑летняя возрастная
группа; колонка 2021 — ASFR на 1000 женщин).Важно: если для части регионов отсутствуют сопоставления между именами в двух таблицах, они будут явно показаны в диагностике.
library(tidyverse)
library(readxl)
library(janitor)
library(stringr)
library(ggplot2)
# ==== Пути к файлам (измените при необходимости) ====
path_cat <- "Категоризация народов России.xlsx"
path_eth <- "Ethnicity_regions.xlsx"
path_tfr <- "tfr_regions_2021.csv"
path_age <- "age_gender_nationality_all.csv" # файл с пирамидой по нациям
path_total <- "age_gender_2021_years5.csv" # полный итог по РФ
current_input <- tryCatch(knitr::current_input(), error = function(e) NULL)
base_dir <- if (is.null(current_input)) getwd() else dirname(normalizePath(current_input))
script_path <- file.path(base_dir, "demography_inputs.R")
if (!file.exists(script_path)) {
script_path <- "demography_inputs.R"
}
source(script_path, local = TRUE)
total_population_file <- file.path(base_dir, path_total)
if (!file.exists(total_population_file)) {
total_population_file <- path_total
}
total_population_russia <- readr::read_csv(total_population_file, show_col_types = FALSE)
demography_inputs <- prepare_nationality_demography(
path_cat = path_cat,
path_eth = path_eth,
path_tfr = path_tfr,
path_age = path_age
)
cat_tbl <- demography_inputs$cat_tbl
eth_tbl_raw <- demography_inputs$eth_tbl_raw
eth_tbl_nosvc <- demography_inputs$eth_tbl_nosvc
eth_tbl <- demography_inputs$eth_tbl
region_map <- demography_inputs$region_map
share_f1549 <- demography_inputs$share_f1549
age_parsed <- demography_inputs$age_parsed
ap_sex <- demography_inputs$ap_sex
coverage <- demography_inputs$coverage
tfr_by_nat <- demography_inputs$tfr_by_nat
tfr_by_nat_ext <- demography_inputs$tfr_by_nat_ext
tfr_by_ethnicity <- demography_inputs$tfr_by_ethnicity
tfr_by_status <- demography_inputs$tfr_by_status
consistency <- demography_inputs$consistency
eth_consistency <- demography_inputs$eth_consistency
status_consistency <- demography_inputs$status_consistency
age_population_gap <- demography_inputs$age_population_gap
overall_population_gap <- demography_inputs$overall_population_gap
unmapped_tfr <- demography_inputs$unmapped$unmapped_tfr
unmapped_eth <- demography_inputs$unmapped$unmapped_eth
population_target_total <- demography_inputs$metadata$population_target_total
population_baseline_total <- demography_inputs$metadata$population_baseline_total
population_scale_factor <- demography_inputs$metadata$population_scale_factor
russia_tfr <- demography_inputs$russia_tfr
share_f1549 <- demography_inputs$share_f1549
unknown_profile <- demography_inputs$unknown_profile
others_pyr <- unknown_profile
# ==== 7) Пирамиды по нациям ====
# данные уже получены функцией prepare_nationality_demography()
population_target_total <- demography_inputs$metadata$population_target_total
population_baseline_total <- demography_inputs$metadata$population_baseline_total
population_scale_factor <- demography_inputs$metadata$population_scale_factor
## $unmapped_tfr
## # A tibble: 0 × 2
## # ℹ 2 variables: region <chr>, tfr_2021 <dbl>
##
## $unmapped_eth
## # A tibble: 0 × 1
## # ℹ 1 variable: eth_region <chr>
Если список сверху пустой для обоих типов регионов, сопоставление прошло полностью.
coverage <- demography_inputs$coverage
tfr_by_nat <- demography_inputs$tfr_by_nat
tfr_by_nat_ext <- demography_inputs$tfr_by_nat_ext
tfr_by_ethnicity <- demography_inputs$tfr_by_ethnicity
tfr_by_status <- demography_inputs$tfr_by_status
consistency <- demography_inputs$consistency
eth_consistency <- demography_inputs$eth_consistency
status_consistency <- demography_inputs$status_consistency
list(
head_nationalities = head(tfr_by_nat_ext, 10),
russia_tfr = demography_inputs$russia_tfr,
consistency = consistency,
nationality_structure = demography_inputs$tfr_status$nationality$table %>%
select(nationality, status, tfr_nat) %>%
head(10),
sample_female_weights = lapply(
demography_inputs$tfr_status$nationality$weights[1:2],
function(mat) {
if (is.null(mat)) return(NULL)
tibble(
age = rownames(mat)[1:5],
t(mat[1:5, , drop = FALSE])
)
}
)
)
## $head_nationalities
## # A tibble: 10 × 6
## nationality status ethnicity pop_total fem1549_total tfr_nat
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Абазины коренной Северный Кавказ 41793 20760. 1.36
## 2 Аварцы коренной Северный Кавказ 1012074 523929. 1.75
## 3 Агулы коренной Северный Кавказ 34576 18814. 1.71
## 4 Адыгейцы коренной Северный Кавказ 111471 50278. 1.53
## 5 Азербайджанцы мигрант Кавказ 474576 259522. 1.55
## 6 Алеуты коренной Сибирь/Дальний Восток 397 211. 1.58
## 7 Алтайцы коренной Сибирь/Дальний Восток 78125 36073. 2.03
## 8 Армяне мигрант Кавказ 946172 481556. 1.48
## 9 Балкарцы коренной Северный Кавказ 125044 61833. 1.66
## 10 Башкиры коренной Башкиры 1571879 685381. 1.51
##
## $russia_tfr
## [1] 1.488078
##
## $consistency
## # A tibble: 1 × 3
## russia_tfr nationality_weighted_tfr abs_diff
## <dbl> <dbl> <dbl>
## 1 1.49 1.49 0.000485
##
## $nationality_structure
## # A tibble: 10 × 3
## nationality status tfr_nat
## <chr> <chr> <dbl>
## 1 Абазины коренной 1.36
## 2 Аварцы коренной 1.75
## 3 Агулы коренной 1.71
## 4 Адыгейцы коренной 1.53
## 5 Азербайджанцы мигрант 1.55
## 6 Алеуты коренной 1.58
## 7 Алтайцы коренной 2.03
## 8 Армяне мигрант 1.48
## 9 Балкарцы коренной 1.66
## 10 Башкиры коренной 1.51
##
## $sample_female_weights
## $sample_female_weights[[1]]
## NULL
##
## $sample_female_weights[[2]]
## NULL
library(ggplot2)
# Стандартизируем подписи возрастов для красивых осей
age_levels <- c("0 – 4","5 – 9","10 – 14","15 – 19","20 – 24","25 – 29","30 – 34","35 – 39","40 – 44","45 – 49",
"50 – 54","55 – 59","60 – 64","65 – 69","70 – 74","75 – 79","80 – 84","85 – 89","90 – 94","95 – 99","100 и более",
"0-4","5-9","10-14","15-19","20-24","25-29","30-34","35-39","40-44","45-49",
"50-54","55-59","60-64","65-69","70-74","75-79","80-84","85-89","90-94","95-99","100+")
age_plot <- age_parsed |>
mutate(age_std = str_replace_all(age, "\\s*–\\s*", "-"),
age_std = str_replace_all(age_std, "и более", "+"),
age_std = str_trim(age_std)) |>
mutate(age_std = factor(age_std, levels = unique(c(age_levels[age_levels %in% age_std], age_std))))
plot_pyramid <- function(nat){
df <- age_plot |> filter(nationality == nat)
if(nrow(df) == 0) return(NULL)
df2 <- df |>
mutate(male_neg = -male/1000, female_pos = female/1000) # тысячи чел.
ggplot(df2, aes(y = age_std)) +
geom_col(aes(x = male_neg), width = 0.9) +
geom_col(aes(x = female_pos), width = 0.9) +
scale_x_continuous(labels = abs) +
labs(x = "Численность, тыс.", y = "Возраст", title = paste0("Поло-возрастная пирамида — ", nat)) +
theme_minimal(base_size = 12)
}
# Папка для выгрузки
out_dir <- "pyramids"
if(!dir.exists(out_dir)) dir.create(out_dir)
nats <- sort(unique(age_parsed$nationality))
# Сохраним все пирамиды в PNG (по ~1200×800)
for(nat in nats){
p <- plot_pyramid(nat)
if(!is.null(p)) ggsave(filename = file.path(out_dir, paste0("pyramid_", nat, ".png")), plot = p, width = 12, height = 8, dpi = 150)
}
# Выведем первые 3 пирамиды в отчёт
head_nats <- head(nats, 3)
plist <- lapply(head_nats, plot_pyramid)
plist
## [[1]]
##
## [[2]]
##
## [[3]]
# Экспорт в CSV
readr::write_csv(tfr_by_nat_ext, "TFR_by_nationality_2021.csv")
readr::write_csv(coverage, "TFR_by_nationality_coverage_2021.csv")
readr::write_csv(consistency, "TFR_consistency_check_2021.csv")
readr::write_csv(tfr_by_ethnicity, "TFR_by_ethnicity_2021.csv")
readr::write_csv(eth_consistency, "TFR_ethnicity_consistency_2021.csv")
readr::write_csv(tfr_by_status, "TFR_by_status_2021.csv")
readr::write_csv(status_consistency, "TFR_status_consistency_2021.csv")
readr::write_csv(age_population_gap, "age_population_gap_by_nationality.csv")
readr::write_csv(overall_population_gap, "age_population_gap_overall.csv")
# Короткий обзор распределения TFR
summary(tfr_by_nat_ext$tfr_nat)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.210 1.483 1.540 1.584 1.664 2.831 1
# Национальности из справочника, которых нет в этно-таблице
missing_in_eth <- setdiff(cat_tbl$nationality, unique(eth_tbl$`Показатель`))
missing_in_eth
## [1] "Алюторцы" "Казаки" "Кряшены" "Мордва–эрзя"
## [5] "Сету (сето)" "Теленгиты" "Тофалары (тофа)" "Тубалары"
## [9] "Тувинцы-тоджинцы" "Уйльта (ороки)" "Челканцы" "Эвены (ламуты)"
## [13] "Якуты (саха)"
# Национальности, где покрытие < 80%
low_cov <- coverage |> filter(coverage_share < 0.8) |> arrange(coverage_share)
low_cov |> head(20)
# Проверка близости национально-взвешенного TFR к общероссийскому
consistency
if (consistency$abs_diff[1] > 0.05) {
warning(sprintf("Разность между общероссийским TFR и сложением по нациям = %.3f (>0.05). Проверьте веса/сопоставления.", consistency$abs_diff[1]))
}
# Дополнительные проверки консистентности агрегатов
eth_consistency
status_consistency
if (eth_consistency$abs_diff[1] > 0.05) {
warning(sprintf("Взвешенная по макрогруппам этничности TFR расходится с по-национальному взвешенной на %.3f (>0.05).", eth_consistency$abs_diff[1]))
}
if (status_consistency$abs_diff[1] > 0.05) {
warning(sprintf("Взвешенная по статусам TFR расходится с по-национальному взвешенной на %.3f (>0.05).", status_consistency$abs_diff[1]))
}
path_* в
блоке setup.manual_map.TFR_by_nationality_2021.csv — итоговая таблица:
nationality, status, ethnicity,
pop_total, tfr_nat.TFR_by_nationality_coverage_2021.csv — диагностика
покрытия по национальностям.library(dplyr)
library(stringr)
library(knitr)
library(rlang)
# 1) Определяем столбцы в age_parsed гибко по названиям
ap <- age_parsed
nm <- names(ap) |> tolower()
# столбец с нацией
nat_col <- names(ap)[match(TRUE, nm %in% c("nationality","nation","национальность","ethnicity","этнос"))]
if (is.na(nat_col)) stop("Не найден столбец с нацией/национальностью в age_parsed.")
# столбцы с численностью
female_col <- names(ap)[match(TRUE, nm %in% c("female","женщины","жен","f"))]
male_col <- names(ap)[match(TRUE, nm %in% c("male","мужчины","муж","m"))]
count_col <- names(ap)[match(TRUE, nm %in% c("n","count","число","population","pop","население","value","qty"))]
# 2) Приводим численности к числам и считаем итог по строке
clean_num <- function(x) as.numeric(gsub("[^0-9\\-\\.]", "", x))
ap_num <-
if (!is.na(female_col) && !is.na(male_col)) {
ap |>
mutate(
.female = clean_num(.data[[female_col]]),
.male = clean_num(.data[[male_col]]),
.n = .female + .male
)
} else if (!is.na(count_col)) {
ap |>
mutate(.n = clean_num(.data[[count_col]]))
} else {
stop("Не удалось распознать столбцы с численностью (male/female или общий count).")
}
# 3) Агрегируем по нациям, сортируем и добавляем «Итого»
res <-
ap_num |>
group_by(!!sym(nat_col)) |>
summarise(Население = sum(.n, na.rm = TRUE), .groups = "drop") |>
arrange(desc(Население)) |>
rename(Нация = !!sym(nat_col))
res_total <- bind_rows(
res,
tibble(Нация = "Итого", Население = sum(res$Население, na.rm = TRUE))
)
# 4) Вывод аккуратной таблицы
kable(
res_total,
caption = "Численность населения по нациям (источник: age_parsed)",
format.args = list(big.mark = " ", scientific = FALSE),
align = c("l","r")
)
| Нация | Население |
|---|---|
| Русские | 120 786 564 |
| Татары | 5 392 392 |
| Чеченцы | 1 916 115 |
| Башкиры | 1 798 309 |
| Чуваши | 1 220 868 |
| Другие народы | 1 204 914 |
| Аварцы | 1 157 871 |
| Армяне | 1 082 480 |
| Украинцы | 1 011 361 |
| Даргинцы | 716 873 |
| Казахи | 677 255 |
| Кумыки | 647 354 |
| Кабардинцы | 598 814 |
| Ингуши | 591 699 |
| Лезгины | 559 007 |
| Осетины | 555 614 |
| Мордва | 554 250 |
| Якуты (саха) | 547 339 |
| Азербайджанцы | 542 955 |
| Буряты | 526 337 |
| Марийцы | 484 869 |
| Удмурты | 442 154 |
| Таджики | 400 707 |
| Узбеки | 369 866 |
| Тувинцы | 337 950 |
| Крымские татары | 294 715 |
| Карачаевцы | 258 884 |
| Белорусы | 238 034 |
| Немцы | 223 397 |
| Калмыки | 205 431 |
| Лакцы | 198 414 |
| Цыгане | 198 393 |
| Табасараны | 173 301 |
| Коми | 164 206 |
| Киргизы | 157 646 |
| Балкарцы | 143 079 |
| Турки | 133 539 |
| Черкесы | 131 239 |
| Грузины | 129 029 |
| Адыгейцы | 127 545 |
| Ногайцы | 124 772 |
| Корейцы | 100 486 |
| Евреи | 94 569 |
| Алтайцы | 89 398 |
| Молдаване | 88 694 |
| Хакасы | 70 225 |
| Коми-пермяки | 63 844 |
| Греки | 61 768 |
| Казаки | 57 784 |
| Мордва–эрзя | 57 299 |
| Ненцы | 56 971 |
| Абазины | 47 918 |
| Туркмены | 47 311 |
| Эвенки | 45 112 |
| Агулы | 39 578 |
| Рутульцы | 39 213 |
| Карелы | 37 116 |
| Ханты | 36 167 |
| Кряшены | 34 319 |
| Эвены (ламуты) | 22 866 |
| Чукчи | 18 581 |
| Манси | 14 096 |
| Нанайцы | 13 362 |
| Шорцы | 12 119 |
| Долганы | 9 375 |
| Коряки | 8 592 |
| Тувинцы-тоджинцы | 8 357 |
| Нагайбаки | 6 602 |
| Вепсы | 5 376 |
| Сойоты | 5 026 |
| Нивхи | 4 436 |
| Тубалары | 4 221 |
| Селькупы | 4 008 |
| Теленгиты | 3 352 |
| Ительмены | 3 013 |
| Ульчи | 2 855 |
| Кумандинцы | 2 824 |
| Шапсуги | 2 612 |
| Телеуты | 2 579 |
| Бесермяне | 2 378 |
| Юкагиры | 2 089 |
| Эскимосы | 1 914 |
| Камчадалы | 1 804 |
| Саамы | 1 789 |
| Удэгейцы | 1 535 |
| Челканцы | 1 520 |
| Кеты | 1 270 |
| Чуванцы | 1 047 |
| Тофалары (тофа) | 843 |
| Нганасаны | 807 |
| Орочи | 622 |
| Негидальцы | 567 |
| Алеуты | 475 |
| Чулымцы | 453 |
| Уйльта (ороки) | 323 |
| Сету (сето) | 293 |
| Тазы | 287 |
| Ижорцы | 277 |
| Энцы | 248 |
| Водь | 137 |
| Алюторцы | 127 |
| Кереки | 38 |
| Итого | 148 268 408 |
library(dplyr)
library(tidyr)
library(ggplot2)
library(knitr)
library(stringr)
# ---- 1) Свод по нациям из age_parsed (оставляем как есть) ----
# ap_sex уже был вычислен ранее (female/male на строку)
res_by_nat <-
ap_sex |>
mutate(n = female + male) |>
group_by(nationality) |>
summarise(Население = sum(n, na.rm = TRUE), .groups = "drop") |>
rename(Нация = nationality) |>
arrange(desc(Население))
# ---- 2) Остаток по возрастам: РФ - сумма всех наций ----
standardize_age_group <- function(x) {
x |>
str_replace_all("[–—]", "-") |>
str_replace_all("и более", "+") |>
str_replace_all("\\s+", "") |>
str_trim()
}
sum_nat_by_age <- ap_sex |>
mutate(age_key = standardize_age_group(age_group)) |>
group_by(age_key) |>
summarise(
female = sum(female, na.rm = TRUE),
male = sum(male, na.rm = TRUE),
.groups = "drop"
)
rus_total_by_age <- total_population_russia |>
mutate(
age_key = standardize_age_group(age_group),
female = as.numeric(female),
male = as.numeric(male)
)
others_raw <- rus_total_by_age |>
left_join(sum_nat_by_age, by = "age_key", suffix = c("_rus", "_sum")) |>
mutate(
female_sum = coalesce(female_sum, 0),
male_sum = coalesce(male_sum, 0),
female_residual = female_rus - female_sum,
male_residual = male_rus - male_sum
) |>
transmute(
age_group,
age_key,
female = pmax(female_residual, 0),
male = pmax(male_residual, 0)
)
# ---- 3) Теоретический недостающий итог и масштабирование остатка ----
total_rus <- sum(rus_total_by_age$female + rus_total_by_age$male, na.rm = TRUE)
total_all_nations <- sum(ap_sex$female + ap_sex$male, na.rm = TRUE)
missing_total <- max(total_rus - total_all_nations, 0)
cur_total <- sum(others_raw$female + others_raw$male, na.rm = TRUE)
if (missing_total == 0 || cur_total == 0) {
others_scaled <- others_raw |>
transmute(
age_group,
age_key,
female_raw = 0,
male_raw = 0
)
} else {
scale_k <- missing_total / cur_total
others_scaled <- others_raw |>
transmute(
age_group,
age_key,
female_raw = female * scale_k,
male_raw = male * scale_k
)
}
# Округление после распределения с учётом остатков
others_pyr <- others_scaled |>
mutate(
female = round(female_raw),
male = round(male_raw),
female_residual = female_raw - female,
male_residual = male_raw - male
)
diff_fin <- round(missing_total - sum(others_pyr$female + others_pyr$male, na.rm = TRUE))
if (!is.na(diff_fin) && diff_fin != 0 && nrow(others_pyr) > 0) {
adjust_long <- others_pyr |>
select(age_group, age_key, female, male, female_residual, male_residual) |>
pivot_longer(c(female, male), names_to = "sex", values_to = "value") |>
mutate(
residual = if_else(sex == "female", female_residual, male_residual),
sex_key = paste(age_key, sex, sep = "::")
)
if (diff_fin > 0) {
order_keys <- adjust_long |>
arrange(desc(residual)) |>
pull(sex_key)
if (length(order_keys) == 0) {
order_keys <- adjust_long$sex_key
}
idx <- 1
n_keys <- length(order_keys)
while (diff_fin > 0 && n_keys > 0) {
key <- order_keys[((idx - 1) %% n_keys) + 1]
pos <- adjust_long$sex_key == key
adjust_long$value[pos] <- adjust_long$value[pos] + 1
diff_fin <- diff_fin - 1
idx <- idx + 1
}
} else {
order_keys <- adjust_long |>
arrange(residual) |>
pull(sex_key)
if (length(order_keys) == 0) {
order_keys <- adjust_long$sex_key
}
idx <- 1
n_keys <- length(order_keys)
while (diff_fin < 0 && n_keys > 0) {
key <- order_keys[((idx - 1) %% n_keys) + 1]
pos <- adjust_long$sex_key == key
if (adjust_long$value[pos] > 0) {
adjust_long$value[pos] <- adjust_long$value[pos] - 1
diff_fin <- diff_fin + 1
}
idx <- idx + 1
if (all(adjust_long$value <= 0) && diff_fin < 0) break
}
}
others_pyr <- adjust_long |>
select(age_group, age_key, sex, value) |>
pivot_wider(names_from = sex, values_from = value, values_fill = 0)
}
others_pyr <- others_pyr |>
select(age_group, age_key, female, male) |>
arrange(match(age_key, rus_total_by_age$age_key)) |>
select(age_group, female, male)
# Контроль итогов: при небольшой разнице подправим самый широкий возраст
diff_fin <- round(missing_total - sum(others_pyr$female + others_pyr$male, na.rm = TRUE))
if (!is.na(diff_fin) && diff_fin != 0 && nrow(others_pyr) > 0) {
i_star <- which.max(others_pyr$female + others_pyr$male)
# добавим/убавим из женской части (можно и из мужской — не принципиально)
if (diff_fin > 0) {
others_pyr$female[i_star] <- others_pyr$female[i_star] + diff_fin
} else {
diff_abs <- abs(diff_fin)
take_f <- min(others_pyr$female[i_star], diff_abs)
others_pyr$female[i_star] <- others_pyr$female[i_star] - take_f
remaining <- diff_abs - take_f
if (remaining > 0) {
take_m <- min(others_pyr$male[i_star], remaining)
others_pyr$male[i_star] <- others_pyr$male[i_star] - take_m
}
}
}
# Итог «Других»
others_total <- sum(others_pyr$female + others_pyr$male, na.rm = TRUE)
# ---- 4) Итоговая таблица наций: все нации + «Другие народы» + «Итого = total_rus» ----
tmp_res <- bind_rows(
res_by_nat,
tibble(Нация = "Другие народы", Население = others_total)
) |>
arrange(dplyr::desc(Население))
res_total <- bind_rows(
tmp_res,
tibble(Нация = "Итого", Население = sum(tmp_res$Население, na.rm = TRUE))
)
# Подменим «Итого» ровно на total_rus
res_total$Население[res_total$Нация == "Итого"] <- total_rus
# ---- 5) График пирамиды «Другие народы» ----
plot_df <- others_pyr |>
mutate(
age_group = factor(age_group, levels = unique(rus_total_by_age$age_group)),
male_plot = -male,
female_plot = female
) |>
select(age_group, male_plot, female_plot) |>
pivot_longer(c(male_plot, female_plot), names_to = "sex", values_to = "value") |>
mutate(sex = recode(sex, male_plot = "Мужчины", female_plot = "Женщины"))
ggplot(plot_df, aes(x = age_group, y = value, fill = sex)) +
geom_col(width = 0.85) +
coord_flip() +
scale_y_continuous(labels = function(x) format(abs(x), big.mark = " ", scientific = FALSE)) +
labs(
title = "Поло-возрастная пирамида: «Другие народы» (только остаток до населения РФ)",
x = "Возрастная группа",
y = "Численность",
fill = NULL,
caption = "Остаток = РФ − сумма всех наций. Нули/отрицательные устранены, сумма приведена к теоретическому остатку."
) +
theme_minimal(base_size = 12)
# ---- 6) Короткий контроль вывода ----
cat("Всего РФ: ", format(total_rus, big.mark = " ", scientific = FALSE), "\n")
## Всего РФ: 147 182 123
cat("Сумма всех наций (без «Других»): ",
format(sum(res_by_nat$Население), big.mark = " ", scientific = FALSE), "\n")
## Сумма всех наций (без «Других»): 148 268 408
cat("Теоретический остаток (missing_total): ",
format(missing_total, big.mark = " ", scientific = FALSE), "\n")
## Теоретический остаток (missing_total): 0
cat("Фактический остаток в «Другие народы»: ",
format(others_total, big.mark = " ", scientific = FALSE), "\n")
## Фактический остаток в «Другие народы»: 0
project_population_by_nationality <- function(demo,
start_year = 2021L,
end_year = 2050L,
scenario = "средний",
male_birth_share = 0.512) {
ages <- demo$overall_age_1year$age
max_age <- max(ages, na.rm = TRUE)
nat_arrays <- demo$nationality_arrays
migration_nat_names <- unique(demo$migration$net$nationality$russian_name)
migration_nat_names <- migration_nat_names[!is.na(migration_nat_names)]
nat_names <- sort(unique(trimws(c(names(nat_arrays), migration_nat_names))))
n_ages <- length(ages)
n_nats <- length(nat_names)
female_mat <- matrix(0, nrow = n_ages, ncol = n_nats,
dimnames = list(ages, nat_names))
male_mat <- matrix(0, nrow = n_ages, ncol = n_nats,
dimnames = list(ages, nat_names))
for (nm in names(nat_arrays)) {
arr <- nat_arrays[[nm]]
if (is.null(arr)) next
target_nm <- trimws(nm)
if (!target_nm %in% nat_names) next
if (!is.null(arr$female)) female_mat[, target_nm] <- arr$female
if (!is.null(arr$male)) male_mat[, target_nm] <- arr$male
}
female_mat[is.na(female_mat)] <- 0
male_mat[is.na(male_mat)] <- 0
mortality_probs <- demo$mortality$probabilities
qx_f <- mortality_probs$female
qx_m <- mortality_probs$male
qx_f[is.na(qx_f)] <- 0
qx_m[is.na(qx_m)] <- 0
qx_f <- pmin(pmax(qx_f, 0), 0.999)
qx_m <- pmin(pmax(qx_m, 0), 0.999)
qx_f_mat <- matrix(qx_f, nrow = n_ages, ncol = n_nats)
qx_m_mat <- matrix(qx_m, nrow = n_ages, ncol = n_nats)
asfr_nat <- demo$asfr$nationality |>
dplyr::select(nationality, age_start, age_end, asfr)
asfr_russia <- demo$asfr$russia |>
dplyr::select(age_start, age_end, asfr)
build_fertility_profile <- function(tbl) {
profile <- rep(0, n_ages)
if (!nrow(tbl)) {
return(profile)
}
for (i in seq_len(nrow(tbl))) {
row <- tbl[i, , drop = FALSE]
if (any(is.na(row[c("age_start", "age_end", "asfr")]))) next
idx <- (row$age_start[[1]]:row$age_end[[1]]) + 1
idx <- idx[idx >= 1 & idx <= n_ages]
if (!length(idx)) next
profile[idx] <- row$asfr[[1]] / 1000
}
profile
}
default_fertility <- build_fertility_profile(asfr_russia)
fertility_profiles <- purrr::map(nat_names, function(nm) {
tbl <- asfr_nat |>
dplyr::filter(nationality == nm)
prof <- build_fertility_profile(tbl)
if (all(is.na(prof)) || sum(prof, na.rm = TRUE) == 0) {
default_fertility
} else {
prof
}
})
names(fertility_profiles) <- nat_names
tfr_table <- demo$tfr_by_nat |>
dplyr::filter(!is.na(nationality) & nzchar(nationality)) |>
dplyr::transmute(
nationality = trimws(nationality),
tfr = tfr_nat
)
tfr_lookup <- stats::setNames(tfr_table$tfr, tfr_table$nationality)
default_tfr <- demo$russia_tfr
if (is.null(default_tfr) || !is.finite(default_tfr)) {
default_tfr <- tfr_lookup[["Все россияне"]]
}
if (is.null(default_tfr) || !is.finite(default_tfr)) {
default_tfr <- stats::median(tfr_table$tfr, na.rm = TRUE)
}
if (is.null(default_tfr) || !is.finite(default_tfr)) {
default_tfr <- 1.5
}
tfr_targets <- rep(default_tfr, n_nats)
names(tfr_targets) <- nat_names
matched_tfr <- intersect(names(tfr_lookup), nat_names)
for (nm in matched_tfr) {
val <- tfr_lookup[[nm]]
if (is.finite(val)) {
tfr_targets[[nm]] <- val
}
}
childbearing_idx <- which(ages >= 15 & ages <= 49)
childbearing_span <- length(childbearing_idx)
migration_tbl <- demo$migration$net$nationality |>
dplyr::mutate(nationality = trimws(dplyr::coalesce(russian_name, english_name))) |>
dplyr::filter(scenario == !!scenario, !is.na(nationality), nzchar(nationality)) |>
dplyr::select(year, nationality, net_migrants)
migration_year_list <- migration_tbl |>
dplyr::filter(year >= start_year, year < end_year) |>
dplyr::group_by(year, nationality) |>
dplyr::summarise(net_migrants = sum(net_migrants, na.rm = TRUE), .groups = "drop") |>
(\(df) split(df, df$year))()
migration_vectors <- purrr::map(
seq(start_year, end_year - 1),
function(yr) {
vec <- rep(0, n_nats)
names(vec) <- nat_names
yr_chr <- as.character(yr)
if (yr_chr %in% names(migration_year_list)) {
df <- migration_year_list[[yr_chr]]
matched <- intersect(df$nationality, nat_names)
vec[matched] <- df$net_migrants[match(matched, df$nationality)]
}
vec
}
)
names(migration_vectors) <- as.character(seq(start_year, end_year - 1))
mig_age <- demo$migration$age_distribution_1year |>
dplyr::arrange(age)
female_age_share <- mig_age$female
male_age_share <- mig_age$male
female_age_share[is.na(female_age_share)] <- 0
male_age_share[is.na(male_age_share)] <- 0
female_total_share <- sum(female_age_share)
male_total_share <- sum(male_age_share)
total_share <- female_total_share + male_total_share
female_weight <- if (total_share > 0) female_total_share / total_share else 0.5
male_weight <- 1 - female_weight
if (female_total_share > 0) female_age_share <- female_age_share / female_total_share
if (male_total_share > 0) male_age_share <- male_age_share / male_total_share
female_age_mat <- matrix(female_age_share, nrow = n_ages, ncol = n_nats)
male_age_mat <- matrix(male_age_share, nrow = n_ages, ncol = n_nats)
nat_history <- list(
tibble::tibble(
year = start_year,
nationality = nat_names,
female = colSums(female_mat),
male = colSums(male_mat)
) |>
dplyr::mutate(total = female + male)
)
flow_history <- list()
pop_f <- female_mat
pop_m <- male_mat
for (yr in seq(start_year, end_year - 1)) {
fert_vec <- purrr::imap_dbl(fertility_profiles, function(profile, nm) {
sum(pop_f[, nm] * profile, na.rm = TRUE)
})
female_birth_share <- 1 - male_birth_share
female_births <- fert_vec * female_birth_share
male_births <- fert_vec * male_birth_share
if (childbearing_span > 0 && length(childbearing_idx)) {
female_childbearing <- colSums(pop_f[childbearing_idx, , drop = FALSE], na.rm = TRUE)
female_childbearing[female_childbearing < 0 | !is.finite(female_childbearing)] <- 0
target_births <- female_childbearing * (tfr_targets / childbearing_span)
target_births[!is.finite(target_births)] <- NA_real_
target_births[target_births < 0] <- 0
scale_applicable <- !is.na(target_births)
if (any(scale_applicable)) {
base_positive <- scale_applicable & fert_vec > 0 & is.finite(fert_vec)
scale_factor <- rep(1, n_nats)
scale_factor[base_positive] <- target_births[base_positive] / fert_vec[base_positive]
scale_factor[!is.finite(scale_factor)] <- 0
if (any(base_positive)) {
female_births[base_positive] <- female_births[base_positive] * scale_factor[base_positive]
male_births[base_positive] <- male_births[base_positive] * scale_factor[base_positive]
fert_vec[base_positive] <- fert_vec[base_positive] * scale_factor[base_positive]
}
base_zero <- scale_applicable & (!base_positive)
if (any(base_zero)) {
female_births[base_zero] <- target_births[base_zero] * female_birth_share
male_births[base_zero] <- target_births[base_zero] * male_birth_share
fert_vec[base_zero] <- target_births[base_zero]
}
}
}
deaths_f <- colSums(pop_f * qx_f_mat, na.rm = TRUE)
deaths_m <- colSums(pop_m * qx_m_mat, na.rm = TRUE)
survivors_f <- pop_f - pop_f * qx_f_mat
survivors_m <- pop_m - pop_m * qx_m_mat
survivors_f[survivors_f < 0] <- 0
survivors_m[survivors_m < 0] <- 0
next_f <- matrix(0, nrow = n_ages, ncol = n_nats,
dimnames = list(ages, nat_names))
next_m <- matrix(0, nrow = n_ages, ncol = n_nats,
dimnames = list(ages, nat_names))
next_f[1, ] <- female_births
next_m[1, ] <- male_births
if (n_ages > 1) {
next_f[2:n_ages, ] <- survivors_f[1:(n_ages - 1), ]
next_m[2:n_ages, ] <- survivors_m[1:(n_ages - 1), ]
}
next_f[n_ages, ] <- next_f[n_ages, ] + survivors_f[n_ages, ]
next_m[n_ages, ] <- next_m[n_ages, ] + survivors_m[n_ages, ]
mig_vec <- migration_vectors[[as.character(yr)]]
female_mig <- mig_vec * female_weight
male_mig <- mig_vec * male_weight
if (any(female_mig != 0)) {
next_f <- next_f + sweep(female_age_mat, 2, female_mig, `*`)
}
if (any(male_mig != 0)) {
next_m <- next_m + sweep(male_age_mat, 2, male_mig, `*`)
}
next_f[next_f < 0] <- 0
next_m[next_m < 0] <- 0
flow_history[[length(flow_history) + 1]] <- tibble::tibble(
year = yr,
nationality = nat_names,
births = fert_vec,
deaths = deaths_f + deaths_m,
net_migration = mig_vec
)
pop_f <- next_f
pop_m <- next_m
nat_history[[length(nat_history) + 1]] <- tibble::tibble(
year = yr + 1L,
nationality = nat_names,
female = colSums(pop_f),
male = colSums(pop_m)
) |>
dplyr::mutate(total = female + male)
}
list(
population = dplyr::bind_rows(nat_history),
flows = dplyr::bind_rows(flow_history),
parameters = list(
start_year = start_year,
end_year = end_year,
scenario = scenario,
male_birth_share = male_birth_share
)
)
}
projection <- project_population_by_nationality(demography_inputs)
population_totals <- projection$population |>
dplyr::group_by(year) |>
dplyr::summarise(total_population = sum(total, na.rm = TRUE), .groups = "drop") |>
dplyr::arrange(year) |>
dplyr::mutate(population_next = dplyr::lead(total_population))
flow_summary <- projection$flows |>
dplyr::group_by(year) |>
dplyr::summarise(
births = sum(births, na.rm = TRUE),
deaths = sum(deaths, na.rm = TRUE),
net_migration = sum(net_migration, na.rm = TRUE),
natural_increase = births - deaths,
.groups = "drop"
) |>
dplyr::left_join(
population_totals |>
dplyr::select(year, population_start = total_population, population_next),
by = "year"
) |>
dplyr::mutate(
population_end = dplyr::coalesce(population_next, population_start + natural_increase + net_migration),
population_change = population_end - population_start
) |>
dplyr::select(year, population_start, births, deaths, natural_increase, net_migration, population_end, population_change)
status_lookup <- dplyr::bind_rows(
cat_tbl |>
dplyr::select(nationality, status),
demography_inputs$migration$composition$share_mapped |>
dplyr::select(nationality = russian_name, status)
) |>
dplyr::filter(!is.na(nationality) & nzchar(nationality)) |>
dplyr::mutate(
nationality = trimws(nationality),
status = trimws(status),
status = dplyr::na_if(status, "")
) |>
dplyr::group_by(nationality) |>
dplyr::summarise(
status = {
non_na <- status[!is.na(status)]
if (length(non_na)) non_na[1] else NA_character_
},
.groups = "drop"
)
population_with_status <- projection$population |>
dplyr::left_join(status_lookup, by = c("nationality" = "nationality")) |>
dplyr::mutate(
status_clean = tolower(status),
category = dplyr::case_when(
status_clean %in% "русские" ~ "Русские",
stringr::str_detect(status_clean, "мигран") ~ "Мигранты",
status_clean %in% "другие коренные народы" ~ "Коренные народы",
TRUE ~ "Прочие/неопределено"
)
)
population_2050 <- population_with_status |>
dplyr::filter(year == 2050)
status_share <- population_with_status |>
dplyr::group_by(year, category) |>
dplyr::summarise(total = sum(total, na.rm = TRUE), .groups = "drop") |>
dplyr::group_by(year) |>
dplyr::mutate(
total_year = sum(total, na.rm = TRUE),
share = dplyr::if_else(total_year > 0, total / total_year, NA_real_)
) |>
dplyr::select(-total_year) |>
dplyr::ungroup()
flow_summary_display <- flow_summary |>
dplyr::filter(year %in% c(2021, 2025, 2030, 2035, 2040, 2045, 2049)) |>
dplyr::mutate(dplyr::across(-year, ~ .x / 1e6))
top10_2050 <- population_2050 |>
dplyr::arrange(dplyr::desc(total)) |>
dplyr::slice_head(n = 10) |>
dplyr::transmute(
Национальность = nationality,
Статус = status,
Всего_млн = total / 1e6,
Женщины_млн = female / 1e6,
Мужчины_млн = male / 1e6
)
top10_migrants_2050 <- population_2050 |>
dplyr::filter(category == "Мигранты") |>
dplyr::arrange(dplyr::desc(total)) |>
dplyr::slice_head(n = 10) |>
dplyr::transmute(
Национальность = nationality,
Статус = status,
Всего_млн = total / 1e6,
Женщины_млн = female / 1e6,
Мужчины_млн = male / 1e6
)
knitr::kable(
flow_summary_display,
caption = "Итоги когортно-компонентного прогноза (млн человек)",
digits = 2,
format.args = list(big.mark = " ")
)
| year | population_start | births | deaths | natural_increase | net_migration | population_end | population_change |
|---|---|---|---|---|---|---|---|
| 2 021 | 148.27 | 1.52 | 2.49 | -0.97 | 0.12 | 147.41 | -0.86 |
| 2 025 | 144.78 | 1.47 | 2.49 | -1.01 | 0.15 | 143.92 | -0.86 |
| 2 030 | 140.56 | 1.41 | 2.45 | -1.04 | 0.20 | 139.72 | -0.84 |
| 2 035 | 136.21 | 1.30 | 2.48 | -1.18 | 0.25 | 135.28 | -0.93 |
| 2 040 | 131.42 | 1.21 | 2.45 | -1.24 | 0.25 | 130.42 | -0.99 |
| 2 045 | 126.51 | 1.19 | 2.38 | -1.19 | 0.25 | 125.58 | -0.94 |
| 2 049 | 122.86 | 1.19 | 2.32 | -1.13 | 0.25 | 121.98 | -0.88 |
ggplot(population_totals, aes(x = factor(year), y = total_population / 1e6)) +
geom_col(fill = "#4063D8") +
scale_x_discrete(breaks = seq(2021, 2050, by = 5)) +
labs(
title = "Прогноз численности населения России (2021–2050)",
x = "Год",
y = "Население, млн человек"
) +
theme_minimal(base_size = 12)
knitr::kable(
top10_2050,
caption = "Наиболее многочисленные национальности в 2050 году (млн человек)",
digits = 2,
format.args = list(big.mark = " ")
)
| Национальность | Статус | Всего_млн | Женщины_млн | Мужчины_млн |
|---|---|---|---|---|
| Русские | коренной | 90.50 | 48.72 | 41.78 |
| Татары | коренной | 4.06 | 2.16 | 1.90 |
| Чеченцы | коренной | 2.45 | 1.25 | 1.19 |
| Узбеки | мигрант | 2.16 | 1.07 | 1.09 |
| Другие народы | NA | 1.48 | 0.83 | 0.65 |
| Таджики | мигрант | 1.44 | 0.70 | 0.74 |
| Башкиры | коренной | 1.41 | 0.75 | 0.66 |
| Аварцы | коренной | 1.24 | 0.64 | 0.60 |
| Армяне | мигрант | 1.21 | 0.61 | 0.59 |
| Казахи | мигрант | 1.04 | 0.53 | 0.50 |
knitr::kable(
top10_migrants_2050,
caption = "Наиболее многочисленные мигрантские народы в 2050 году (млн человек)",
digits = 2,
format.args = list(big.mark = " ")
)
| Национальность | Статус | Всего_млн | Женщины_млн | Мужчины_млн |
|---|---|---|---|---|
| Узбеки | мигрант | 2.16 | 1.07 | 1.09 |
| Таджики | мигрант | 1.44 | 0.70 | 0.74 |
| Армяне | мигрант | 1.21 | 0.61 | 0.59 |
| Казахи | мигрант | 1.04 | 0.53 | 0.50 |
| Азербайджанцы | мигрант | 0.83 | 0.42 | 0.42 |
| Киргизы | мигрант | 0.80 | 0.40 | 0.39 |
| Украинцы | мигрант | 0.74 | 0.40 | 0.34 |
| Индийцы | мигрант | 0.61 | 0.31 | 0.30 |
| Другие вне СНГ | мигрант | 0.33 | 0.16 | 0.16 |
| Белорусы | мигрант | 0.27 | 0.14 | 0.13 |
status_plot <- status_share |>
dplyr::filter(category %in% c("Русские", "Мигранты")) |>
dplyr::mutate(category = factor(category, levels = c("Русские", "Мигранты")))
ggplot(status_plot, aes(x = year, y = share, color = category)) +
geom_line(size = 1.1) +
geom_point(size = 2) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
scale_x_continuous(breaks = seq(2021, 2050, by = 5)) +
labs(
title = "Динамика доли русских и мигрантов (включая последующие поколения)",
x = "Год",
y = "Доля населения",
color = NULL
) +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")