Data Source: U.S. Census Bureau 2023 Vintage Population Estimates. Downloaded here: https://www.census.gov/data/datasets/time-series/demo/popest/2020s-state-detail.html
Data codebook here: https://www2.census.gov/programs-surveys/popest/technical-documentation/file-layouts/2020-2023/SC-EST2023-ALLDATA6.pdf
# Load libraries
library(readxl)
Warning: package 'readxl' was built under R version 4.2.3
Warning: package 'dplyr' was built under R version 4.2.3
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Warning: package 'tidyr' was built under R version 4.2.3
Warning: package 'writexl' was built under R version 4.2.3
# Load data
data <- read_excel("2023 Vintage Estimates_copy.xlsx")
# Define age groups and corresponding labels
age_bins <- c(0, 4, 9, 14, 19, 24, 29, 34, 39, 44, 49, 54, 59, 64, 69, 74, 79, 84, Inf)
age_labels <- c('0 - 4', '5 - 9', '10 - 14', '15 - 19', '20 - 24', '25 - 29', '30 - 34',
'35 - 39', '40 - 44', '45 - 49', '50 - 54', '55 - 59', '60 - 64',
'65 - 69', '70 - 74', '75 - 79', '80 - 84', '85+')
# Create age group factor
data <- data %>%
mutate(`Age Group` = cut(AGE, breaks = age_bins, labels = age_labels, right = TRUE))
# Summarize by Age Group and SEX (where 1 = Male and 2 = Female)
summary_data <- data %>%
filter(SEX %in% c(1, 2)) %>%
group_by(`Age Group`, SEX) %>%
summarise(Population = sum(POPESTIMATE2023, na.rm = TRUE)) %>%
pivot_wider(names_from = SEX, values_from = Population, values_fill = 0)
`summarise()` has grouped output by 'Age Group'. You can override using the
`.groups` argument.
# Rename columns for Male and Female
colnames(summary_data) <- c('Age Group', 'Male', 'Female')
# Print the resulting summary table
print(summary_data)
# A tibble: 19 × 3
# Groups: Age Group [19]
`Age Group` Male Female
<fct> <dbl> <dbl>
1 0 - 4 10861 10508
2 5 - 9 10183 9685
3 10 - 14 11224 10977
4 15 - 19 13557 12807
5 20 - 24 13098 12462
6 25 - 29 13076 12863
7 30 - 34 13409 12286
8 35 - 39 12591 11177
9 40 - 44 12452 10645
10 45 - 49 11010 10027
11 50 - 54 10186 9530
12 55 - 59 9395 8596
13 60 - 64 8483 8163
14 65 - 69 7002 6416
15 70 - 74 4706 4648
16 75 - 79 3092 3262
17 80 - 84 1814 1924
18 85+ 1744 2272
19 <NA> 2512 2546
# Save the summary table to an Excel file
write_xlsx(summary_data, "AIAN_Sex_Age_2024.xlsx")