# Load necessary libraries for data manipulation and visualization
library(tidyverse) # For data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2) # For creating static plots
library(readr) # For reading CSV files
library(dplyr) # For data manipulation
library(knitr) # For creating dynamic reports
library(ggcorrplot) # For correlation plots
library(plotly) # For interactive plots
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(ggthemes) # For additional themes for ggplot2
# Read the NST-EST2024-ALLDATA CSV into `data`.
# The location is kept in `file_path` so it is written down only once here.
file_path <- "~/Downloads/NST-EST2024-ALLDATA.csv"
data <- readr::read_csv(file = file_path)
## Rows: 66 Columns: 75
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): SUMLEV, REGION, DIVISION, STATE, NAME
## dbl (70): ESTIMATESBASE2020, POPESTIMATE2020, POPESTIMATE2021, POPESTIMATE20...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data, n = 6L) # Preview the first six rows (the default for head())
## # A tibble: 6 × 75
## SUMLEV REGION DIVISION STATE NAME ESTIMATESBASE2020 POPESTIMATE2020
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 010 0 0 00 United States 331515736 331577720
## 2 020 1 0 00 Northeast Regi… 57617706 57431458
## 3 030 1 1 00 New England 15122011 15057350
## 4 030 1 2 00 Middle Atlantic 42495695 42374108
## 5 020 2 0 00 Midwest Region 68998970 68984258
## 6 030 2 3 00 East North Cen… 47381362 47358568
## # ℹ 68 more variables: POPESTIMATE2021 <dbl>, POPESTIMATE2022 <dbl>,
## # POPESTIMATE2023 <dbl>, POPESTIMATE2024 <dbl>, NPOPCHG_2020 <dbl>,
## # NPOPCHG_2021 <dbl>, NPOPCHG_2022 <dbl>, NPOPCHG_2023 <dbl>,
## # NPOPCHG_2024 <dbl>, BIRTHS2020 <dbl>, BIRTHS2021 <dbl>, BIRTHS2022 <dbl>,
## # BIRTHS2023 <dbl>, BIRTHS2024 <dbl>, DEATHS2020 <dbl>, DEATHS2021 <dbl>,
## # DEATHS2022 <dbl>, DEATHS2023 <dbl>, DEATHS2024 <dbl>, NATURALCHG2020 <dbl>,
## # NATURALCHG2021 <dbl>, NATURALCHG2022 <dbl>, NATURALCHG2023 <dbl>, …
# Per-column summary: length/class for character columns, and the
# min / quartiles / mean / max for numeric columns
summary(object = data)
## SUMLEV REGION DIVISION STATE
## Length:66 Length:66 Length:66 Length:66
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## NAME ESTIMATESBASE2020 POPESTIMATE2020 POPESTIMATE2021
## Length:66 Min. : 576844 Min. : 577681 Min. : 579636
## Class :character 1st Qu.: 2943628 1st Qu.: 2943263 1st Qu.: 2940556
## Mode :character Median : 6024512 Median : 6026060 Median : 6026491
## Mean : 20141649 Mean : 20145340 Mean : 20176693
## 3rd Qu.: 18334475 3rd Qu.: 18335226 3rd Qu.: 18381832
## Max. :331515736 Max. :331577720 Max. :332099760
## POPESTIMATE2022 POPESTIMATE2023 POPESTIMATE2024 NPOPCHG_2020
## Min. : 581978 Min. : 585067 Min. : 587618 Min. :-186248
## 1st Qu.: 2938478 1st Qu.: 2945254 1st Qu.: 2949935 1st Qu.: -4213
## Median : 6041694 Median : 6069222 Median : 6103220 Median : 1297
## Mean : 20292264 Mean : 20461041 Mean : 20661322 Mean : 3692
## 3rd Qu.: 18493586 3rd Qu.: 18618918 3rd Qu.: 18746957 3rd Qu.: 11098
## Max. :334017321 Max. :336806231 Max. :340110988 Max. : 195012
## NPOPCHG_2021 NPOPCHG_2022 NPOPCHG_2023 NPOPCHG_2024
## Min. :-379393 Min. :-150424 Min. : -16345 Min. : -516
## 1st Qu.: -3730 1st Qu.: 1262 1st Qu.: 11493 1st Qu.: 16021
## Median : 8732 Median : 14397 Median : 34603 Median : 54629
## Mean : 31353 Mean : 115571 Mean : 168777 Mean : 200281
## 3rd Qu.: 36170 3rd Qu.: 47635 3rd Qu.: 116482 3rd Qu.: 145868
## Max. : 891461 Max. :1917561 Max. :2788910 Max. :3304757
## BIRTHS2020 BIRTHS2021 BIRTHS2022 BIRTHS2023
## Min. : 1303 Min. : 5153 Min. : 5401 Min. : 5100
## 1st Qu.: 6576 1st Qu.: 26649 1st Qu.: 26569 1st Qu.: 26658
## Median : 16786 Median : 64726 Median : 66796 Median : 65358
## Mean : 54258 Mean : 217525 Mean : 223352 Mean : 221431
## 3rd Qu.: 46575 3rd Qu.: 188860 3rd Qu.: 193631 3rd Qu.: 190024
## Max. :894123 Max. :3584459 Max. :3680380 Max. :3648896
## BIRTHS2024 DEATHS2020 DEATHS2021 DEATHS2022
## Min. : 5039 Min. : 1169 Min. : 5333 Min. : 5603
## 1st Qu.: 26320 1st Qu.: 6137 1st Qu.: 28777 1st Qu.: 30731
## Median : 64053 Median : 15158 Median : 63466 Median : 64190
## Mean : 218795 Mean : 51748 Mean : 208895 Mean : 209994
## 3rd Qu.: 189169 3rd Qu.: 51068 3rd Qu.: 171797 3rd Qu.: 171247
## Max. :3605563 Max. :852024 Max. :3438423 Max. :3456354
## DEATHS2023 DEATHS2024 NATURALCHG2020 NATURALCHG2021
## Min. : 5035 Min. : 4985 Min. :-47253 Min. :-42470
## 1st Qu.: 26781 1st Qu.: 25576 1st Qu.: -1270 1st Qu.: -5059
## Median : 59512 Median : 58894 Median : 293 Median : 320
## Mean : 191702 Mean : 187600 Mean : 2510 Mean : 8630
## 3rd Qu.: 160643 3rd Qu.: 156657 3rd Qu.: 3336 3rd Qu.: 11746
## Max. :3154285 Max. :3086925 Max. : 51560 Max. :146036
## NATURALCHG2022 NATURALCHG2023 NATURALCHG2024 INTERNATIONALMIG2020
## Min. :-38556 Min. :-16291 Min. :-15701 Min. :-1528.0
## 1st Qu.: -5853 1st Qu.: -789 1st Qu.: -494 1st Qu.: 100.8
## Median : 798 Median : 3516 Median : 3600 Median : 282.5
## Mean : 13359 Mean : 29730 Mean : 31195 Mean : 1182.0
## 3rd Qu.: 12673 3rd Qu.: 22448 3rd Qu.: 22722 3rd Qu.: 992.0
## Max. :224026 Max. :494611 Max. :518638 Max. :19885.0
## INTERNATIONALMIG2021 INTERNATIONALMIG2022 INTERNATIONALMIG2023
## Min. : -4317 Min. : -28141 Min. : -54
## 1st Qu.: 1749 1st Qu.: 7081 1st Qu.: 10008
## Median : 4775 Median : 20549 Median : 27093
## Mean : 22723 Mean : 102212 Mean : 139048
## 3rd Qu.: 17834 3rd Qu.: 72905 3rd Qu.: 104163
## Max. :376004 Max. :1693535 Max. :2294299
## INTERNATIONALMIG2024 DOMESTICMIG2020 DOMESTICMIG2021 DOMESTICMIG2022
## Min. : 506 Min. :-132548 Min. :-497239 Min. :-447635
## 1st Qu.: 12621 1st Qu.: -4732 1st Qu.: -13672 1st Qu.: -28511
## Median : 33069 Median : 21 Median : 2324 Median : -1334
## Mean : 169086 Mean : 0 Mean : 0 Mean : 0
## 3rd Qu.: 126258 3rd Qu.: 7960 3rd Qu.: 26357 3rd Qu.: 18070
## Max. :2786119 Max. : 157428 Max. : 719421 Max. : 849097
## DOMESTICMIG2023 DOMESTICMIG2024 NETMIG2020 NETMIG2021
## Min. :-385255 Min. :-251161.0 Min. :-128118.0 Min. :-433592
## 1st Qu.: -15728 1st Qu.: -7257.0 1st Qu.: -4540.8 1st Qu.: -5134
## Median : 0 Median : 430.5 Median : 322.5 Median : 6435
## Mean : 0 Mean : 0.0 Mean : 1182.0 Mean : 22723
## 3rd Qu.: 9858 3rd Qu.: 13171.0 3rd Qu.: 8922.2 3rd Qu.: 31387
## Max. : 678977 Max. : 411004.0 Max. : 166386.0 Max. : 874645
## NETMIG2022 NETMIG2023 NETMIG2024 RESIDUAL2020
## Min. :-183999 Min. : -51308 Min. : 255 Min. :-10877.0
## 1st Qu.: 1278 1st Qu.: 8683 1st Qu.: 14173 1st Qu.: -373.8
## Median : 17142 Median : 28818 Median : 40258 Median : 284.5
## Mean : 102212 Mean : 139048 Mean : 169086 Mean : 0.0
## 3rd Qu.: 39641 3rd Qu.: 91000 3rd Qu.: 120446 3rd Qu.: 1212.8
## Max. :1693535 Max. :2294299 Max. :2786119 Max. : 12743.0
## RESIDUAL2021 RESIDUAL2022 RESIDUAL2023 RESIDUAL2024
## Min. :-13864 Min. :-14403.0 Min. :-6597.0 Min. :-1404.00
## 1st Qu.: -312 1st Qu.: -1238.2 1st Qu.: -585.8 1st Qu.: -55.75
## Median : 418 Median : -289.0 Median : -192.0 Median : 0.00
## Mean : 0 Mean : 0.0 Mean : 0.0 Mean : 0.00
## 3rd Qu.: 1486 3rd Qu.: 336.8 3rd Qu.: 83.5 3rd Qu.: 52.00
## Max. : 20084 Max. : 23274.0 Max. :11264.0 Max. : 1508.00
## RBIRTH2021 RBIRTH2022 RBIRTH2023 RBIRTH2024
## Min. : 5.745 Min. : 6.085 Min. : 5.879 Min. : 5.687
## 1st Qu.:10.200 1st Qu.:10.335 1st Qu.:10.170 1st Qu.: 9.945
## Median :10.759 Median :10.949 Median :10.816 Median :10.566
## Mean :10.753 Mean :10.934 Mean :10.751 Mean :10.534
## 3rd Qu.:11.427 3rd Qu.:11.620 3rd Qu.:11.526 3rd Qu.:11.386
## Max. :13.800 Max. :13.752 Max. :13.300 Max. :13.156
## RDEATH2021 RDEATH2022 RDEATH2023 RDEATH2024
## Min. : 6.635 Min. : 6.927 Min. : 6.203 Min. : 6.250
## 1st Qu.: 9.560 1st Qu.: 9.701 1st Qu.: 8.755 1st Qu.: 8.620
## Median :10.349 Median :10.681 Median : 9.771 Median : 9.557
## Mean :10.584 Mean :10.763 Mean : 9.778 Mean : 9.524
## 3rd Qu.:11.456 3rd Qu.:11.720 3rd Qu.:10.590 3rd Qu.:10.364
## Max. :15.418 Max. :16.802 Max. :14.620 Max. :13.854
## RNATURALCHG2021 RNATURALCHG2022 RNATURALCHG2023 RNATURALCHG2024
## Min. :-5.88215 Min. :-7.2354 Min. :-5.0872 Min. :-4.90114
## 1st Qu.:-1.20938 1st Qu.:-1.3499 1st Qu.:-0.2312 1st Qu.:-0.06743
## Median : 0.07771 Median : 0.1666 Median : 0.9961 Median : 0.94479
## Mean : 0.16912 Mean : 0.1711 Mean : 0.9728 Mean : 1.00974
## 3rd Qu.: 1.44646 3rd Qu.: 1.6865 3rd Qu.: 2.2238 3rd Qu.: 2.17480
## Max. : 7.16445 Max. : 6.8248 Max. : 7.0972 Max. : 6.90559
## RINTERNATIONALMIG2021 RINTERNATIONALMIG2022 RINTERNATIONALMIG2023
## Min. :-1.3193 Min. :-8.682 Min. :-0.01681
## 1st Qu.: 0.6450 1st Qu.: 2.715 1st Qu.: 3.47209
## Median : 0.9181 Median : 3.678 Median : 5.28127
## Mean : 0.9676 Mean : 4.086 Mean : 5.60466
## 3rd Qu.: 1.2229 3rd Qu.: 5.388 3rd Qu.: 7.32704
## Max. : 2.6071 Max. :14.884 Max. :15.13142
## RINTERNATIONALMIG2024 RDOMESTICMIG2021 RDOMESTICMIG2022 RDOMESTICMIG2023
## Min. : 0.4461 Min. :-15.0204 Min. :-14.9509 Min. :-8.9700
## 1st Qu.: 4.3246 1st Qu.: -2.8993 1st Qu.: -3.0987 1st Qu.:-2.0239
## Median : 6.3290 Median : 0.8057 Median : -0.2123 Median : 0.0000
## Mean : 6.8110 Mean : 1.7731 Mean : 0.7255 Mean : 0.6325
## 3rd Qu.: 8.8537 3rd Qu.: 6.4518 3rd Qu.: 6.3592 3rd Qu.: 4.7545
## Max. :17.9940 Max. : 27.6901 Max. : 16.0157 Max. :15.0045
## RDOMESTICMIG2024 RNETMIG2021 RNETMIG2022 RNETMIG2023
## Min. :-6.4560 Min. :-13.580 Min. :-9.3042 Min. :-2.611
## 1st Qu.:-1.4748 1st Qu.: -1.884 1st Qu.: 0.5504 1st Qu.: 3.336
## Median : 0.1493 Median : 1.722 Median : 3.7743 Median : 5.141
## Mean : 0.6815 Mean : 2.741 Mean : 4.8113 Mean : 6.237
## 3rd Qu.: 3.0308 3rd Qu.: 7.205 3rd Qu.: 9.2830 3rd Qu.: 8.502
## Max. :12.5233 Max. : 28.062 Max. :25.8026 Max. :23.288
## RNETMIG2024
## Min. : 0.3454
## 1st Qu.: 4.9796
## Median : 6.3117
## Mean : 7.4925
## 3rd Qu.: 9.7671
## Max. :20.5432
library(tidyverse)
library(plotly)
library(DT)
# Read the raw estimates file
us_pop <- read_csv(file = "~/Downloads/NST-EST2024-ALLDATA.csv")
# Build the state-level table: keep only summary-level "040" rows, then pick
# the population estimates, rate columns, component counts and net population
# change columns. STATE and NAME are renamed directly inside select().
us_pop_clean <- select(
  filter(us_pop, SUMLEV == "040"),
  State_Code = STATE,
  State_Name = NAME,
  POPESTIMATE2020:POPESTIMATE2024,
  RBIRTH2021:RNETMIG2024,
  BIRTHS2021:NETMIG2024,
  NPOPCHG_2021:NPOPCHG_2024
)
# National series: take the summary-level "010" row and reshape its
# POPESTIMATE<year> columns into one (Year, Population) row per year.
# The "POPESTIMATE" prefix is stripped and the remainder converted to a
# number while pivoting, so no separate mutate() step is needed.
national_data <- us_pop %>%
  filter(SUMLEV == "010") %>%
  select(NAME, POPESTIMATE2020:POPESTIMATE2024) %>%
  pivot_longer(
    cols = starts_with("POPESTIMATE"),
    names_to = "Year",
    names_prefix = "POPESTIMATE",
    names_transform = list(Year = as.numeric),
    values_to = "Population"
  )
# Interactive line-and-marker chart of national population by year.
# The trace is built first and then handed to layout() for titles.
fig_national <- layout(
  plot_ly(
    data = national_data,
    x = ~Year,
    y = ~Population,
    type = "scatter",
    mode = "lines+markers",
    name = "US Population"
  ),
  title = "US Population Trend (2020-2024)",
  xaxis = list(title = "Year"),
  yaxis = list(title = "Population")
)
# Print the widget
fig_national
# Regional series: summary-level "020" rows whose REGION is not "0",
# reshaped to one (REGION, NAME, Year, Population) row per region and year.
# Year is derived from the column name by dropping the "POPESTIMATE" prefix.
regional_data <- us_pop %>%
  filter(SUMLEV == "020", REGION != "0") %>%
  select(REGION, NAME, POPESTIMATE2020:POPESTIMATE2024) %>%
  pivot_longer(
    cols = starts_with("POPESTIMATE"),
    names_to = "Year",
    names_prefix = "POPESTIMATE",
    names_transform = list(Year = as.numeric),
    values_to = "Population"
  )
# Interactive chart of regional population by year; one coloured
# line-and-marker trace per region name.
fig_regional <- layout(
  plot_ly(
    data = regional_data,
    x = ~Year,
    y = ~Population,
    color = ~NAME,
    type = "scatter",
    mode = "lines+markers"
  ),
  title = "Regional Population Trends (2020-2024)",
  xaxis = list(title = "Year"),
  yaxis = list(title = "Population")
)
# Print the widget
fig_regional
# 2024 components of population change, per state (wide form)
components_data <- select(
  us_pop_clean,
  State_Name, NPOPCHG_2024, BIRTHS2024, DEATHS2024, NETMIG2024
)
# Long form: births, deaths and net migration become (Component, Value) rows.
# State_Name and NPOPCHG_2024 are excluded from the pivot, so they are
# carried along unchanged as identifier columns.
components_data_long <- components_data %>%
  pivot_longer(
    cols = !c(State_Name, NPOPCHG_2024),
    names_to = "Component",
    values_to = "Value"
  ) %>%
  mutate(Component = str_remove(Component, "2024")) # drop the year suffix
# Grouped interactive bar chart: for every state, one bar per component
# (births, deaths, net migration), coloured by component.
fig_components <- layout(
  plot_ly(
    data = components_data_long,
    x = ~State_Name,
    y = ~Value,
    color = ~Component,
    type = "bar"
  ),
  title = "Components of Population Change by State (2024)",
  xaxis = list(title = "State"),
  yaxis = list(title = "Change"),
  barmode = "group"
)
# Print the widget
fig_components
# Percentage population change per state from 2023 to 2024.
# Both derived columns are created in one mutate(); Change_Rate can refer to
# Pop_Change because mutate() evaluates its arguments in order.
state_change_data <- us_pop_clean %>%
  mutate(
    Pop_Change = POPESTIMATE2024 - POPESTIMATE2023,
    Change_Rate = (Pop_Change / POPESTIMATE2023) * 100
  ) %>%
  select(State_Name, Change_Rate)
# Interactive bar chart of the 2023-2024 change rate, one bar per state.
# Bars are coloured by their own rate using the "RdBu" palette.
fig_state_change <- layout(
  plot_ly(
    data = state_change_data,
    x = ~State_Name,
    y = ~Change_Rate,
    type = "bar",
    color = ~Change_Rate,
    colors = "RdBu"
  ),
  title = "State-Level Population Change Rate (2023-2024)",
  xaxis = list(title = "State"),
  yaxis = list(title = "Change Rate (%)")
)
# Print the widget
fig_state_change
# Browse the cleaned state-level table interactively:
# ten rows per page, row names hidden
datatable(
  data = us_pop_clean,
  rownames = FALSE,
  options = list(pageLength = 10)
)
library(tidyverse)
library(plotly)
library(DT)
library(viridis)
library(ggthemes)
library(maps)
# Re-read the source file and rebuild the state-level table
# (same inputs and same resulting columns as the earlier load)
us_pop <- read_csv(file = "~/Downloads/NST-EST2024-ALLDATA.csv")
us_pop_clean <- us_pop %>%
  filter(SUMLEV == "040") %>% # summary level "040" rows only
  select(
    State_Code = STATE, # renamed while selecting
    State_Name = NAME,
    POPESTIMATE2020:POPESTIMATE2024,
    RBIRTH2021:RNETMIG2024,
    BIRTHS2021:NETMIG2024,
    NPOPCHG_2021:NPOPCHG_2024
  )
# Create an enhanced national population trend plot using ggplot2
# Reshape the summary-level "010" row to one (Year, Population) row per year;
# Year is the numeric suffix of each POPESTIMATE<year> column name.
national_data <- us_pop %>%
  filter(SUMLEV == "010") %>%
  select(NAME, POPESTIMATE2020:POPESTIMATE2024) %>%
  pivot_longer(
    cols = starts_with("POPESTIMATE"),
    names_to = "Year",
    values_to = "Population"
  ) %>%
  mutate(Year = as.numeric(str_remove(Year, "POPESTIMATE")))
# Line thickness is set with `linewidth`: ggplot2 3.4.0 deprecated `size` for
# line geoms, so `size` in geom_line() emits a deprecation warning on the
# ggplot2 version this report loads. Points still take `size`.
fig_national <- ggplot(national_data, aes(x = Year, y = Population)) +
  geom_line(color = "steelblue", linewidth = 1.2) +
  geom_point(color = "steelblue", size = 3) +
  labs(
    title = "US Population Trend (2020-2024)",
    x = "Year",
    y = "Population"
  ) +
  theme_economist() # Apply a professional theme
ggplotly(fig_national) # Convert to an interactive plot
# Create an enhanced regional population trends plot
# Reshape the summary-level "020" rows (REGION other than "0") to one
# (REGION, NAME, Year, Population) row per region and year.
regional_data <- us_pop %>%
  filter(SUMLEV == "020" & REGION != "0") %>%
  select(REGION, NAME, POPESTIMATE2020:POPESTIMATE2024) %>%
  pivot_longer(
    cols = starts_with("POPESTIMATE"),
    names_to = "Year",
    values_to = "Population"
  ) %>%
  mutate(Year = as.numeric(str_remove(Year, "POPESTIMATE")))
# `linewidth` replaces `size` in geom_line(): `size` for lines was deprecated
# in ggplot2 3.4.0 and triggers a warning on newer versions.
fig_regional <- ggplot(
  regional_data,
  aes(x = Year, y = Population, color = NAME)
) +
  geom_line(linewidth = 1) +
  labs(
    title = "Regional Population Trends (2020-2024)",
    x = "Year",
    y = "Population"
  ) +
  scale_color_viridis(discrete = TRUE, option = "D") + # Color-blind friendly
  theme_excel() # Apply a different theme for variety
ggplotly(fig_regional) # Convert to an interactive plot
# Heatmap of the 2024 components of population change:
# one tile per state/component pair, filled by the component's value.
components_data <- select(
  us_pop_clean,
  State_Name, NPOPCHG_2024, BIRTHS2024, DEATHS2024, NETMIG2024
)
# Long form for plotting; only births, deaths and net migration are pivoted,
# and the "2024" suffix is removed from the component labels.
components_data_long <- pivot_longer(
  components_data,
  cols = c(BIRTHS2024, DEATHS2024, NETMIG2024),
  names_to = "Component",
  values_to = "Value"
) %>%
  mutate(Component = str_remove(Component, "2024"))
fig_components_heatmap <- ggplot(
  data = components_data_long,
  mapping = aes(x = State_Name, y = Component, fill = Value)
) +
  geom_tile(color = "white") + # white borders separate adjacent tiles
  scale_fill_viridis(option = "A") +
  labs(
    title = "Components of Population Change by State (2024) - Heatmap",
    x = "State",
    y = "Component"
  ) +
  theme_minimal() +
  # Must follow theme_minimal(), which would otherwise reset this setting
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(fig_components_heatmap) # Convert to an interactive plot
# Prepare inputs for a possible choropleth map (the map itself is not drawn
# in this code): the 2023-2024 percentage change per state, keeping the
# state code alongside the name.
state_change_data <- us_pop_clean %>%
  mutate(
    Pop_Change = POPESTIMATE2024 - POPESTIMATE2023,
    Change_Rate = (Pop_Change / POPESTIMATE2023) * 100
  ) %>%
  select(State_Name, Change_Rate, State_Code)
# Lookup from state name to abbreviation, built from base R's `state.name`
# and `state.abb` vectors (already character, so no conversion is needed).
# NOTE(review): `State_Code` here holds abbreviations, while `State_Code` in
# `state_change_data` comes from the file's STATE column - confirm which key
# a later join should use. `state.name` also covers only the 50 states.
state_codes <- data.frame(
  State_Name = state.name,
  State_Code = state.abb,
  stringsAsFactors = FALSE
)
This R Markdown file provides a comprehensive analysis of US population trends from 2020 to 2024. It includes various visualizations such as line plots for national and regional trends, bar plots for components of population change, and a heatmap for state-level analysis. The use of interactive plots (via plotly) allows for more detailed exploration of the data.
The analysis covers national population growth, regional trends, state-level changes, and components of population change (births, deaths, and migration). The results are consistent with the reported population growth of nearly 1% between 2023 and 2024, which was the highest since 2001.
The visualizations help illustrate key findings such as:

1. The overall upward trend in US population
2. Differences in population growth rates among regions
3. Variations in components of population change across states
These insights can be valuable for understanding demographic shifts, informing policy decisions, and projecting future population trends.