Load Libraries

# Load necessary libraries for data manipulation and visualization
library(tidyverse)  # For data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)    # For creating static plots
library(readr)      # For reading CSV files
library(dplyr)      # For data manipulation
library(knitr)      # For creating dynamic reports
library(ggcorrplot) # For correlation plots
library(plotly)     # For interactive plots
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggthemes)   # For additional themes for ggplot2

Load Data

# Path to the population-estimates CSV, parsed into a tibble by readr
file_path <- "~/Downloads/NST-EST2024-ALLDATA.csv"
data <- read_csv(file = file_path)
## Rows: 66 Columns: 75
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): SUMLEV, REGION, DIVISION, STATE, NAME
## dbl (70): ESTIMATESBASE2020, POPESTIMATE2020, POPESTIMATE2021, POPESTIMATE20...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data, n = 6L)  # Preview the first six rows of the imported tibble
## # A tibble: 6 × 75
##   SUMLEV REGION DIVISION STATE NAME            ESTIMATESBASE2020 POPESTIMATE2020
##   <chr>  <chr>  <chr>    <chr> <chr>                       <dbl>           <dbl>
## 1 010    0      0        00    United States           331515736       331577720
## 2 020    1      0        00    Northeast Regi…          57617706        57431458
## 3 030    1      1        00    New England              15122011        15057350
## 4 030    1      2        00    Middle Atlantic          42495695        42374108
## 5 020    2      0        00    Midwest Region           68998970        68984258
## 6 030    2      3        00    East North Cen…          47381362        47358568
## # ℹ 68 more variables: POPESTIMATE2021 <dbl>, POPESTIMATE2022 <dbl>,
## #   POPESTIMATE2023 <dbl>, POPESTIMATE2024 <dbl>, NPOPCHG_2020 <dbl>,
## #   NPOPCHG_2021 <dbl>, NPOPCHG_2022 <dbl>, NPOPCHG_2023 <dbl>,
## #   NPOPCHG_2024 <dbl>, BIRTHS2020 <dbl>, BIRTHS2021 <dbl>, BIRTHS2022 <dbl>,
## #   BIRTHS2023 <dbl>, BIRTHS2024 <dbl>, DEATHS2020 <dbl>, DEATHS2021 <dbl>,
## #   DEATHS2022 <dbl>, DEATHS2023 <dbl>, DEATHS2024 <dbl>, NATURALCHG2020 <dbl>,
## #   NATURALCHG2021 <dbl>, NATURALCHG2022 <dbl>, NATURALCHG2023 <dbl>, …

Data Summary

# Per-column overview: length/class/mode for character columns and
# min, quartiles, median, mean and max for numeric columns
summary(object = data)
##     SUMLEV             REGION            DIVISION            STATE          
##  Length:66          Length:66          Length:66          Length:66         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      NAME           ESTIMATESBASE2020   POPESTIMATE2020     POPESTIMATE2021    
##  Length:66          Min.   :   576844   Min.   :   577681   Min.   :   579636  
##  Class :character   1st Qu.:  2943628   1st Qu.:  2943263   1st Qu.:  2940556  
##  Mode  :character   Median :  6024512   Median :  6026060   Median :  6026491  
##                     Mean   : 20141649   Mean   : 20145340   Mean   : 20176693  
##                     3rd Qu.: 18334475   3rd Qu.: 18335226   3rd Qu.: 18381832  
##                     Max.   :331515736   Max.   :331577720   Max.   :332099760  
##  POPESTIMATE2022     POPESTIMATE2023     POPESTIMATE2024      NPOPCHG_2020    
##  Min.   :   581978   Min.   :   585067   Min.   :   587618   Min.   :-186248  
##  1st Qu.:  2938478   1st Qu.:  2945254   1st Qu.:  2949935   1st Qu.:  -4213  
##  Median :  6041694   Median :  6069222   Median :  6103220   Median :   1297  
##  Mean   : 20292264   Mean   : 20461041   Mean   : 20661322   Mean   :   3692  
##  3rd Qu.: 18493586   3rd Qu.: 18618918   3rd Qu.: 18746957   3rd Qu.:  11098  
##  Max.   :334017321   Max.   :336806231   Max.   :340110988   Max.   : 195012  
##   NPOPCHG_2021      NPOPCHG_2022      NPOPCHG_2023      NPOPCHG_2024    
##  Min.   :-379393   Min.   :-150424   Min.   : -16345   Min.   :   -516  
##  1st Qu.:  -3730   1st Qu.:   1262   1st Qu.:  11493   1st Qu.:  16021  
##  Median :   8732   Median :  14397   Median :  34603   Median :  54629  
##  Mean   :  31353   Mean   : 115571   Mean   : 168777   Mean   : 200281  
##  3rd Qu.:  36170   3rd Qu.:  47635   3rd Qu.: 116482   3rd Qu.: 145868  
##  Max.   : 891461   Max.   :1917561   Max.   :2788910   Max.   :3304757  
##    BIRTHS2020       BIRTHS2021        BIRTHS2022        BIRTHS2023     
##  Min.   :  1303   Min.   :   5153   Min.   :   5401   Min.   :   5100  
##  1st Qu.:  6576   1st Qu.:  26649   1st Qu.:  26569   1st Qu.:  26658  
##  Median : 16786   Median :  64726   Median :  66796   Median :  65358  
##  Mean   : 54258   Mean   : 217525   Mean   : 223352   Mean   : 221431  
##  3rd Qu.: 46575   3rd Qu.: 188860   3rd Qu.: 193631   3rd Qu.: 190024  
##  Max.   :894123   Max.   :3584459   Max.   :3680380   Max.   :3648896  
##    BIRTHS2024        DEATHS2020       DEATHS2021        DEATHS2022     
##  Min.   :   5039   Min.   :  1169   Min.   :   5333   Min.   :   5603  
##  1st Qu.:  26320   1st Qu.:  6137   1st Qu.:  28777   1st Qu.:  30731  
##  Median :  64053   Median : 15158   Median :  63466   Median :  64190  
##  Mean   : 218795   Mean   : 51748   Mean   : 208895   Mean   : 209994  
##  3rd Qu.: 189169   3rd Qu.: 51068   3rd Qu.: 171797   3rd Qu.: 171247  
##  Max.   :3605563   Max.   :852024   Max.   :3438423   Max.   :3456354  
##    DEATHS2023        DEATHS2024      NATURALCHG2020   NATURALCHG2021  
##  Min.   :   5035   Min.   :   4985   Min.   :-47253   Min.   :-42470  
##  1st Qu.:  26781   1st Qu.:  25576   1st Qu.: -1270   1st Qu.: -5059  
##  Median :  59512   Median :  58894   Median :   293   Median :   320  
##  Mean   : 191702   Mean   : 187600   Mean   :  2510   Mean   :  8630  
##  3rd Qu.: 160643   3rd Qu.: 156657   3rd Qu.:  3336   3rd Qu.: 11746  
##  Max.   :3154285   Max.   :3086925   Max.   : 51560   Max.   :146036  
##  NATURALCHG2022   NATURALCHG2023   NATURALCHG2024   INTERNATIONALMIG2020
##  Min.   :-38556   Min.   :-16291   Min.   :-15701   Min.   :-1528.0     
##  1st Qu.: -5853   1st Qu.:  -789   1st Qu.:  -494   1st Qu.:  100.8     
##  Median :   798   Median :  3516   Median :  3600   Median :  282.5     
##  Mean   : 13359   Mean   : 29730   Mean   : 31195   Mean   : 1182.0     
##  3rd Qu.: 12673   3rd Qu.: 22448   3rd Qu.: 22722   3rd Qu.:  992.0     
##  Max.   :224026   Max.   :494611   Max.   :518638   Max.   :19885.0     
##  INTERNATIONALMIG2021 INTERNATIONALMIG2022 INTERNATIONALMIG2023
##  Min.   : -4317       Min.   : -28141      Min.   :    -54     
##  1st Qu.:  1749       1st Qu.:   7081      1st Qu.:  10008     
##  Median :  4775       Median :  20549      Median :  27093     
##  Mean   : 22723       Mean   : 102212      Mean   : 139048     
##  3rd Qu.: 17834       3rd Qu.:  72905      3rd Qu.: 104163     
##  Max.   :376004       Max.   :1693535      Max.   :2294299     
##  INTERNATIONALMIG2024 DOMESTICMIG2020   DOMESTICMIG2021   DOMESTICMIG2022  
##  Min.   :    506      Min.   :-132548   Min.   :-497239   Min.   :-447635  
##  1st Qu.:  12621      1st Qu.:  -4732   1st Qu.: -13672   1st Qu.: -28511  
##  Median :  33069      Median :     21   Median :   2324   Median :  -1334  
##  Mean   : 169086      Mean   :      0   Mean   :      0   Mean   :      0  
##  3rd Qu.: 126258      3rd Qu.:   7960   3rd Qu.:  26357   3rd Qu.:  18070  
##  Max.   :2786119      Max.   : 157428   Max.   : 719421   Max.   : 849097  
##  DOMESTICMIG2023   DOMESTICMIG2024       NETMIG2020          NETMIG2021     
##  Min.   :-385255   Min.   :-251161.0   Min.   :-128118.0   Min.   :-433592  
##  1st Qu.: -15728   1st Qu.:  -7257.0   1st Qu.:  -4540.8   1st Qu.:  -5134  
##  Median :      0   Median :    430.5   Median :    322.5   Median :   6435  
##  Mean   :      0   Mean   :      0.0   Mean   :   1182.0   Mean   :  22723  
##  3rd Qu.:   9858   3rd Qu.:  13171.0   3rd Qu.:   8922.2   3rd Qu.:  31387  
##  Max.   : 678977   Max.   : 411004.0   Max.   : 166386.0   Max.   : 874645  
##    NETMIG2022        NETMIG2023        NETMIG2024       RESIDUAL2020     
##  Min.   :-183999   Min.   : -51308   Min.   :    255   Min.   :-10877.0  
##  1st Qu.:   1278   1st Qu.:   8683   1st Qu.:  14173   1st Qu.:  -373.8  
##  Median :  17142   Median :  28818   Median :  40258   Median :   284.5  
##  Mean   : 102212   Mean   : 139048   Mean   : 169086   Mean   :     0.0  
##  3rd Qu.:  39641   3rd Qu.:  91000   3rd Qu.: 120446   3rd Qu.:  1212.8  
##  Max.   :1693535   Max.   :2294299   Max.   :2786119   Max.   : 12743.0  
##   RESIDUAL2021     RESIDUAL2022       RESIDUAL2023      RESIDUAL2024     
##  Min.   :-13864   Min.   :-14403.0   Min.   :-6597.0   Min.   :-1404.00  
##  1st Qu.:  -312   1st Qu.: -1238.2   1st Qu.: -585.8   1st Qu.:  -55.75  
##  Median :   418   Median :  -289.0   Median : -192.0   Median :    0.00  
##  Mean   :     0   Mean   :     0.0   Mean   :    0.0   Mean   :    0.00  
##  3rd Qu.:  1486   3rd Qu.:   336.8   3rd Qu.:   83.5   3rd Qu.:   52.00  
##  Max.   : 20084   Max.   : 23274.0   Max.   :11264.0   Max.   : 1508.00  
##    RBIRTH2021       RBIRTH2022       RBIRTH2023       RBIRTH2024    
##  Min.   : 5.745   Min.   : 6.085   Min.   : 5.879   Min.   : 5.687  
##  1st Qu.:10.200   1st Qu.:10.335   1st Qu.:10.170   1st Qu.: 9.945  
##  Median :10.759   Median :10.949   Median :10.816   Median :10.566  
##  Mean   :10.753   Mean   :10.934   Mean   :10.751   Mean   :10.534  
##  3rd Qu.:11.427   3rd Qu.:11.620   3rd Qu.:11.526   3rd Qu.:11.386  
##  Max.   :13.800   Max.   :13.752   Max.   :13.300   Max.   :13.156  
##    RDEATH2021       RDEATH2022       RDEATH2023       RDEATH2024    
##  Min.   : 6.635   Min.   : 6.927   Min.   : 6.203   Min.   : 6.250  
##  1st Qu.: 9.560   1st Qu.: 9.701   1st Qu.: 8.755   1st Qu.: 8.620  
##  Median :10.349   Median :10.681   Median : 9.771   Median : 9.557  
##  Mean   :10.584   Mean   :10.763   Mean   : 9.778   Mean   : 9.524  
##  3rd Qu.:11.456   3rd Qu.:11.720   3rd Qu.:10.590   3rd Qu.:10.364  
##  Max.   :15.418   Max.   :16.802   Max.   :14.620   Max.   :13.854  
##  RNATURALCHG2021    RNATURALCHG2022   RNATURALCHG2023   RNATURALCHG2024   
##  Min.   :-5.88215   Min.   :-7.2354   Min.   :-5.0872   Min.   :-4.90114  
##  1st Qu.:-1.20938   1st Qu.:-1.3499   1st Qu.:-0.2312   1st Qu.:-0.06743  
##  Median : 0.07771   Median : 0.1666   Median : 0.9961   Median : 0.94479  
##  Mean   : 0.16912   Mean   : 0.1711   Mean   : 0.9728   Mean   : 1.00974  
##  3rd Qu.: 1.44646   3rd Qu.: 1.6865   3rd Qu.: 2.2238   3rd Qu.: 2.17480  
##  Max.   : 7.16445   Max.   : 6.8248   Max.   : 7.0972   Max.   : 6.90559  
##  RINTERNATIONALMIG2021 RINTERNATIONALMIG2022 RINTERNATIONALMIG2023
##  Min.   :-1.3193       Min.   :-8.682        Min.   :-0.01681     
##  1st Qu.: 0.6450       1st Qu.: 2.715        1st Qu.: 3.47209     
##  Median : 0.9181       Median : 3.678        Median : 5.28127     
##  Mean   : 0.9676       Mean   : 4.086        Mean   : 5.60466     
##  3rd Qu.: 1.2229       3rd Qu.: 5.388        3rd Qu.: 7.32704     
##  Max.   : 2.6071       Max.   :14.884        Max.   :15.13142     
##  RINTERNATIONALMIG2024 RDOMESTICMIG2021   RDOMESTICMIG2022   RDOMESTICMIG2023 
##  Min.   : 0.4461       Min.   :-15.0204   Min.   :-14.9509   Min.   :-8.9700  
##  1st Qu.: 4.3246       1st Qu.: -2.8993   1st Qu.: -3.0987   1st Qu.:-2.0239  
##  Median : 6.3290       Median :  0.8057   Median : -0.2123   Median : 0.0000  
##  Mean   : 6.8110       Mean   :  1.7731   Mean   :  0.7255   Mean   : 0.6325  
##  3rd Qu.: 8.8537       3rd Qu.:  6.4518   3rd Qu.:  6.3592   3rd Qu.: 4.7545  
##  Max.   :17.9940       Max.   : 27.6901   Max.   : 16.0157   Max.   :15.0045  
##  RDOMESTICMIG2024   RNETMIG2021       RNETMIG2022       RNETMIG2023    
##  Min.   :-6.4560   Min.   :-13.580   Min.   :-9.3042   Min.   :-2.611  
##  1st Qu.:-1.4748   1st Qu.: -1.884   1st Qu.: 0.5504   1st Qu.: 3.336  
##  Median : 0.1493   Median :  1.722   Median : 3.7743   Median : 5.141  
##  Mean   : 0.6815   Mean   :  2.741   Mean   : 4.8113   Mean   : 6.237  
##  3rd Qu.: 3.0308   3rd Qu.:  7.205   3rd Qu.: 9.2830   3rd Qu.: 8.502  
##  Max.   :12.5233   Max.   : 28.062   Max.   :25.8026   Max.   :23.288  
##   RNETMIG2024     
##  Min.   : 0.3454  
##  1st Qu.: 4.9796  
##  Median : 6.3117  
##  Mean   : 7.4925  
##  3rd Qu.: 9.7671  
##  Max.   :20.5432

Data Visualization

2. Advanced Visualizations and Analysis

library(tidyverse)
library(plotly)
library(DT)
library(viridis)
library(ggthemes)
library(maps)

# Re-import the raw estimates file for the visualizations below
us_pop <- read_csv(file = "~/Downloads/NST-EST2024-ALLDATA.csv")

# State-level table: keep only rows with summary level "040" and the yearly
# measure columns. The renaming is done inside select(), so STATE and NAME
# come out as State_Code and State_Name in the first two positions.
# NOTE(review): the `a:b` ranges are positional, so every column located
# between the two endpoints in the file is kept as well -- confirm that the
# CSV's column order matches what each range is meant to capture.
us_pop_clean <- us_pop %>%
  filter(SUMLEV == "040") %>%
  select(
    State_Code = STATE,
    State_Name = NAME,
    POPESTIMATE2020:POPESTIMATE2024,
    RBIRTH2021:RNETMIG2024,
    BIRTHS2021:NETMIG2024,
    NPOPCHG_2021:NPOPCHG_2024
  )

# National population trend, 2020-2024, as an interactive line chart.
# Take the rows with summary level "010" (the "United States" row) and reshape
# the yearly POPESTIMATE<year> columns into one (Year, Population) pair per
# row. `names_prefix` strips the "POPESTIMATE" prefix and `names_transform`
# converts the remaining year text to numeric during the pivot itself.
national_data <- us_pop %>%
  filter(SUMLEV == "010") %>%
  select(NAME, POPESTIMATE2020:POPESTIMATE2024) %>%
  pivot_longer(
    cols = starts_with("POPESTIMATE"),
    names_to = "Year",
    names_prefix = "POPESTIMATE",
    names_transform = list(Year = as.numeric),
    values_to = "Population"
  )

# Line thickness is set with `linewidth`: using `size` for lines is deprecated
# since ggplot2 3.4.0 and emits a warning. `size` remains correct for points.
fig_national <- ggplot(national_data, aes(x = Year, y = Population)) +
  geom_line(color = "steelblue", linewidth = 1.2) +
  geom_point(color = "steelblue", size = 3) +
  labs(
    title = "US Population Trend (2020-2024)",
    x = "Year",
    y = "Population"
  ) +
  theme_economist()  # Theme supplied by ggthemes

ggplotly(fig_national)  # Convert the static ggplot into an interactive plotly widget
# Regional population trends, 2020-2024, one colored line per region.
# Keep rows with summary level "020" whose REGION is not "0", then reshape the
# yearly POPESTIMATE<year> columns into (Year, Population) pairs.
# `names_prefix` strips the "POPESTIMATE" prefix and `names_transform`
# converts the remaining year text to numeric during the pivot itself.
regional_data <- us_pop %>%
  filter(SUMLEV == "020" & REGION != "0") %>%
  select(REGION, NAME, POPESTIMATE2020:POPESTIMATE2024) %>%
  pivot_longer(
    cols = starts_with("POPESTIMATE"),
    names_to = "Year",
    names_prefix = "POPESTIMATE",
    names_transform = list(Year = as.numeric),
    values_to = "Population"
  )

# Line thickness is set with `linewidth`: using `size` for lines is deprecated
# since ggplot2 3.4.0 and emits a warning.
fig_regional <- ggplot(regional_data, aes(x = Year, y = Population, color = NAME)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Regional Population Trends (2020-2024)",
    x = "Year",
    y = "Population"
  ) +
  scale_color_viridis(discrete = TRUE, option = "D") +  # Discrete viridis palette
  theme_excel()  # Theme supplied by ggthemes

ggplotly(fig_regional)  # Convert the static ggplot into an interactive plotly widget
# Heatmap of the 2024 components of population change, one tile per
# state/component pair.
# Narrow the state table to the state name plus the 2024 change measures.
components_data <- select(
  us_pop_clean,
  State_Name,
  NPOPCHG_2024,
  BIRTHS2024,
  DEATHS2024,
  NETMIG2024
)

# Stack births, deaths and net migration into Component/Value pairs.
# NPOPCHG_2024 is not pivoted and stays as its own column.
components_data_long <- pivot_longer(
  components_data,
  cols = c(BIRTHS2024, DEATHS2024, NETMIG2024),
  names_to = "Component",
  values_to = "Value"
)

# Drop the "2024" suffix so the axis labels read BIRTHS / DEATHS / NETMIG.
components_data_long <- mutate(
  components_data_long,
  Component = str_remove(Component, "2024")
)

# States on the x axis, components on the y axis, fill mapped to the value.
# theme() comes after theme_minimal() so the rotated x labels are not reset.
fig_components_heatmap <- ggplot(
  data = components_data_long,
  mapping = aes(x = State_Name, y = Component, fill = Value)
) +
  geom_tile(color = "white") +
  scale_fill_viridis(option = "A") +
  labs(
    title = "Components of Population Change by State (2024) - Heatmap",
    x = "State",
    y = "Component"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplotly(fig_components_heatmap)
# Inputs for a possible choropleth map (the map itself is not drawn here).
# Change_Rate is the 2023 -> 2024 change in the population estimate expressed
# as a percentage of the 2023 estimate; Pop_Change is only an intermediate.
state_change_data <- us_pop_clean %>%
  mutate(
    Pop_Change = POPESTIMATE2024 - POPESTIMATE2023,
    Change_Rate = (Pop_Change / POPESTIMATE2023) * 100
  ) %>%
  select(State_Name, Change_Rate, State_Code)

# Lookup of state names to abbreviations, built from R's built-in
# `state.name` and `state.abb` vectors (already character, so no conversion).
# NOTE(review): State_Code here holds abbreviations, whereas State_Code in
# state_change_data comes from the file's STATE column -- confirm which key a
# later join or map should use.
state_codes <- data.frame(
  State_Name = state.name,
  State_Code = state.abb,
  stringsAsFactors = FALSE
)

Conclusion and Next Steps

This R Markdown file analyzes US population trends from 2020 to 2024. It includes line plots of the national and regional population trends and a heatmap of the components of population change by state. Because the plots are interactive (via plotly), readers can explore the underlying values in more detail.

The analysis covers national population growth, regional trends, state-level changes, and the components of population change (births, deaths, and migration). The national estimates are consistent with the reported population growth of nearly 1% between 2023 and 2024, which was the highest since 2001.

The visualizations help illustrate key findings such as:

1. The overall upward trend in the US population
2. Differences in population growth rates among regions
3. Variations in the components of population change across states

These insights can be valuable for understanding demographic shifts, informing policy decisions, and projecting future population trends.