#|label: load-packages
#|include: false

# Set the knitr default so that chunk source code is echoed in the rendered
# document unless a chunk overrides it.
knitr::opts_chunk$set(echo = TRUE)

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
# Read the aging-Congress dataset directly from the GitHub-hosted CSV.
# `show_col_types = FALSE` suppresses readr's column-specification message.
congress_demographics <- read_csv(
  file = "https://raw.githubusercontent.com/mraynolds/data_607/refs/heads/main/data_aging_congress.csv",
  show_col_types = FALSE
)

Overview: Our Aging Congress

Congress is older than ever, and getting older. This is the topic of 538’s article “Congress Today Is Older Than It’s Ever Been.” The article examines the generational demographics of the United States Congress from the 66th through the 118th Congress, spanning 1919 to 2023. It explores how Congress has aged over time compared with the rest of the population, and considers how an older Congress may prioritize issues relevant to its own demographic.

To read the original article see https://fivethirtyeight.com/features/aging-congress-boomers/.

To access the original data see https://raw.githubusercontent.com/mraynolds/data_607/refs/heads/main/data_aging_congress.csv

A glimpse at the original data:

# Print a compact, column-by-column preview of the raw import.
congress_demographics |> glimpse()
## Rows: 29,120
## Columns: 13
## $ congress      <dbl> 82, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, …
## $ start_date    <date> 1951-01-03, 1947-01-03, 1949-01-03, 1951-01-03, 1953-01…
## $ chamber       <chr> "House", "House", "House", "House", "House", "House", "H…
## $ state_abbrev  <chr> "ND", "VA", "VA", "VA", "VA", "VA", "VA", "VA", "VA", "V…
## $ party_code    <dbl> 200, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 1…
## $ bioname       <chr> "AANDAHL, Fred George", "ABBITT, Watkins Moorman", "ABBI…
## $ bioguide_id   <chr> "A000001", "A000002", "A000002", "A000002", "A000002", "…
## $ birthday      <date> 1897-04-09, 1908-05-21, 1908-05-21, 1908-05-21, 1908-05…
## $ cmltv_cong    <dbl> 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 2, 3, 4…
## $ cmltv_chamber <dbl> 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 2, 3, 4…
## $ age_days      <dbl> 19626, 14106, 14837, 15567, 16298, 17028, 17759, 18489, …
## $ age_years     <dbl> 53.73306, 38.62012, 40.62149, 42.62012, 44.62149, 46.620…
## $ generation    <chr> "Lost", "Greatest", "Greatest", "Greatest", "Greatest", …

The following code transforms the original data for use during analysis:

# Second name for the raw import. Nothing in the code shown here reads it;
# presumably used by chunks that are not visible -- confirm before removing.
g_demo <- congress_demographics

# Build the analysis table `cong_demo` from the raw import:
#   1. keep congress..bioname, birthday and age_days..generation (this drops
#      bioguide_id, cmltv_cong and cmltv_chamber);
#   2. move the most-used columns to the front and give them clearer names;
#   3. sort by congress, youngest member first;
#   4. recode the numeric party code into a party label;
#   5. split the member name and both date columns into their components.
cong_demo <- congress_demographics |>
  select(congress:bioname, birthday, age_days:generation) |>
  relocate(
    bioname, age_years, congress, state_abbrev, party_code, chamber,
    generation
  ) |>
  rename(
    party = party_code,
    age_in_days = age_days,
    age_in_years = age_years
  ) |>
  janitor::clean_names() |>
  arrange(congress, age_in_years) |>
  mutate(
    # Match each code exactly rather than by substring replacement, so a code
    # that merely contains "100", "200" or "328" can never be mislabelled.
    # Missing codes stay missing; every other code is grouped as "Other".
    party = case_when(
      is.na(party) ~ NA_character_,
      party == 100 ~ "Democrat",
      party == 200 ~ "Republican",
      party == 328 ~ "Independent",
      .default = "Other"
    )
  ) |>
  # bioname looks like "LAST, First Middle". Only the first comma splits;
  # later commas are merged into name_first. A name with no comma is an error
  # (stated explicitly here; it was previously left as an empty argument that
  # fell back to the same default).
  # NOTE: the delimiter is the bare comma, so name_first keeps the space that
  # followed it (e.g. " King").
  separate_wider_delim(
    bioname,
    delim = ",",
    names = c("name_last", "name_first"),
    too_few = "error",
    too_many = "merge"
  ) |>
  # The date columns are split on "-" into character year/month/day parts.
  separate_wider_delim(
    start_date,
    delim = "-",
    names = c("start_year", "start_month", "start_day")
  ) |>
  separate_wider_delim(
    birthday,
    delim = "-",
    names = c("birth_year", "birth_month", "birth_day")
  )

A glimpse at the transformed data:

# Print a compact, column-by-column preview of the transformed table.
cong_demo |> glimpse()
## Rows: 29,120
## Columns: 15
## $ name_last    <chr> "McLEOD", "SWOPE", "MILLIGAN", "FISH", "PERLMAN", "BAER",…
## $ name_first   <chr> " Clarence John", " King", " Jacob Le Roy", " Hamilton", …
## $ age_in_years <dbl> 23.66598, 25.56057, 29.98220, 30.23409, 31.58385, 32.9281…
## $ congress     <dbl> 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 6…
## $ state_abbrev <chr> "MI", "KY", "MO", "NY", "NY", "ND", "NY", "MD", "SC", "WI…
## $ party        <chr> "Republican", "Republican", "Democrat", "Republican", "Re…
## $ chamber      <chr> "House", "House", "House", "House", "House", "House", "Ho…
## $ generation   <chr> "Lost", "Lost", "Lost", "Lost", "Lost", "Lost", "Lost", "…
## $ start_year   <chr> "1919", "1919", "1919", "1919", "1919", "1919", "1919", "…
## $ start_month  <chr> "03", "03", "03", "03", "03", "03", "03", "03", "03", "03…
## $ start_day    <chr> "04", "04", "04", "04", "04", "04", "04", "04", "04", "04…
## $ birth_year   <chr> "1895", "1893", "1889", "1888", "1887", "1886", "1885", "…
## $ birth_month  <chr> "07", "08", "03", "12", "08", "03", "12", "06", "05", "05…
## $ birth_day    <chr> "03", "10", "09", "07", "02", "29", "27", "20", "07", "06…
## $ age_in_days  <dbl> 8644, 9336, 10951, 11043, 11536, 12027, 12119, 12309, 123…

Preliminary Data Exploration

A plot of the average age by the number of the congress shows the age trending up over time.

A plot of the average age by congress separated by major party (Democrat or Republican) shows the age trending up over time.

Other parties are not plotted here. For a sense of the scale of that omission, here is a table counting the observations by party.

## # A tibble: 4 × 2
##   party           n
##   <chr>       <int>
## 1 Democrat    15804
## 2 Republican  13181
## 3 Other          91
## 4 Independent    44

Findings & Recommendations

A closer look at the demographics of age by location and party would be an interesting extension of the work in the article. An analysis of the average time spent in Congress, plotted over time, would also provide insight into how members are treating the role: as an act of public service or as a career. Finally, the data would benefit from an update to include the 119th Congress.