# Knitr default: echo the source of every chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

# Install pacman only when it is missing. requireNamespace() tests for the
# package without attaching it (and without the "Loading required package"
# message); attaching is unnecessary because every call below is written as
# pacman::p_load().
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# p_load() installs a package if it is missing and then attaches it.
pacman::p_load("tidyverse")
pacman::p_load("eurostat")
pacman::p_load("stringr")
pacman::p_load("lubridate")
pacman::p_load("remotes")
pacman::p_load("gganimate")

https://www.nytimes.com/interactive/2018/08/04/upshot/up-birth-age-gap.html

Relevant datasets

# Search the Eurostat table of contents for entries matching
# "births by mother's age" and render the hits as a table.
id <- search_eurostat(pattern = "births by mother's age")

knitr::kable(x = id)
title code type last update of data last table structure change data start data end values
Live births by mother’s age and NUTS 2 region demo_r_fagec dataset 01.03.2019 28.02.2019 1990 2017 NA
Live births by mother’s age and newborn’s sex demo_fasec dataset 02.04.2019 28.02.2019 2007 2017 NA
Live births by mother’s age and birth order demo_fordagec dataset 02.04.2019 14.03.2019 1960 2017 NA
Live births by mother’s age and legal marital status demo_fagec dataset 02.04.2019 21.03.2019 1960 2017 NA
Live births by mother’s age and educational attainment level demo_faeduc dataset 28.02.2019 28.02.2019 2007 2017 NA
Live births by mother’s age and activity status demo_faemplc dataset 27.02.2019 27.02.2019 2007 2017 NA
Live births by mother’s age and citizenship demo_faczc dataset 22.03.2019 28.02.2019 2007 2017 NA
Live births by mother’s age and country of birth demo_facbc dataset 21.03.2019 28.02.2019 2007 2017 NA
Live births by mother’s age and NUTS 2 region demo_r_fagec dataset 01.03.2019 28.02.2019 1990 2017 NA

1980 VS 2016

# Look the table up by title. The result is stored in `id`, but the download
# below passes the dataset code "demo_fagec" directly.
id <- search_eurostat(
  pattern = "Live births by mother’s age and legal marital status"
)

# Download the table and replace Eurostat codes with their labels.
demo_fagec <- get_eurostat(id = "demo_fagec") %>%
  label_eurostat()

# Total live births in Italy, one row per year and single year of mother's age.
demo_fagec_italy <- demo_fagec %>%
  filter(
    geo == "Italy",
    indic_de == "Live births - total"
  ) %>%
  mutate(age = as.character(age)) %>%
  # Keep only labels that start with two digits followed by " years"; any
  # other age category is dropped.
  filter(
    stringr::str_detect(string = age, pattern = "^[[:digit:]][[:digit:]] years")
  ) %>%
  mutate(
    # Turn the two leading digits of the label into a numeric age.
    age = as.numeric(
      stringr::str_extract(string = age, pattern = "[[:digit:]][[:digit:]]")
    ),
    year = lubridate::year(time)
  ) %>%
  # Strictly below 45, i.e. ages up to and including 44.
  filter(age < 45) %>%
  select(-unit, -indic_de, -geo, -time)
    


# Animated bar chart of live births in Italy by mother's age, animated over
# `year` with transition_time().
demo_fagec_italy_gg_animated <- demo_fagec_italy %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(
    name = "Number of live births per year",
    labels = scales::comma
  ) +
  scale_x_continuous(name = "Mother's age at birth") +
  # Diverging fill centred on age 25 with fixed limits so colours stay stable
  # across frames. guide = "none" hides the legend; the former guide = FALSE
  # is deprecated since ggplot2 3.3.4 and emits a warning.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, limits = c(14, 44), guide = "none"
  ) +
  theme_minimal() +
  # frame_time is rounded so the title always shows a whole year.
  labs(title = "Number of live births by age of mother in Italy in {round(frame_time)}") +
  transition_time(year) +
  ease_aes("linear")

# Optional renderers / exports, kept disabled:
# animate(plot = demo_fagec_italy_gg_animated, renderer = ffmpeg_renderer())

# anim_save(filename = "demo_fagec_italy_gg_animated.mp4", animation = animate(plot = demo_fagec_italy_gg_animated, renderer = ffmpeg_renderer()))

# anim_save(filename = "demo_fagec_italy_gg_animated.gif", animation = animate(plot = demo_fagec_italy_gg_animated))
# knitr::kable(demo_fagec_italy %>% filter(year == 2016))

# Printing the object renders the animation.
demo_fagec_italy_gg_animated

# Two-state animation that flips between the 1980 and the 2016 age
# distribution of live births in Italy.
demo_fagec_italy_gg_animated_1980_2016 <- demo_fagec_italy %>%
  filter(year %in% c(1980, 2016)) %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(
    name = "Number of live births per year",
    labels = scales::comma
  ) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.4 and emits a warning.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, guide = "none"
  ) +
  theme_minimal() +
  # {closest_state} is filled in by transition_states() with the current year.
  labs(title = "Number of live births by age of mother in Italy in {closest_state}") +
  transition_states(
    year,
    transition_length = 0.5,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out")

# Printing the object renders the animation.
demo_fagec_italy_gg_animated_1980_2016

By marriage status

# Live births in Italy in 2016 by single year of mother's age, split into
# births in marriage and births outside marriage.
demo_fagec_italy_2016_marriage <- demo_fagec %>%
  filter(geo == "Italy") %>%
  mutate(
    age = as.character(age),
    indic_de = as.character(indic_de),
    year = lubridate::year(time)
  ) %>%
  filter(
    # Keep only labels that start with two digits followed by " years".
    stringr::str_detect(string = age, pattern = "^[[:digit:]][[:digit:]] years"),
    # Only the two marital-status breakdowns, not the overall total.
    indic_de %in% c("In marriage", "Outside marriage"),
    year == 2016
  ) %>%
  # Turn the two leading digits of the label into a numeric age.
  mutate(
    age = as.numeric(
      stringr::str_extract(string = age, pattern = "[[:digit:]][[:digit:]]")
    )
  ) %>%
  # Strictly below 45, i.e. ages up to and including 44.
  filter(age < 45) %>%
  select(-unit, -geo, -time)
# Two-state animation switching between births in marriage and births outside
# marriage. Despite the "1980_2016" in the object name, the input data
# (demo_fagec_italy_2016_marriage) and the title cover 2016 only.
demo_fagec_italy_gg_animated_1980_2016_marriage <-
  demo_fagec_italy_2016_marriage %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(
    name = "Number of live births per year",
    labels = scales::comma
  ) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.4 and emits a warning.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, guide = "none"
  ) +
  theme_minimal() +
  # {closest_state} is filled in with the current indic_de value.
  labs(title = "Number of live births by marriage status of the mother in Italy in 2016:\n{closest_state}") +
  transition_states(
    indic_de,
    transition_length = 0.5,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out")

# Printing the object renders the animation.
demo_fagec_italy_gg_animated_1980_2016_marriage

By education

# Look the table up by title. The result is stored in `id`, but the download
# below passes the dataset code "demo_faeduc" directly.
id <- search_eurostat(
  pattern = "Live births by mother's age and educational attainment level"
)

# Download the table and replace Eurostat codes with their labels.
demo_faeduc <- get_eurostat(id = "demo_faeduc") %>%
  label_eurostat()

# Live births in Romania by year, single year of mother's age and the three
# broad education groups.
demo_faeduc_ro <- demo_faeduc %>%
  filter(geo == "Romania") %>%
  mutate(age = as.character(age)) %>%
  # Keep only labels that start with two digits followed by " years".
  filter(
    stringr::str_detect(string = age, pattern = "^[[:digit:]][[:digit:]] years")
  ) %>%
  mutate(
    # Turn the two leading digits of the label into a numeric age.
    age = as.numeric(
      stringr::str_extract(string = age, pattern = "[[:digit:]][[:digit:]]")
    ),
    year = lubridate::year(time),
    # Recode to an ordered set of shorter labels; every category that is not
    # one of the three listed levels becomes NA and is removed just below.
    isced11 = factor(
      x = as.character(isced11),
      levels = c(
        "Less than primary, primary and lower secondary education (levels 0-2)",
        "Upper secondary and post-secondary non-tertiary education (levels 3 and 4)",
        "Tertiary education (levels 5-8)"
      ),
      labels = c(
        "Less than primary, primary and lower secondary education",
        "Upper secondary and post-secondary non-tertiary education",
        "Tertiary education"
      )
    )
  ) %>%
  # Strictly below 45, i.e. ages up to and including 44.
  filter(age < 45, !is.na(isced11)) %>%
  select(-unit, -geo, -time)

# Animation cycling through the three education groups for Romania.
# NOTE(review): demo_faeduc_ro is not restricted to a single year, so
# geom_col() stacks every available year into each bar — confirm this is
# intended given the "per year" axis label.
demo_faeduc_ro_gganim <- demo_faeduc_ro %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(
    name = "Number of live births per year",
    labels = scales::comma
  ) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.4 and emits a warning.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, guide = "none"
  ) +
  theme_minimal() +
  # {closest_state} is filled in with the current education group.
  labs(title = "Number of live births by education level of the mother in Romania:\n{closest_state}") +
  transition_states(
    isced11,
    transition_length = 0.5,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out")

# Printing the object renders the animation.
demo_faeduc_ro_gganim

# Same preparation as for Romania, but for a set of South-East European
# countries; `geo` is kept so the plots can facet by country.
demo_faeduc_see <- demo_faeduc %>%
  # Shorten the long official label so it can be matched as "Macedonia".
  mutate(
    geo = stringr::str_replace(
      string = as.character(geo),
      pattern = "Former Yugoslav Republic of Macedonia, the",
      replacement = "Macedonia"
    )
  ) %>%
  # Georgia, Armenia and Azerbaijan were considered but are left out.
  filter(
    geo %in% c(
      "Romania", "Serbia", "Bulgaria", "Albania",
      "Croatia", "Macedonia", "Slovenia"
    )
  ) %>%
  mutate(age = as.character(age)) %>%
  # Keep only labels that start with two digits followed by " years".
  filter(
    stringr::str_detect(string = age, pattern = "^[[:digit:]][[:digit:]] years")
  ) %>%
  mutate(
    # Turn the two leading digits of the label into a numeric age.
    age = as.numeric(
      stringr::str_extract(string = age, pattern = "[[:digit:]][[:digit:]]")
    ),
    year = lubridate::year(time),
    # Recode to an ordered set of shorter labels; every category that is not
    # one of the three listed levels becomes NA and is removed just below.
    isced11 = factor(
      x = as.character(isced11),
      levels = c(
        "Less than primary, primary and lower secondary education (levels 0-2)",
        "Upper secondary and post-secondary non-tertiary education (levels 3 and 4)",
        "Tertiary education (levels 5-8)"
      ),
      labels = c(
        "Less than primary, primary and lower secondary education",
        "Upper secondary and post-secondary non-tertiary education",
        "Tertiary education"
      )
    )
  ) %>%
  # Strictly below 45, i.e. ages up to and including 44.
  filter(age < 45, !is.na(isced11)) %>%
  select(-unit, -time)

# Animation cycling through the three education groups, one facet row per
# country in demo_faeduc_see.
# NOTE(review): demo_faeduc_see is not restricted to a single year, so
# geom_col() stacks every available year into each bar — confirm this is
# intended given the "per year" axis label.
demo_faeduc_see_gganim <- demo_faeduc_see %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(
    name = "Number of live births per year",
    labels = scales::comma
  ) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.4 and emits a warning.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, guide = "none"
  ) +
  theme_minimal() +
  facet_grid(geo ~ .) +
  # The title previously read "marriage status ... in Romania" (copied from
  # another plot); the states are education groups and the facets cover
  # several countries.
  labs(title = "Number of live births by education level of the mother in SEE:\n{closest_state}") +
  transition_states(
    isced11,
    transition_length = 0.5,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out")

# Printing the object renders the animation.
demo_faeduc_see_gganim

# Share of births at each mother's age within a country and education group.
# The denominator pools all years: the plot built from this table stacks every
# year into one bar per age, so per-year shares (the previous grouping by
# geo, year, isced11) added up to roughly n_years * 100% in each panel. With
# a pooled denominator the stacked bars of a panel sum to 100%.
demo_faeduc_see_pct <- demo_faeduc_see %>%
  group_by(geo, isced11) %>%
  # na.rm = TRUE: one missing cell must not turn the whole group into NA.
  mutate(pct = values / sum(values, na.rm = TRUE)) %>%
  # Drop the grouping so later verbs are not silently applied per group.
  ungroup()

# Animation of the age distribution (as shares) of births per education
# group, one facet row per country in demo_faeduc_see_pct.
demo_faeduc_see_gganim_pct <- demo_faeduc_see_pct %>%
  ggplot(mapping = aes(x = age, y = pct, fill = age)) +
  geom_col() +
  scale_y_continuous(
    name = "Share of births by age of mother",
    labels = scales::percent
  ) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.4 and emits a warning.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, guide = "none"
  ) +
  theme_minimal() +
  facet_grid(geo ~ .) +
  # {closest_state} is filled in with the current education group.
  labs(
    title = "Share of births by age of mother and education in SEE\n{closest_state}",
    caption = "Source: Eurostat, indicator demo_faeduc"
  ) +
  transition_states(
    isced11,
    transition_length = 0.5,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out")

# Printing the object renders the animation.
demo_faeduc_see_gganim_pct

By birth order

# Download the births-by-birth-order table and replace Eurostat codes with
# their labels.
demo_fordagec <- get_eurostat(id = "demo_fordagec") %>%
  label_eurostat()

# First births in Italy, one row per year and single year of mother's age.
demo_fordagec_italy <- demo_fordagec %>%
  filter(geo == "Italy", ord_brth == "First") %>%
  mutate(age = as.character(age)) %>%
  # Keep only labels that start with two digits followed by " years".
  filter(
    stringr::str_detect(string = age, pattern = "^[[:digit:]][[:digit:]] years")
  ) %>%
  mutate(
    # Turn the two leading digits of the label into a numeric age.
    age = as.numeric(
      stringr::str_extract(string = age, pattern = "[[:digit:]][[:digit:]]")
    ),
    year = lubridate::year(time)
  ) %>%
  # Strictly below 45, i.e. ages up to and including 44.
  filter(age < 45) %>%
  select(-unit, -geo, -time)

# Static bar chart of the 2016 slice of demo_fordagec_italy.
# knitr::kable(demo_fordagec_italy %>% filter(year == 2016))
ggplot(
  data = filter(demo_fordagec_italy, year == 2016),
  mapping = aes(x = age, y = values)
) +
  geom_col()
# Animated bar chart of first births in Italy by mother's age, animated over
# `year` with transition_time().
demo_fordagec_italy_gg_animated <- demo_fordagec_italy %>%
  ggplot(mapping = aes(x = age, y = values, fill = age)) +
  geom_col() +
  scale_y_continuous(
    name = "Number of live births per year",
    labels = scales::comma
  ) +
  scale_x_continuous(name = "Ages of first-time mothers") +
  # Fixed limits keep colours stable across frames. guide = "none" hides the
  # legend; guide = FALSE is deprecated since ggplot2 3.3.4 and emits a
  # warning.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, limits = c(14, 44), guide = "none"
  ) +
  theme_minimal() +
  # frame_time is rounded so the title always shows a whole year.
  labs(title = "Number of live births by age of first-time mothers in Italy in {round(frame_time)}") +
  transition_time(year) +
  ease_aes("linear")

# Printing the object renders the animation.
demo_fordagec_italy_gg_animated

# Optional renderers / exports, kept disabled:
# animate(plot = demo_fordagec_italy_gg_animated, renderer = ffmpeg_renderer())

# anim_save(filename = "demo_fordagec_italy_gg_animated.mp4", animation = animate(plot = demo_fordagec_italy_gg_animated, renderer = ffmpeg_renderer()))

# anim_save(filename = "demo_fordagec_italy_gg_animated.gif", animation = animate(plot = demo_fordagec_italy_gg_animated))
# Share of each year's first births that falls on each mother's age.
demo_fordagec_italy_pct <- demo_fordagec_italy %>%
  group_by(year) %>%
  # na.rm = TRUE: one missing cell must not turn a whole year into NA.
  mutate(pct = values / sum(values, na.rm = TRUE)) %>%
  # Drop the grouping so later verbs are not silently applied per year.
  ungroup()
# Animated age distribution (as yearly shares) of first births in Italy,
# animated over `year` with transition_time().
demo_fordagec_italy_gg_animated_pct <- demo_fordagec_italy_pct %>%
  ggplot(mapping = aes(x = age, y = pct, fill = age)) +
  geom_col() +
  scale_y_continuous(
    name = "Percent of live births per year",
    labels = scales::percent
  ) +
  scale_x_continuous(name = "Ages of first-time mothers") +
  # Fixed limits keep colours stable across frames. guide = "none" hides the
  # legend; guide = FALSE is deprecated since ggplot2 3.3.4 and emits a
  # warning.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, limits = c(14, 44), guide = "none"
  ) +
  theme_minimal() +
  # frame_time is rounded so the title always shows a whole year.
  labs(title = "Ages of first-time mothers in Italy in {round(frame_time)}") +
  transition_time(year) +
  ease_aes("linear")

# Printing the object renders the animation.
demo_fordagec_italy_gg_animated_pct

# Two-state animation that flips between the 1980 and the 2016 age
# distribution (as shares) of first births in Italy.
demo_fordagec_italy_gg_animated_1980_2016_pct <- demo_fordagec_italy_pct %>%
  filter(year %in% c(1980, 2016)) %>%
  ggplot(mapping = aes(x = age, y = pct, fill = age)) +
  geom_col() +
  scale_y_continuous(
    name = "Share of live births per year",
    labels = scales::percent
  ) +
  scale_x_continuous(name = "Mother's age at birth") +
  # guide = "none" hides the legend; guide = FALSE is deprecated since
  # ggplot2 3.3.4 and emits a warning.
  scale_fill_gradient2(
    low = "#b45699", mid = "burlywood1", high = "#9bbd84",
    midpoint = 25, guide = "none"
  ) +
  theme_minimal() +
  # {closest_state} is filled in by transition_states() with the current year.
  labs(title = "Ages of first-time mothers in Italy in {closest_state}") +
  transition_states(
    year,
    transition_length = 0.5,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out")

# Printing the object renders the animation.
demo_fordagec_italy_gg_animated_1980_2016_pct