Pobieramy dane
## https://data.europa.eu/data/datasets/cbpeglqdhx6jfxjb8vog?locale=pl
b0 <- get_eurostat("tps00204") %>% filter(indic_de == 'LBIRTH_NR')
Przekształcamy na udziały wzlędem liczby urodzin z roku 2010. Pomijamy agregaty i niektóre kraje:
## Transform to % of last observation
## remove countries not reporting for 2021
b <- b0 %>%
filter (! geo %in% outc) %>%
select (geo, time, values) %>%
mutate (time = as.character(time)) %>%
mutate (year = as.numeric(substr(time, 1, 4)),
geo = as.factor(geo)) %>%
filter (year >= firstYr ) %>%
group_by(geo) %>%
arrange(year) %>%
mutate (v = values / first(values) * 100, ly= first(year), lsty= last(year)) %>%
filter (lsty >= currentYr ) %>%
filter (! geo %in% outc2) %>%
mutate( grp=case_when(geo == "PL" ~ "1", TRUE ~ "0") ) %>%
ungroup()
Wykres:
b.pl <- b %>% filter (geo == 'PL')
b1 <- b %>% filter (year == lsty)
p2 <- ggplot(b, aes(x=year, y=v, color=geo, group=geo )) +
geom_line(color='skyblue') +
geom_line(data = b.pl, colour = "red") +
scale_x_continuous(breaks=seq(firstYr, currentYr +2, by=1)) +
geom_text(data=b1, aes(label=sprintf("%s", geo ),),
hjust=-0.25, size=2, color='black' ) +
ggtitle(sprintf ("Liczba urodzeń w Europie %i--%i (2020=100%%)", firstYr, currentYr ),
subtitle='Linia czerwona = PL') +
labs(caption="Dane: Eurostat/tabela tps00204") +
ylab("%")
p2
Albo jako wykres słupkowy:
p2x <- ggplot(b1, aes(x = reorder(geo, v), y = v, fill=grp )) +
ggtitle(sprintf('Urodzenia żywe w Europie w %i (%i = 100%%)', currentYr, firstYr)) +
xlab("") + ylab("%") +
#scale_x_date( labels = date_format("%Y"), breaks ="1 year") +
geom_bar(position = 'dodge', stat = 'identity' ) +
theme(legend.position="none") +
scale_color_manual( values = c( "1" = "#F8766D", "0" = "#00BFC4" ), guide = FALSE ) +
geom_text(aes(label=sprintf("%.1f", v ),),
vjust=-1.25, size=2, color='black' ) +
labs(caption="Dane: Eurostat/tabela tps00204")
p2x
demo_fmonth
)Pobieramy dane (pomijamy TOTAL
oraz
UNK
)
m0 <- get_eurostat("demo_fmonth") %>%
filter ((! month == "TOTAL") & (! month == 'UNK') )
Tylko duże kraje (Duże to te których w 2021 roku zanotowano minimum 99000 żywych urodzeń):
m0.big <- m0 %>%
filter (! geo %in% outc) %>%
mutate (time = as.character(time),
year = as.numeric(substr(time, 1, 4)) ) %>%
filter (year == currentYr) %>%
group_by(geo, year) %>%
summarise( totalY = sum(values) ) %>%
filter (totalY > 99000)
## vector of big countries
big.C <- m0.big$geo
Teraz wykres:
m <- m0 %>%
filter ( geo %in% big.C & (! geo %in% outc ) ) %>%
mutate (time = as.character(time)) %>%
mutate (year = as.numeric(substr(time, 1, 4)),
mm = substr(month, 2, 3),
date = sprintf ("%i-%s-01", year, mm),
geo = as.factor(geo)) %>%
## pomiń lata wcześniejsze niż firstYr
filter (year >= firstYr )
p2m <- ggplot(m, aes(x=as.Date(date), y=values, color=geo, group=geo )) +
geom_point(size=.4, alpha=.2) +
geom_smooth(method="loess", se=F, span=.5) +
geom_line(alpha=.2) +
ggtitle(sprintf ("Liczba urodzeń w Europie %i--%i", firstYr, currentYr),
subtitle='(kraje o liczbie urodzeń w 2021 większej od 99 tys/rok)') +
labs(caption="Dane: Eurostat/tabela demo_fmonth") +
scale_x_date( breaks = "18 months", labels = date_format("%y/%m")) +
ylab("") +
xlab("yy/mm")
p2m
tgs00100
)Współczynnik płodności wg makroregionów (dla wyżej zdefiniowanych dużych krajów)
x0 <- get_eurostat("tgs00100") %>%
select (geo, time, values) %>%
mutate (member = substr(geo, 1, 2)) %>%
filter (member %in% big.C )
x0.pl <- x0 %>% filter (member == 'PL')
big.C.txt <- toString(big.C)
Wykres
p12 <- ggplot(x0, aes(x=as.Date(time), y=values, color=geo, group=geo )) +
geom_line(color='skyblue') +
geom_line(data = x0.pl, colour = "red") +
ggtitle(sprintf ("Współczynnik płodności wg NUTS2 (%s)", big.C.txt ),
subtitle='(linia czerwnona = PL)') +
labs(caption="Dane: Eurostat/tabela tgs00100") +
ylab("") + xlab("")
p12
Tylko polskie makroregiony
## dodaj nazwy
nuts <- read.csv("nuts.csv", sep = ';', header=T, na.string="NA" )
x0.pl1 <- x0.pl %>% filter (time == "2020-01-01") %>%
left_join(nuts, by="geo")
p12pl <- ggplot(x0.pl, aes(x=as.Date(time), y=values, color=geo, group=geo )) +
geom_line(color='red') +
geom_point(color='red', alpha=.2) +
geom_text(data=x0.pl1, aes(label=sprintf("%s", name ),),
vjust=-0.25, size=2, color='red', alpha=.4 ) +
ggtitle(sprintf ("Współczynnik płodności wg NUTS2 (Polska)") ) +
labs(caption="Dane: Eurostat/tabela tgs00100") +
ylab("%") + xlab("") +
coord_cartesian(xlim = c(as.Date("2009-06-01"), as.Date("2021-01-01")))
p12pl