Thesis

Author

Vladyslava Bondarenko

library(eurostat)
Warning: package 'eurostat' was built under R version 4.4.3
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)

# Download the Eurostat table "nama_10_pc"; time_format = "num" keeps the
# period column numeric so it can be matched against plain year numbers later.
gdp_pc <- get_eurostat(id = "nama_10_pc", time_format = "num")

indexed 0B in  0s, 0B/s
indexed 2.15GB in  0s, 2.15GB/s
                                                                              
Table nama_10_pc cached at C:\Users\User\AppData\Local\Temp\RtmpAXZhPk/eurostat/dea8e78cad1ed5f1d237fc09e91d8348.rds
# Geo codes compared in the chart ("EU27_2020" is the EU aggregate series).
countries <- c("DE", "FR", "NL", "PL", "EU27_2020")

# Years shown in the chart; used by both the filter and the plot title so the
# two cannot drift apart.
target_years <- 2020:2024

# Keep only the GDP series (na_item "B1GQ") in the per-capita chain-linked
# volume unit, for the selected geographies and years, dropping missing values.
filtered_data <- gdp_pc %>%
  filter(
    geo %in% countries,
    TIME_PERIOD %in% target_years,
    na_item == "B1GQ",
    unit == "CLV10_EUR_HAB",  # real GDP per capita
    !is.na(values)
  )

# Grouped bar chart: one cluster per year, one bar per geography.
# geom_col() is the direct equivalent of geom_bar(stat = "identity").
ggplot(filtered_data, aes(x = factor(TIME_PERIOD), y = values, fill = geo)) +
  geom_col(position = "dodge") +
  labs(
    title = paste0(
      "Real GDP per Capita in EUR (2010 prices) (",
      min(target_years), "–", max(target_years), ")"
    ),
    x = "Year",
    y = "GDP per Capita (EUR, constant 2010 prices)",
    fill = "Country"
  ) +
  theme_minimal() +
  theme(legend.position = "right")

World Bank data on GDP

library(WDI)
Warning: package 'WDI' was built under R version 4.4.3
# ISO2 codes of the countries compared in the World Bank sections below.
countries <- c("DE", "FR", "NL", "PL", "GB", "UA")

# World Bank indicator codes requested together in one call.
indicators <- c(
  "NY.GDP.PCAP.CD", # GDP per capita (current US$)
  "SH.STA.DIAB.ZS"  # Diabetes prevalence (% of population ages 20 to 79)
)

# One row per country and year, one column per indicator.
data <- WDI(
  indicator = indicators,
  country = countries,
  start = 2019,
  end = 2023
)

# Quick look at the first rows to check the download.
head(data, n = 6)
  country iso2c iso3c year NY.GDP.PCAP.CD SH.STA.DIAB.ZS
1  France    FR   FRA 2019       40408.28             NA
2  France    FR   FRA 2020       39169.86             NA
3  France    FR   FRA 2021       43725.10            5.3
4  France    FR   FRA 2022       41082.81             NA
5  France    FR   FRA 2023       44690.93             NA
6 Germany    DE   DEU 2019       47623.87             NA
# Drop country-years without a GDP per capita value so the lines have no gaps
# caused by missing observations.
gdp_data <- data %>%
  filter(!is.na(NY.GDP.PCAP.CD))

# One line per country. Line thickness is set with `linewidth`; using `size`
# for lines is deprecated since ggplot2 3.4.0 and triggers a warning.
ggplot(gdp_data, aes(x = year, y = NY.GDP.PCAP.CD, color = country)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  labs(
    title = "GDP per Capita Over Time",
    x = "Year",
    y = "GDP per Capita (current US$)",
    color = "Country"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Elderly population

# Share of the population aged 65 and above, per country, for 2022-2023.
elderly_data <- WDI(
  country = countries,
  indicator = "SP.POP.65UP.TO.ZS",
  start = 2022,
  end = 2023
)

# Latest year per country that actually has a value. Missing values are
# removed first; otherwise a country whose most recent year is NA would be
# kept as an NA row instead of falling back to its previous year.
elderly_latest <- elderly_data %>%
  filter(!is.na(SP.POP.65UP.TO.ZS)) %>%
  group_by(country) %>%
  filter(year == max(year)) %>%
  ungroup() %>%
  rename(elderly_pct = SP.POP.65UP.TO.ZS)

# Year(s) actually shown, so the title always matches the plotted data
# (e.g. "2023", or "2022/2023" if countries differ).
elderly_years <- paste(sort(unique(elderly_latest$year)), collapse = "/")

# Horizontal bars ordered by the elderly share.
ggplot(elderly_latest, aes(x = reorder(country, elderly_pct), y = elderly_pct, fill = country)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = paste("Elderly Population (% of total) in", elderly_years),
    x = "Country",
    y = "Population aged 65+ (%)"
  ) +
  theme_minimal()

# Same figures as a table, highest share first.
table_elderly <- elderly_latest %>%
  select(country, year, elderly_pct) %>%
  arrange(desc(elderly_pct))

print(table_elderly)
# A tibble: 6 × 3
  country         year elderly_pct
  <chr>          <int>       <dbl>
1 Germany         2023        22.8
2 France          2023        21.7
3 Netherlands     2023        20.2
4 Poland          2023        19.6
5 United Kingdom  2023        19.2
6 Ukraine         2023        18.6

Technology evaluation

# Internet usage data for 2022-2023 (latest available)
internet_data <- WDI(
  country = countries,
  indicator = "IT.NET.USER.ZS",
  start = 2022,
  end = 2023
)

# Latest year per country that actually has a value. Missing values are
# removed before taking max(year); without this, a country with no value for
# its most recent year would be kept as NA rather than using the earlier year.
internet_latest <- internet_data %>%
  filter(!is.na(IT.NET.USER.ZS)) %>%
  group_by(country) %>%
  filter(year == max(year)) %>%
  ungroup() %>%
  rename(internet_penetration_pct = IT.NET.USER.ZS)

# Quick check of the result, highest penetration first.
internet_latest %>%
  select(country, year, internet_penetration_pct) %>%
  arrange(desc(internet_penetration_pct)) %>%
  print()
# A tibble: 6 × 3
  country         year internet_penetration_pct
  <chr>          <int>                    <dbl>
1 Netherlands     2023                     97  
2 United Kingdom  2023                     96.3
3 Germany         2023                     92.5
4 France          2023                     86.8
5 Poland          2023                     86.4
6 Ukraine         2023                     82.4
# Horizontal bar chart of internet penetration, bars ordered by value.
internet_latest %>%
  ggplot(
    mapping = aes(
      x = reorder(country, internet_penetration_pct),
      y = internet_penetration_pct,
      fill = country
    )
  ) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  ggtitle("Internet Penetration (% of Population) in Latest Available Year") +
  xlab("Country") +
  ylab("Internet Users (%)") +
  theme_minimal()

# Tabular version of the chart, highest penetration first.
table_internet <- arrange(
  select(internet_latest, country, year, internet_penetration_pct),
  desc(internet_penetration_pct)
)

print(table_internet)
# A tibble: 6 × 3
  country         year internet_penetration_pct
  <chr>          <int>                    <dbl>
1 Netherlands     2023                     97  
2 United Kingdom  2023                     96.3
3 Germany         2023                     92.5
4 France          2023                     86.8
5 Poland          2023                     86.4
6 Ukraine         2023                     82.4

Innovation

# R&D expenditure (% of GDP) per country for 2020-2023.
rd_data <- WDI(
  indicator = "GB.XPD.RSDV.GD.ZS",
  country = countries,
  start = 2020,
  end = 2023
)

# For each country keep the most recent year that has a reported value;
# rows without a value are discarded before the latest year is chosen.
rd_latest <- rd_data %>%
  group_by(country) %>%
  filter(!is.na(GB.XPD.RSDV.GD.ZS)) %>%
  filter(year == max(year)) %>%
  ungroup()

# Horizontal bars ordered by R&D intensity.
rd_latest %>%
  ggplot(
    mapping = aes(
      x = reorder(country, GB.XPD.RSDV.GD.ZS),
      y = GB.XPD.RSDV.GD.ZS,
      fill = country
    )
  ) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  ggtitle("R&D Expenditure (% of GDP) Latest Available Year (2020-2023)") +
  xlab("Country") +
  ylab("R&D Expenditure (% of GDP)") +
  theme_minimal()

# Table with a readable column name, largest share first.
rd_table <- rd_latest %>%
  rename(RD_Expenditure_Percent_GDP = GB.XPD.RSDV.GD.ZS) %>%
  select(country, year, RD_Expenditure_Percent_GDP) %>%
  arrange(desc(RD_Expenditure_Percent_GDP))

print(rd_table)
# A tibble: 6 × 3
  country         year RD_Expenditure_Percent_GDP
  <chr>          <int>                      <dbl>
1 Germany         2022                      3.13 
2 United Kingdom  2021                      2.90 
3 Netherlands     2022                      2.26 
4 France          2022                      2.23 
5 Poland          2022                      1.45 
6 Ukraine         2023                      0.327

Healthcare expenditures

# Health expenditure (% of GDP) per country for 2020-2023.
health_exp <- WDI(
  indicator = "SH.XPD.CHEX.GD.ZS",
  country = countries,
  start = 2020,
  end = 2023
)

# Most recent year with a reported value for each country; missing values are
# removed first so the latest year is chosen among available observations.
health_latest <- health_exp %>%
  group_by(country) %>%
  filter(!is.na(SH.XPD.CHEX.GD.ZS)) %>%
  filter(year == max(year)) %>%
  ungroup()

# Horizontal bars ordered by health spending share.
health_latest %>%
  ggplot(
    mapping = aes(
      x = reorder(country, SH.XPD.CHEX.GD.ZS),
      y = SH.XPD.CHEX.GD.ZS,
      fill = country
    )
  ) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  ggtitle("Healthcare Expenditure (% of GDP) Latest Available Year (2020-2023)") +
  xlab("Country") +
  ylab("Health Expenditure (% GDP)") +
  theme_minimal()

# Table with a readable column name, largest share first.
health_table <- health_latest %>%
  rename(Health_Expenditure_Percent_GDP = SH.XPD.CHEX.GD.ZS) %>%
  select(country, year, Health_Expenditure_Percent_GDP) %>%
  arrange(desc(Health_Expenditure_Percent_GDP))

print(health_table)
# A tibble: 6 × 3
  country         year Health_Expenditure_Percent_GDP
  <chr>          <int>                          <dbl>
1 France          2022                          11.9 
2 Germany         2023                          11.8 
3 United Kingdom  2023                          10.9 
4 Netherlands     2022                          10.1 
5 Ukraine         2021                           8.20
6 Poland          2023                           7.00