Przygotowanie

Wczytanie potrzebnych bibliotek

library(kableExtra)
library(tidyverse)
library(readr)
library(scales)
library(ggrepel)
library(lubridate)

Wczytanie danych oraz przygotowanie wszelkich pomocniczych ramek danych.

owid_covid_data <- read_csv("owid_covid_data.csv", show_col_types = FALSE)
excess_mortality <- read_csv("excess_mortality.csv", show_col_types = FALSE)
continents = owid_covid_data %>% filter(!is.na(continent)) %>% distinct(continent)
locations = owid_covid_data %>% filter(!is.na(continent)) %>% distinct(location)
eu = owid_covid_data %>% filter(continent == "Europe") %>% distinct(location)
owid_covid_data_eu = owid_covid_data %>% filter(location %in% eu$location)
excess_mortality_eu = excess_mortality %>% filter(location %in% eu$location)
set.seed(123)
selected_locations_eu = eu %>% filter(location != "Poland") %>% slice_sample(n=3)
selected_period = seq(dmy('01.05.2021'), dmy('31.12.2021'), by=1)
selected_date = seq(dmy('01.05.2021'), dmy('31.12.2021'), by=1) %>% sample(1)
location_population = owid_covid_data %>% select(location, population) %>% 
  distinct(location, population)

Zadanie 1

Na podstawie owid_covid_data obliczyć średnią liczbę wykonanych dziennie szczepień (w przeliczeniu na milion mieszkańców) w każdym kraju w okresie od 01.05.2021r. do 31.12.2021r. Podsumowanie zaprezentować na wykresie pudełkowym z podziałem na kontynenty.

Dane z wybranego okresu oraz z poszczególnych krajów świata.

selected_data = owid_covid_data %>% 
  filter(date %in% selected_period & location %in% locations$location)

Podsumowanie w obrębie kontynentów oraz krajów.

sum_selected_data = selected_data %>% 
  group_by(continent, location) %>% 
  summarise(mean_nvspm = mean(new_vaccinations_smoothed_per_million, na.rm = T), .groups = "keep") %>% 
  filter(!is.nan(mean_nvspm)) %>%
  ungroup() %>% 
  arrange(desc(mean_nvspm))

sum_selected_data 
## # A tibble: 218 × 3
##    continent     location                        mean_nvspm
##    <chr>         <chr>                                <dbl>
##  1 Africa        Saint Helena                        15915 
##  2 Oceania       Tokelau                             12718.
##  3 North America Cuba                                11910.
##  4 Oceania       Pitcairn                            10892.
##  5 Oceania       Niue                                10645.
##  6 Asia          Brunei                               8003.
##  7 Asia          South Korea                          7863.
##  8 North America Bonaire Sint Eustatius and Saba      7412 
##  9 Asia          China                                7227.
## 10 Europe        Denmark                              7099.
## # … with 208 more rows
sum_selected_data %>% 
  ggplot(aes(continent, mean_nvspm, fill=continent))+
  geom_boxplot()

Zadanie 2

  1. Na podstawie owid_covid_data_eu, na dzień selected_date, przeanalizować zależność między (wszystkie dane w przeliczeniu na milion mieszkańców)
selected_data = owid_covid_data_eu %>% 
  filter(date == selected_date & location %in% locations$location)
selected_data %>% 
  filter(!is.na(population) & !is.na(aged_70_older) & !is.na(total_deaths_per_million)) %>% 
  ggplot(aes(population*aged_70_older,total_deaths_per_million))+
  geom_point()+
  geom_smooth(method = 'loess', formula = 'y ~ x')+
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
                labels = trans_format("log10", math_format(10^.x))) +
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
                labels = trans_format("log10", math_format(10^.x))) +
  annotation_logticks()

selected_data %>% 
  filter(!is.na(median_age) & !is.na(total_deaths_per_million)) %>% 
  ggplot(aes(median_age,total_deaths_per_million))+
  geom_point()+
  geom_smooth(method = 'loess', formula = 'y ~ x')+
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
                labels = trans_format("log10", math_format(10^.x))) +
  annotation_logticks(sides ="l")

selected_data %>% 
  filter(!is.na(population_density) & !is.na(total_deaths_per_million)) %>% 
  ggplot(aes(population_density,total_deaths_per_million))+
  geom_point()+
  geom_smooth(method = 'loess', formula = 'y ~ x')+
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
                labels = trans_format("log10", math_format(10^.x))) +
  scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
                labels = trans_format("log10", math_format(10^.x))) +
  annotation_logticks()

selected_data %>%
  filter(!is.na(total_tests_per_thousand) & !is.na(total_cases_per_million)) %>%
  ggplot(aes(total_tests_per_thousand, total_cases_per_million))+
  geom_point()+
  geom_smooth(method = 'loess', formula = 'y ~ x')+
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
                labels = trans_format("log10", math_format(10^.x))) +
  annotation_logticks(sides ="b")

selected_data %>%
  filter(!is.na(total_tests_per_thousand) & !is.na(total_deaths_per_million)) %>%
  ggplot(aes(total_tests_per_thousand, total_deaths_per_million))+
  geom_point()+
  geom_smooth(method = 'loess', formula = 'y ~ x')+
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
                labels = trans_format("log10", math_format(10^.x))) +
  annotation_logticks(sides ="b")

selected_data %>%
  filter(!is.na(icu_patients_per_million) & !is.na(new_cases_smoothed_per_million)) %>%
  ggplot(aes(icu_patients_per_million, new_cases_smoothed_per_million))+
  geom_point()+
  geom_smooth(method = 'loess', formula = 'y ~ x')+
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
                labels = trans_format("log10", math_format(10^.x))) +
  annotation_logticks(sides ="b")

selected_data %>%
  filter(!is.na(icu_patients_per_million) & !is.na(new_deaths_smoothed_per_million)) %>%
  ggplot(aes(icu_patients_per_million, new_deaths_smoothed_per_million))+
  geom_point()+
  geom_smooth(method = 'loess', formula = 'y ~ x')+
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
                labels = trans_format("log10", math_format(10^.x))) +
  annotation_logticks(sides ="b")

Zadanie 3

Na podstawie excess_mortality_eu porównać liczby zgonów w poszczególnych tygodniach w latach 2020 i 2021 ze średnią za lata 2015 - 2019 w podziale na wszystkie kraje Europy. Wszystkie dane przeliczyć na milion mieszkańców (wielkość populacji można znaleźć w ramce owid_covid_data_eu).

county_monthly = excess_mortality_eu %>% 
  filter(time_unit == "monthly") %>% 
  distinct(location)

county_weekly = excess_mortality_eu %>% 
  filter(time_unit == "weekly") %>% 
  distinct(location)

df = excess_mortality_eu %>% 
  rename(`2015-19` = average_deaths_2015_2019_all_ages,
         `2020` = deaths_2020_all_ages,
         `2021` = deaths_2021_all_ages) %>% 
  pivot_longer(c(`2015-19`, `2020`, `2021`), names_to = "Okres") %>% 
  filter(!is.na(value)) %>% 
  left_join(location_population, by="location") %>% 
  mutate(value = value/population*1e6)
df %>% 
  filter(location %in% county_weekly$location[1:16]) %>% 
  ggplot(aes(time, value, color=Okres))+
  geom_line()+
  facet_wrap(vars(location))

df %>% 
  filter(location %in% county_weekly$location[17:31]) %>% 
  ggplot(aes(time, value, color=Okres))+
  geom_line()+
  facet_wrap(vars(location))

df %>% 
  filter(location %in% county_monthly$location) %>% 
  ggplot(aes(time, value, color=Okres))+
  geom_line()+
  facet_wrap(vars(location))

df %>% 
  filter(location == "Poland") %>% 
  ggplot(aes(time, value, color=Okres))+
  geom_line()

Zadanie 4

Dla selected_locations_eu oraz Polski, na podstawie owid_covid_data_eu, wykonać:

selected_data = owid_covid_data_eu %>% 
  filter(location %in% c(selected_locations_eu$location, "Poland"))
df = selected_data %>% 
  filter(!is.na(new_cases_smoothed_per_million) & !is.na(date))

df_max = df %>% 
  group_by(location) %>% 
  filter(new_cases_smoothed_per_million == max(new_cases_smoothed_per_million)) %>% 
  select(location, date, new_cases_smoothed_per_million)

df %>% 
  ggplot(aes(date, new_cases_smoothed_per_million, color=location, label=paste(date)))+
  geom_line()+
  geom_point(data = df_max, shape=23)+
  geom_label_repel(data = df_max, nudge_x = -100)

df = selected_data %>% 
  filter(!is.na(new_deaths_smoothed_per_million) & !is.na(date))

df_max = df %>% 
  group_by(location) %>% 
  filter(new_deaths_smoothed_per_million == max(new_deaths_smoothed_per_million)) %>% 
  select(location, date, new_deaths_smoothed_per_million)

df %>% 
  ggplot(aes(date, new_deaths_smoothed_per_million, 
             color=location, label=paste(date)))+
  geom_line()+
  geom_point(data = df_max, shape=23)+
  geom_label_repel(data = df_max, nudge_x = -100)

df = selected_data %>% 
  filter(!is.na(new_people_vaccinated_smoothed_per_hundred) & !is.na(date))

df_max = df %>% 
  group_by(location) %>% 
  filter(new_people_vaccinated_smoothed_per_hundred == max(new_people_vaccinated_smoothed_per_hundred)) %>% 
  select(location, date, new_people_vaccinated_smoothed_per_hundred)

df %>% 
  ggplot(aes(date, new_people_vaccinated_smoothed_per_hundred, 
             color=location, label=paste(date)))+
  geom_line()+
  geom_point(data = df_max, shape=23)+
  geom_label_repel(data = df_max, nudge_x = -100)

- wizualizację w czasie stosunku (ilorazu) dziennej liczby wykonanych testów do dziennej liczby nowych przypadków.

df = selected_data %>% 
  filter(!is.na(new_tests_smoothed_per_thousand) & !is.na(date) & 
           !is.na(new_cases_smoothed_per_million)) %>% 
  mutate(test_to_cases = new_tests_smoothed_per_thousand/new_cases_smoothed_per_million)


df_max = df %>% 
  group_by(location) %>% 
  filter(test_to_cases == max(test_to_cases)) %>% 
  select(location, date, test_to_cases)

df %>% 
  ggplot(aes(date, test_to_cases, 
             color=location, label=paste(date)))+
  geom_line()+
  geom_point(data = df_max, shape=23)+
  geom_label_repel(data = df_max, nudge_x = -100)