Wczytanie potrzebnych bibliotek
library(kableExtra)
library(tidyverse)
library(readr)
library(scales)
library(ggrepel)
library(lubridate)
Wczytanie danych oraz przygotowanie wszelkich pomocniczych ramek danych.
owid_covid_data <- read_csv("owid_covid_data.csv", show_col_types = FALSE)
excess_mortality <- read_csv("excess_mortality.csv", show_col_types = FALSE)
continents = owid_covid_data %>% filter(!is.na(continent)) %>% distinct(continent)
locations = owid_covid_data %>% filter(!is.na(continent)) %>% distinct(location)
eu = owid_covid_data %>% filter(continent == "Europe") %>% distinct(location)
owid_covid_data_eu = owid_covid_data %>% filter(location %in% eu$location)
excess_mortality_eu = excess_mortality %>% filter(location %in% eu$location)
set.seed(123)
selected_locations_eu = eu %>% filter(location != "Poland") %>% slice_sample(n=3)
selected_period = seq(dmy('01.05.2021'), dmy('31.12.2021'), by=1)
selected_date = seq(dmy('01.05.2021'), dmy('31.12.2021'), by=1) %>% sample(1)
location_population = owid_covid_data %>% select(location, population) %>%
distinct(location, population)
Na podstawie owid_covid_data obliczyć średnią liczbę
wykonanych dziennie szczepień (w przeliczeniu na milion mieszkańców) w
każdym kraju w okresie od 01.05.2021r. do 31.12.2021r. Podsumowanie
zaprezentować na wykresie pudełkowym z podziałem na
kontynenty.
Dane z wybranego okresu oraz z poszczególnych krajów świata.
selected_data = owid_covid_data %>%
filter(date %in% selected_period & location %in% locations$location)
Podsumowanie w obrębie kontynentów oraz krajów.
sum_selected_data = selected_data %>%
group_by(continent, location) %>%
summarise(mean_nvspm = mean(new_vaccinations_smoothed_per_million, na.rm = T), .groups = "keep") %>%
filter(!is.nan(mean_nvspm)) %>%
ungroup() %>%
arrange(desc(mean_nvspm))
sum_selected_data
## # A tibble: 218 × 3
## continent location mean_nvspm
## <chr> <chr> <dbl>
## 1 Africa Saint Helena 15915
## 2 Oceania Tokelau 12718.
## 3 North America Cuba 11910.
## 4 Oceania Pitcairn 10892.
## 5 Oceania Niue 10645.
## 6 Asia Brunei 8003.
## 7 Asia South Korea 7863.
## 8 North America Bonaire Sint Eustatius and Saba 7412
## 9 Asia China 7227.
## 10 Europe Denmark 7099.
## # … with 208 more rows
sum_selected_data %>%
ggplot(aes(continent, mean_nvspm, fill=continent))+
geom_boxplot()
owid_covid_data_eu, na dzień
selected_date, przeanalizować zależność między (wszystkie
dane w przeliczeniu na milion mieszkańców)selected_data = owid_covid_data_eu %>%
filter(date == selected_date & location %in% locations$location)
selected_data %>%
filter(!is.na(population) & !is.na(aged_70_older) & !is.na(total_deaths_per_million)) %>%
ggplot(aes(population*aged_70_older,total_deaths_per_million))+
geom_point()+
geom_smooth(method = 'loess', formula = 'y ~ x')+
scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
annotation_logticks()
selected_data %>%
filter(!is.na(median_age) & !is.na(total_deaths_per_million)) %>%
ggplot(aes(median_age,total_deaths_per_million))+
geom_point()+
geom_smooth(method = 'loess', formula = 'y ~ x')+
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
annotation_logticks(sides ="l")
selected_data %>%
filter(!is.na(population_density) & !is.na(total_deaths_per_million)) %>%
ggplot(aes(population_density,total_deaths_per_million))+
geom_point()+
geom_smooth(method = 'loess', formula = 'y ~ x')+
scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
annotation_logticks()
selected_data %>%
filter(!is.na(total_tests_per_thousand) & !is.na(total_cases_per_million)) %>%
ggplot(aes(total_tests_per_thousand, total_cases_per_million))+
geom_point()+
geom_smooth(method = 'loess', formula = 'y ~ x')+
scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
annotation_logticks(sides ="b")
selected_data %>%
filter(!is.na(total_tests_per_thousand) & !is.na(total_deaths_per_million)) %>%
ggplot(aes(total_tests_per_thousand, total_deaths_per_million))+
geom_point()+
geom_smooth(method = 'loess', formula = 'y ~ x')+
scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
annotation_logticks(sides ="b")
selected_data %>%
filter(!is.na(icu_patients_per_million) & !is.na(new_cases_smoothed_per_million)) %>%
ggplot(aes(icu_patients_per_million, new_cases_smoothed_per_million))+
geom_point()+
geom_smooth(method = 'loess', formula = 'y ~ x')+
scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
annotation_logticks(sides ="b")
selected_data %>%
filter(!is.na(icu_patients_per_million) & !is.na(new_deaths_smoothed_per_million)) %>%
ggplot(aes(icu_patients_per_million, new_deaths_smoothed_per_million))+
geom_point()+
geom_smooth(method = 'loess', formula = 'y ~ x')+
scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x))) +
annotation_logticks(sides ="b")
Na podstawie excess_mortality_eu porównać liczby zgonów
w poszczególnych tygodniach w latach 2020 i 2021 ze średnią za lata 2015
- 2019 w podziale na wszystkie kraje Europy. Wszystkie dane
przeliczyć na milion mieszkańców (wielkość populacji można
znaleźć w ramce owid_covid_data_eu).
county_monthly = excess_mortality_eu %>%
filter(time_unit == "monthly") %>%
distinct(location)
county_weekly = excess_mortality_eu %>%
filter(time_unit == "weekly") %>%
distinct(location)
df = excess_mortality_eu %>%
rename(`2015-19` = average_deaths_2015_2019_all_ages,
`2020` = deaths_2020_all_ages,
`2021` = deaths_2021_all_ages) %>%
pivot_longer(c(`2015-19`, `2020`, `2021`), names_to = "Okres") %>%
filter(!is.na(value)) %>%
left_join(location_population, by="location") %>%
mutate(value = value/population*1e6)
df %>%
filter(location %in% county_weekly$location[1:16]) %>%
ggplot(aes(time, value, color=Okres))+
geom_line()+
facet_wrap(vars(location))
df %>%
filter(location %in% county_weekly$location[17:31]) %>%
ggplot(aes(time, value, color=Okres))+
geom_line()+
facet_wrap(vars(location))
df %>%
filter(location %in% county_monthly$location) %>%
ggplot(aes(time, value, color=Okres))+
geom_line()+
facet_wrap(vars(location))
df %>%
filter(location == "Poland") %>%
ggplot(aes(time, value, color=Okres))+
geom_line()
Dla selected_locations_eu oraz Polski, na podstawie
owid_covid_data_eu, wykonać:
selected_data = owid_covid_data_eu %>%
filter(location %in% c(selected_locations_eu$location, "Poland"))
df = selected_data %>%
filter(!is.na(new_cases_smoothed_per_million) & !is.na(date))
df_max = df %>%
group_by(location) %>%
filter(new_cases_smoothed_per_million == max(new_cases_smoothed_per_million)) %>%
select(location, date, new_cases_smoothed_per_million)
df %>%
ggplot(aes(date, new_cases_smoothed_per_million, color=location, label=paste(date)))+
geom_line()+
geom_point(data = df_max, shape=23)+
geom_label_repel(data = df_max, nudge_x = -100)
df = selected_data %>%
filter(!is.na(new_deaths_smoothed_per_million) & !is.na(date))
df_max = df %>%
group_by(location) %>%
filter(new_deaths_smoothed_per_million == max(new_deaths_smoothed_per_million)) %>%
select(location, date, new_deaths_smoothed_per_million)
df %>%
ggplot(aes(date, new_deaths_smoothed_per_million,
color=location, label=paste(date)))+
geom_line()+
geom_point(data = df_max, shape=23)+
geom_label_repel(data = df_max, nudge_x = -100)
df = selected_data %>%
filter(!is.na(new_people_vaccinated_smoothed_per_hundred) & !is.na(date))
df_max = df %>%
group_by(location) %>%
filter(new_people_vaccinated_smoothed_per_hundred == max(new_people_vaccinated_smoothed_per_hundred)) %>%
select(location, date, new_people_vaccinated_smoothed_per_hundred)
df %>%
ggplot(aes(date, new_people_vaccinated_smoothed_per_hundred,
color=location, label=paste(date)))+
geom_line()+
geom_point(data = df_max, shape=23)+
geom_label_repel(data = df_max, nudge_x = -100)
- wizualizację w czasie stosunku (ilorazu) dziennej liczby wykonanych
testów do dziennej liczby nowych przypadków.
df = selected_data %>%
filter(!is.na(new_tests_smoothed_per_thousand) & !is.na(date) &
!is.na(new_cases_smoothed_per_million)) %>%
mutate(test_to_cases = new_tests_smoothed_per_thousand/new_cases_smoothed_per_million)
df_max = df %>%
group_by(location) %>%
filter(test_to_cases == max(test_to_cases)) %>%
select(location, date, test_to_cases)
df %>%
ggplot(aes(date, test_to_cases,
color=location, label=paste(date)))+
geom_line()+
geom_point(data = df_max, shape=23)+
geom_label_repel(data = df_max, nudge_x = -100)