This is the final analysis. All the datasets have been pre-processing
into the df_filtered.rds file
EDA
Hoy many patients?
n_distinct(df_filtered$pid) %>%
knitr::kable()
How many interventions?
df_filtered %>%
select(ep_beig_dat, manipulation_type, periods) %>%
# remove first semester 2020
filter(!between(ep_beig_dat, as.Date('2020-06-30'), as.Date('2020-12-31'))) %>%
# remove first semester 2021
filter(!between(ep_beig_dat, as.Date('2021-06-30'), as.Date('2021-12-31'))) %>%
# count
select(-ep_beig_dat) %>%
mutate(manipulation_type = fct_drop(manipulation_type)) %>%
group_by(manipulation_type, periods) %>%
count() %>%
pivot_wider(
names_from = periods,
,
names_prefix = "y",
values_from = n
) %>%
mutate("Variation 2020" = (y2020 - y2019) / y2019 * 100) %>%
mutate("Variation 2021" = (y2021 - y2019) / y2019 * 100) %>%
knitr::kable()
Dental hygiene |
88729 |
62588 |
80901 |
-29.46162 |
-8.822369 |
Examination |
52853 |
38395 |
51622 |
-27.35512 |
-2.329102 |
Fluoride applications |
61618 |
45085 |
58352 |
-26.83145 |
-5.300399 |
Hygiene instructions |
62224 |
45382 |
58933 |
-27.06673 |
-5.288956 |
NA

df_filtered %>%
# remove first semester 2020
filter(!between(ep_beig_dat, as.Date('2020-06-30'), as.Date('2020-12-31'))) %>%
# remove first semester 2021
filter(!between(ep_beig_dat, as.Date('2021-06-30'), as.Date('2021-12-31'))) %>%
# mutate(manipulation_type = fct_lump_prop(manipulation_type, prop = .001)) %>%
# mutate(manipulation_type = replace_na(manipulation_type, "Other")) %>%
filter(!is.na(manipulation_type)) %>%
group_by(manipulation_type, periods) %>%
count() %>%
ggplot(aes(x = as.factor(periods),
y = n,
color = manipulation_type,
group = manipulation_type)) +
geom_line() +
scale_y_log10(labels = scales::label_number(scale_cut = scales::cut_short_scale())) +
labs(title = "Manipulations per year (first semester)",
x = "Year",
y = "N (x1000)",
color = "Manipulation")

How many specialties per year
Filter according to https://docs.google.com/document/d/17c5QXkbnCI9aEFDIblwdOLP-zfS5e9NFxVn5TjOTXEE/edit#heading=h.nmf14n
P25 |
General dentist |
P26 |
Maxillofacial surgeon |
|
|
P32 |
radiologist |
A253 |
Paediatric dentist |
A251 |
orthodontist |
n11 |
Dental hygienist |
df_filtered %>%
filter(spec_kods %in% c("P25", "P26", "P32", "A253", "A251", "n11")) %>%
group_by(periods, spec_kods) %>%
count() %>%
pivot_wider(names_from = periods,
values_from = n) %>%
knitr::kable()
n11 |
264889 |
465554 |
507680 |
P26 |
534 |
59 |
NA |
P32 |
1 |
NA |
NA |
Manipulation per date
Check the dates
df_filtered %>%
# remove first semester 2020
filter(!between(ep_beig_dat, as.Date('2020-06-30'), as.Date('2020-12-31'))) %>%
# remove first semester 2021
filter(!between(ep_beig_dat, as.Date('2021-06-30'), as.Date('2021-12-31'))) %>%
# group_by(year, month, day, day_week) %>%
# count() %>%
group_by(month = lubridate::floor_date(ep_beig_dat, unit = "month")) %>%
count() %>%
mutate(month = lubridate::ymd(month)) %>%
ggplot(aes(x = month,
y = n,
group = 1)) +
geom_point() +
geom_line() +
scale_y_log10(labels = scales::label_number(scale_cut = scales::cut_short_scale())) +
scale_x_date(NULL, date_labels = "%b %y", breaks = "month",
) +
# scale_x_date(limits = as.Date(c("2019-01-01", "2021-12-31"))) +
theme(axis.text.x = element_text(size = 8, angle = 45, colour = "black",
vjust = 1, hjust = 1)) +
labs(title = "Manipulations per day, 2019-2021",
y = "Manipulations (log 10)",
x = "Date")

ggsave(here("Figs2019_2021", "fig1.tif"), width = 6, height = 4, device='tiff', dpi=300)
Manipulations
df_filtered %>%
select(ep_beig_dat, manipulation_type, periods) %>%
# remove first semester 2020
filter(!between(ep_beig_dat, as.Date('2020-06-30'), as.Date('2020-12-31'))) %>%
# remove first semester 2021
filter(!between(ep_beig_dat, as.Date('2021-06-30'), as.Date('2021-12-31'))) %>%
# count
group_by(manipulation_type, periods) %>%
count() %>%
mutate(error_estimate = sqrt(n)) %>%
# create the graph
ggplot(aes(
x = as.factor(periods),
y = n,
fill = manipulation_type
)) +
geom_col(position="dodge") + # the position="dodge" allow to have the bars by side
# scale_fill_manual(values = c("#C2C1C1", "#89272F")) +
scale_fill_viridis_d() +
# scale_fill_manual(values = c("grey90", "grey60", "grey30", "grey10")) +
# scale_y_log10(labels = scales::label_number()) +
labs(
# title = "Total of manipulations during the first semester",
x = "Year (First Semester)",
y = "Manipulations",
fill = ""
) +
theme(legend.position="top") +
geom_errorbar(
aes(
ymin = n - error_estimate * 4,
ymax = n + error_estimate * 4
),
width = .2,
position = position_dodge(.9)
)

ggsave(here("Figs2019_2021", "fig2.tif"), width = 7, height = 4, device='tiff', dpi=300)
Manipulations per day
df_filtered %>%
mutate(ep_beig_dat = lubridate::ymd(ep_beig_dat)) %>%
group_by(ep_beig_dat, manipulation_type) %>% # add kodes_grouped to see the kodes
count() %>%
# so far, here are the manipulations per day
# count() %>% # here is the number of manipulacijas per day
# add the name of the day
mutate(day = lubridate::wday(
lubridate::ymd(ep_beig_dat),
label = TRUE,
abbr = TRUE
)) %>%
ggplot(aes(x = ep_beig_dat,
y = n,
color = day)) +
geom_point() +
labs(
title = "Manipulations per day of the week",
y = "Manipulations (n)",
x = "Date",
color = "Day of the week"
) +
scale_fill_viridis_b() +
theme(legend.position="top")

ggsave(here("Figs2019_2021", "fig3.tif"), width = 7, height = 4, device='tiff', dpi=300)
Manipulations per type
df_filtered %>%
mutate(ep_beig_dat = lubridate::ymd(ep_beig_dat)) %>%
group_by(ep_beig_dat, manipulation_type) %>% # add kodes_grouped to see the kodes
count() %>%
# so far, here are the manipulations per day
# count() %>% # here is the number of manipulacijas per day
# add the name of the day
mutate(day = lubridate::wday(
lubridate::ymd(ep_beig_dat),
label = TRUE,
abbr = TRUE
)) %>%
ggplot(aes(x = ep_beig_dat,
y = n,
color = day)) +
geom_point() +
labs(
title = "Manipulations per day of the week",
y = "Manipulations n",
x = "Date",
color = "Day of the week"
) +
# scale_y_log10() +
scale_fill_viridis_b() +
facet_grid(manipulation_type ~ . ) +
theme(legend.position = "top") +
theme(strip.text.y = element_text(size = 8))

ggsave(here("Figs2019_2021", "fig4.tif"), width = 8, height = 6, device='tiff', dpi=300)
---
title: "COVID Latvia 2019 til 2020"
output: 
  html_notebook: 
    toc: yes
    toc_float: true
    fig_caption: true
    code_folding: hide
---

```{r}
knitr::opts_chunk$set(echo = TRUE, warning=FALSE)
```

This is the final analysis. All the datasets have been pre-processing into the df_filtered.rds file

# Packages

```{r}
pacman::p_load(tidyverse, # data science tools
               janitor, # data cleaning and tabyl
               sf,  # for maps
               gtsummary,  # for tables and regression summary
               ggpubr, # for ggplot themes
               scales, # for ggplot scales
               here) # files paths
```

```{r}
theme_set(theme_minimal())
```

# 2. START FROM HERE Load the RDS file

```{r}
# df <- readRDS(here("datasets", "df.rds"))
# test with new dataset
# df <- readRDS(here("datasets", "df_with_all_2021.rds"))
df_filtered <- readRDS(here("datasets", "df_filtered.rds"))

```

## Filter dates to avoid dates from the second semester 2019

```{r}
df_filtered <- df_filtered %>%
  filter(ep_beig_dat > "2018-12-31") %>% 
  filter(!between(ep_beig_dat, as.Date('2019-06-30'), as.Date('2019-12-31')))

```

#### Codification

# EDA

Hoy many patients?

```{r}
n_distinct(df_filtered$pid) %>% 
  knitr::kable()
```

How many interventions?

```{r}

df_filtered %>%
  select(ep_beig_dat, manipulation_type, periods) %>%
  # remove first semester 2020
  filter(!between(ep_beig_dat, as.Date('2020-06-30'), as.Date('2020-12-31'))) %>%
  # remove first semester 2021
  filter(!between(ep_beig_dat, as.Date('2021-06-30'), as.Date('2021-12-31'))) %>%
  # count
  select(-ep_beig_dat) %>%
  mutate(manipulation_type = fct_drop(manipulation_type)) %>%
  group_by(manipulation_type, periods) %>%
  count() %>%
  pivot_wider(
    names_from = periods,
    ,
    names_prefix = "y",
    values_from = n
  ) %>%
  mutate("Variation 2020" = (y2020 - y2019) / y2019 * 100) %>% 
  mutate("Variation 2021" = (y2021 - y2019) / y2019 * 100) %>% 
  knitr::kable()

```

```{r}
df_filtered %>%
  select(ep_beig_dat, manipulation_type, periods) %>%
  # remove first semester 2020
  filter(!between(ep_beig_dat, as.Date('2020-06-30'), as.Date('2020-12-31'))) %>%
  # remove first semester 2021
  filter(!between(ep_beig_dat, as.Date('2021-06-30'), as.Date('2021-12-31'))) %>%
  # count
  select(-ep_beig_dat) %>%
  mutate(manipulation_type = fct_drop(manipulation_type)) %>%
  group_by(manipulation_type, periods) %>%
  count() %>%
  pivot_wider(
    names_from = periods,
    ,
    names_prefix = "y",
    values_from = n
  ) %>%
  mutate("Variation_2020" = (y2020 - y2019) / y2019 * 100) %>% 
  mutate("Variation_2021" = (y2021 - y2019) / y2019 * 100) %>% 
  select(manipulation_type, Variation_2020, Variation_2021 ) %>% 
  pivot_longer(Variation_2020:Variation_2021, 
               names_to = "Period", 
               values_to = "n") %>% 
  ggplot(aes(x = Period, 
             y = n, 
             fill = manipulation_type)) + 
   geom_col(position="dodge") + # the position="dodge" allow to have the bars by side
  # scale_fill_manual(values = c("#C2C1C1", "#89272F")) +
  scale_fill_viridis_d()  + 
  labs(title = "Comparison to 2019", 
       fill = "Manipulations", 
       y = "Percentage Change")+
  theme(legend.position = "top")
  
```

```{r}
df_filtered %>% 
    # remove first semester 2020
  filter(!between(ep_beig_dat, as.Date('2020-06-30'), as.Date('2020-12-31'))) %>%
  # remove first semester 2021
  filter(!between(ep_beig_dat, as.Date('2021-06-30'), as.Date('2021-12-31'))) %>%
  # mutate(manipulation_type = fct_lump_prop(manipulation_type, prop = .001)) %>% 
  # mutate(manipulation_type = replace_na(manipulation_type, "Other")) %>% 
  filter(!is.na(manipulation_type)) %>% 
  group_by(manipulation_type, periods) %>% 
  count() %>% 
  ggplot(aes(x = as.factor(periods), 
             y = n, 
             color = manipulation_type, 
             group = manipulation_type)) +
  geom_line() +
  scale_y_log10(labels = scales::label_number(scale_cut = scales::cut_short_scale())) +
  labs(title = "Manipulations per year (first semester)", 
       x = "Year", 
       y = "N (x1000)", 
       color = "Manipulation")
```

How many specialties per year

Filter according to <https://docs.google.com/document/d/17c5QXkbnCI9aEFDIblwdOLP-zfS5e9NFxVn5TjOTXEE/edit#heading=h.nmf14n>

| Codes | Explication           |
|-------|-----------------------|
| P25   | General dentist       |
| P26   | Maxillofacial surgeon |
|       |                       |
| P32   | radiologist           |
| A253  | Paediatric dentist    |
| A251  | orthodontist          |
| n11   | Dental hygienist      |

```{r}
df_filtered %>% 
  filter(spec_kods %in% c("P25", "P26", "P32", "A253", "A251", "n11")) %>% 
  group_by(periods, spec_kods) %>% 
  count() %>% 
  pivot_wider(names_from = periods, 
              values_from = n) %>% 
  knitr::kable()
```

## Manipulation per date

Check the dates

```{r}
df_filtered %>%
  # remove first semester 2020
  filter(!between(ep_beig_dat, as.Date('2020-06-30'), as.Date('2020-12-31'))) %>%
  # remove first semester 2021
  filter(!between(ep_beig_dat, as.Date('2021-06-30'), as.Date('2021-12-31'))) %>%
  # group_by(year, month, day, day_week) %>%
  # count() %>%
  group_by(month = lubridate::floor_date(ep_beig_dat, unit = "month")) %>%
  count() %>%
  mutate(month = lubridate::ymd(month)) %>%
  
  ggplot(aes(x = month,
             y = n,
             group = 1)) +
  geom_point() +
  geom_line() +
  scale_y_log10(labels = scales::label_number(scale_cut = scales::cut_short_scale())) +
  scale_x_date(NULL, date_labels = "%b %y", breaks = "month", 
               ) +
  # scale_x_date(limits = as.Date(c("2019-01-01", "2021-12-31"))) +
  theme(axis.text.x  = element_text(size = 8, angle = 45, colour = "black",
      vjust = 1, hjust = 1)) + 
  labs(title = "Manipulations per day, 2019-2021",
       y = "Manipulations (log 10)",
       x = "Date")
```

```{r}
ggsave(here("Figs2019_2021", "fig1.tif"), width = 6, height = 4, device='tiff', dpi=300)
```

## Manipulations

```{r}
df_filtered %>%
  select(ep_beig_dat, manipulation_type, periods) %>% 
  # remove first semester 2020
  filter(!between(ep_beig_dat, as.Date('2020-06-30'), as.Date('2020-12-31'))) %>% 
  # remove first semester 2021
 filter(!between(ep_beig_dat, as.Date('2021-06-30'), as.Date('2021-12-31'))) %>% 
  # count
  group_by(manipulation_type, periods) %>% 
  count() %>%
  mutate(error_estimate = sqrt(n)) %>% 
  # create the graph
  ggplot(aes(
    x = as.factor(periods), 
    y = n, 
    fill = manipulation_type
    )) +
   geom_col(position="dodge") + # the position="dodge" allow to have the bars by side
  # scale_fill_manual(values = c("#C2C1C1", "#89272F")) +
  scale_fill_viridis_d() +
  # scale_fill_manual(values = c("grey90", "grey60", "grey30", "grey10")) +
  # scale_y_log10(labels = scales::label_number()) +
    labs(
      # title = "Total of manipulations during the first semester", 
      x = "Year (First Semester)", 
      y = "Manipulations", 
      fill = ""
    )  +
  theme(legend.position="top") + 
  geom_errorbar(
    aes(
      ymin = n - error_estimate * 4,
      ymax = n + error_estimate * 4
    ),
    width = .2,
    position = position_dodge(.9)
  ) 
```

```{r}
ggsave(here("Figs2019_2021", "fig2.tif"), width = 7, height = 4, device='tiff', dpi=300)
```

## Manipulations per day

```{r}
df_filtered %>%
  mutate(ep_beig_dat = lubridate::ymd(ep_beig_dat)) %>%
  group_by(ep_beig_dat, manipulation_type) %>% # add kodes_grouped to see the kodes
  count() %>%
  # so far, here are the manipulations per day
  # count() %>%   # here is the number of manipulacijas per day
  # add the name of the day
  mutate(day = lubridate::wday(
    lubridate::ymd(ep_beig_dat),
    label = TRUE,
    abbr = TRUE
  )) %>%
  ggplot(aes(x = ep_beig_dat,
             y = n,
             color = day)) +
  geom_point() +
  labs(
    title = "Manipulations per day of the week",
    y = "Manipulations (n)",
    x = "Date",
    color = "Day of the week"
  ) +
  scale_fill_viridis_b() +
  theme(legend.position="top")
```

```{r}
ggsave(here("Figs2019_2021", "fig3.tif"), width = 7, height = 4, device='tiff', dpi=300)
```

## Manipulations per type

```{r}
df_filtered %>%
  mutate(ep_beig_dat = lubridate::ymd(ep_beig_dat)) %>%
  group_by(ep_beig_dat, manipulation_type) %>% # add kodes_grouped to see the kodes
  count() %>%
  # so far, here are the manipulations per day
  # count() %>%   # here is the number of manipulacijas per day
  # add the name of the day
  mutate(day = lubridate::wday(
    lubridate::ymd(ep_beig_dat),
    label = TRUE,
    abbr = TRUE
  )) %>%
  ggplot(aes(x = ep_beig_dat,
             y = n,
             color = day)) +
  geom_point() +
  labs(
    title = "Manipulations per day of the week",
    y = "Manipulations n",
    x = "Date",
    color = "Day of the week"
  ) +
  # scale_y_log10() + 
  scale_fill_viridis_b() + 
  facet_grid(manipulation_type ~ . ) + 
  theme(legend.position = "top") +
  theme(strip.text.y = element_text(size = 8))
```

```{r}
ggsave(here("Figs2019_2021", "fig4.tif"), width = 8, height = 6, device='tiff', dpi=300)
```
