library(dplyr)
library(ggplot2)
library(wesanderson)
library(scales)
library(tidyr)
library(lubridate)
library(ggbump)wes6_1 <- c("#d3d3d3", "#F4B5BD", "#C6CDF7", "#972D15", "#81A88D", "#FDD262")
wes11 <- c("#FDD262", "#F4B5BD", "#CDC08C", "#D8B70A", "#02401B", "#81A88D",
"#972D15", "#85D4E3", "#C6CDF7", "#7294D4", "#D3DDDC")
wes15 <- c("#FDD262", "#F4B5BD", "#CDC08C", "#D8B70A", "#02401B", "#A2A475",
"#81A88D", "#972D15", "#E6A0C4", "#C6CDF7", "#D8A499", "#7294D4",
"#446455", "#C7B19C", "#D3DDDC")
wes19 <- c("#191970", "#F4B5BD", "#9C964A", "#CDC08C", "#FAD77B", "#D8B70A",
"#02401B", "#A2A475", "#81A88D", "#972D15", "#E6A0C4", "#C6CDF7",
"#D8A499", "#7294D4", "#446455", "#85D4E3", "#C7B19C", "#ffa700",
"#555555", "#1e90ff")PN <- read.csv(
"/media/vegard/Work_volume_10TB1/2025_PK-dashboard_v2/poppunk/microreact/microreact_all_merged_metadata.csv",
header = TRUE, sep = ","
)
# Fjern "__autocolour" fra kolonnenavn
colnames(PN) <- gsub("__autocolour", "", colnames(PN))
# Serotype: foretrekk Quellung, fall tilbake til WGS
PN$Serotype <- ifelse(PN$Quellung_serotype != "", PN$Quellung_serotype, PN$WGS_serotype)PCV13 <- c('4','6B','9V','14','18C','19F','23F','1','3','5','6A','7F','19A')
PCV15 <- c('4','6B','9V','14','18C','19F','23F','1','3','5','6A','7F','19A','22F','33F')
PCV20 <- c('1','3','4','5','6A','6B','7F','8','9V','10A','11A','12F','14','15B',
'18C','19A','19F','22F','23F','33F')
PCV21 <- c('3','6A','7F','8','9N','10A','11A','12F','15A','15C','16F','17F','19A',
'20A','22F','23A','23B','24F','31','33F','35B')
PPV23 <- c('1','2','3','4','5','6B','7F','8','9N','9V','10A','11A','12F','14','15B',
'17F','18C','19F','19A','20','22F','23F','33F')
PN$PCV13 <- ifelse(PN$Serotype %in% PCV13, "YES", "NO")
PN$PCV15 <- ifelse(PN$Serotype %in% PCV15, "YES", "NO")
PN$PCV20 <- ifelse(PN$Serotype %in% PCV20, "YES", "NO")
PN$PCV21 <- ifelse(PN$Serotype %in% PCV21, "YES", "NO")
PN$PPV23 <- ifelse(PN$Serotype %in% PPV23, "YES", "NO")age_order <- c("0-6", "7-15", "16-44", "45-66", "67-79", "80+")
PN$age_group <- factor(PN$age_group, levels = age_order)
PN <- PN %>%
mutate(
date = as.Date(date),
year = factor(year(date), levels = sort(unique(year(date))))
) %>%
filter(!is.na(year))top10_GPSC <- names(sort(table(PN$GPSC), decreasing = TRUE))[1:10]
top10_ST <- names(sort(table(PN$ST[!is.na(PN$ST) & PN$ST != ""]), decreasing = TRUE))[1:10]
top10_Serotype <- names(sort(table(PN$Serotype[!is.na(PN$Serotype) & PN$Serotype != ""]), decreasing = TRUE))[1:10]
PN <- PN %>%
mutate(
GPSC_dominant = if_else(GPSC %in% top10_GPSC, GPSC, "other"),
ST_dominant = if_else(as.character(ST) %in% top10_ST, as.character(ST), "other"),
Sero_dominant = if_else(as.character(Serotype) %in% top10_Serotype, as.character(Serotype), "other")
)pcv_counts <- PN %>%
group_by(year, PCV13, PCV15, PCV20, PCV21, PPV23) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(year) %>%
summarise(
PCV13 = sum(count[PCV13 == "YES"]),
PCV15 = sum(count[PCV15 == "YES"]),
PCV20 = sum(count[PCV20 == "YES"]),
PCV21 = sum(count[PCV21 == "YES"]),
PPV23 = sum(count[PPV23 == "YES"]),
Cases_total = sum(count)
) %>%
pivot_longer(
cols = c(PCV13, PCV15, PCV20, PCV21, PPV23, Cases_total),
names_to = "CoveredBy",
values_to = "Count"
)
ggplot(pcv_counts, aes(x = year, y = Count, fill = CoveredBy, group = CoveredBy)) +
geom_bar(stat = "identity", position = position_dodge()) +
geom_text(aes(label = Count), vjust = 1.6, color = "white",
position = position_dodge(0.9), size = 4) +
labs(
title = "Fig1 - Teoretisk vaksinedekning utvalgte vaksiner",
x = "",
y = "Antall",
fill = "Vaksine"
) +
theme_minimal() +
scale_fill_manual(values = wes6_1)gpsc_by_year <- PN %>%
filter(!is.na(year)) %>%
count(year, GPSC_dominant) %>%
group_by(year) %>%
mutate(fraction = n / sum(n))
ggplot(gpsc_by_year, aes(x = factor(year), y = fraction, fill = GPSC_dominant)) +
geom_bar(stat = "identity", position = "stack") +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
scale_fill_manual(values = wes11) +
labs(
title = "Fig2 - Største GPSC grupper (n > 50 totalt) per år",
x = "",
y = "Fraksjon",
fill = "GPSC"
) +
theme_minimal(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major.x = element_blank()
)GPSC_ranks <- PN %>%
filter(GPSC %in% top10_GPSC) %>%
mutate(year = as.factor(year)) %>%
group_by(GPSC, year) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(year) %>%
mutate(rank = rank(-count, ties.method = "min")) %>%
ungroup()
ggplot(GPSC_ranks, aes(x = year, y = rank, color = GPSC, group = GPSC)) +
geom_bump(linewidth = 1.5, smooth = 8) +
geom_point(size = 3) +
scale_y_reverse(breaks = 1:10) +
scale_color_manual(values = wes11) +
labs(
title = "Fig3 - GPSC utvikling (topp 10)",
x = "",
y = "Rank",
color = "GPSC"
) +
theme_minimal() +
theme(
legend.position = "right",
axis.text.x = element_text(angle = 45, hjust = 1)
)Sero_by_year <- PN %>%
filter(!is.na(year)) %>%
group_by(year, Sero_dominant) %>%
summarise(n = n(), .groups = "drop_last") %>%
mutate(fraction = n / sum(n)) %>%
ungroup()
ggplot(Sero_by_year, aes(x = factor(year), y = fraction, fill = Sero_dominant)) +
geom_bar(stat = "identity", position = "stack") +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
scale_fill_manual(values = wes11) +
labs(
title = "Fig4 - Serotypar per år",
x = "År",
y = "Fraksjon",
fill = "Vanlegaste serotyper (topp 10)"
) +
theme_minimal(base_size = 14) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))Sero_ranks <- PN %>%
filter(Serotype %in% top10_Serotype) %>%
mutate(year = as.factor(year)) %>%
group_by(Serotype, year) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(year) %>%
mutate(rank = rank(-count, ties.method = "min")) %>%
ungroup()
ggplot(Sero_ranks, aes(x = year, y = rank, color = Serotype, group = Serotype)) +
geom_bump(linewidth = 1.5, smooth = 8) +
geom_point(size = 3) +
scale_y_reverse(breaks = 1:10) +
scale_color_manual(values = wes11) +
labs(
title = "Fig5 - Serotype utvikling (topp 10)",
x = "",
y = "Rank",
color = "Serotype"
) +
theme_minimal() +
theme(
legend.position = "right",
axis.text.x = element_text(angle = 45, hjust = 1)
)top20_Serotype <- names(sort(table(
PN$Serotype[!is.na(PN$Serotype) & PN$Serotype != ""]
), decreasing = TRUE))[1:20]
sero_age <- PN %>%
filter(Serotype %in% top20_Serotype, !is.na(age_group)) %>%
count(Serotype, age_group) %>%
group_by(Serotype) %>%
mutate(fraction = n / sum(n)) %>%
ungroup() %>%
mutate(Serotype = factor(Serotype, levels = top20_Serotype))
ggplot(sero_age, aes(x = age_group, y = fraction, fill = age_group)) +
geom_col(show.legend = FALSE) +
facet_wrap(~ Serotype, ncol = 4) +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
scale_fill_manual(values = wes6_1) +
labs(
title = "Fig6 - Aldersfordeling og serotypar",
x = "Aldersgruppe",
y = "Fraksjon"
) +
theme_bw(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
strip.text = element_text(face = "bold"),
panel.grid.major.x = element_blank()
)age_sero <- PN %>%
filter(!is.na(age_group), !is.na(Serotype), Serotype != "", !is.na(year)) %>%
mutate(
Sero20 = if_else(Serotype %in% top20_Serotype, Serotype, "Andre")
) %>%
count(year, age_group, Sero20) %>%
mutate(
Sero20 = factor(Sero20, levels = c(top20_Serotype, "Andre"))
)
sero20_colors <- setNames(
c(wes19[seq_along(top20_Serotype)], "#d3d3d3"),
c(top20_Serotype, "Andre")
)
pA <- ggplot(age_sero, aes(x = age_group, y = n, fill = Sero20)) +
geom_col(position = "stack") +
scale_fill_manual(values = sero20_colors, name = "Serotype") +
scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
facet_wrap(~ year, ncol = 1) +
labs(
title = "Fig7A) Absolutte tal",
x = "Aldersgruppe",
y = "Antal tilfelle"
) +
theme_bw(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major.x = element_blank(),
legend.position = "none"
)
age_sero_frac <- age_sero %>%
group_by(year, age_group) %>%
mutate(fraction = n / sum(n)) %>%
ungroup()
pB <- ggplot(age_sero_frac, aes(x = age_group, y = fraction, fill = Sero20)) +
geom_col(position = "stack") +
scale_fill_manual(values = sero20_colors, name = "Serotype") +
scale_y_continuous(labels = percent_format(accuracy = 1),
expand = expansion(mult = c(0, 0.02))) +
facet_wrap(~ year, ncol = 1) +
labs(
title = "Fig7B) Relativ fraksjon",
x = "Aldersgruppe",
y = "Fraksjon"
) +
theme_bw(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major.x = element_blank(),
legend.position = "right",
legend.key.size = unit(0.4, "cm"),
legend.text = element_text(size = 8)
)
library(patchwork)
pA + pB + plot_layout(widths = c(1, 1))total_per_year <- PN %>%
group_by(year) %>%
summarise(total = n())
mic_per_year <- PN %>%
filter(!is.na(year)) %>% # Exclude NA years here
group_by(year, BEN_MIC, ST, GPSC, Serotype) %>%
summarise(count = n(), .groups = "drop")
mic_proportions <- mic_per_year %>%
left_join(total_per_year, by = "year") %>%
mutate(proportion = count / total,
ST = as.factor(ST))
high_mic_proportions <- subset(mic_proportions, BEN_MIC >= 2)mic_labels <- mic_proportions %>%
group_by(year, BEN_MIC) %>%
summarise(
n_total = sum(count),
proportion = sum(count) / first(total),
.groups = "drop"
)
ggplot(mic_proportions, aes(x = factor(BEN_MIC), y = proportion, fill = year)) +
geom_col() +
geom_text(data = mic_labels,
aes(x = factor(BEN_MIC), y = proportion, label = n_total),
vjust = -0.3, size = 3, color = "black", inherit.aes = FALSE) +
scale_x_discrete(limits = c("0.12", "0.25", "0.5", "1", "2", "4", "8")) +
scale_fill_manual(values = wes15) +
facet_wrap(~year, nrow = 3) +
labs(
title = "Fig8 - Andel av PNS pneumokokkisolat fordelt på MIC",
x = "Penicillin MIC",
y = "Andel av isolat",
fill = "År"
) +
theme_bw() +
ylim(0,0.1) +
theme(legend.position = "right")ggplot(high_mic_proportions, aes(x = factor(year), y = proportion, fill = ST)) +
geom_col() +
scale_fill_manual(values = wes19) +
labs(
title = "Fig9 - Penicillinresistente pneumokokker: sekvenstyper",
x = "År",
y = "Andel av isolat",
fill = "ST"
) +
theme_bw() +
theme(legend.position = "right")ggplot(high_mic_proportions, aes(x = factor(year), y = proportion, fill = GPSC)) +
geom_col() +
scale_fill_manual(values = wes19) +
labs(
title = "Fig10 - GPSC-fordeling blant PRP-tilfeller",
x = "År",
y = "Andel av isolat",
fill = "GPSC"
) +
theme_bw() +
theme(legend.position = "right")ggplot(high_mic_proportions, aes(x = factor(year), y = proportion, fill = Serotype)) +
geom_col() +
scale_fill_manual(values = wes19) +
labs(
title = "Fig11 - Serotype-fordeling blant PRP-tilfeller",
x = "År",
y = "Andel av isolat",
fill = "Serotype"
) +
theme_bw() +
theme(legend.position = "right")