excess_deaths2

Clustering excess deaths alone (using Euclidean distance)

Plotting excess deaths for all countries

69 countries were included after merging with other available datasets.
The last update date was set to December 2021.
Excess deaths were aggregated into monthly values for each country.

# Line chart of excess deaths over time, one line per location.
# gghighlight() keeps emphasis and a direct label only on locations whose
# maximum excess_deaths exceeds 90000, limited to at most 4 highlighted series.
ggplot(
  data = ex_country,
  mapping = aes(x = date, y = excess_deaths, colour = location)
) +
  geom_line(stat = "identity") +
  labs(y = "Projected Excess Deaths") +
  gghighlight(
    max(excess_deaths) > 90000,
    max_highlight = 4,
    use_direct_label = TRUE
  ) +
  theme_minimal() +
  # The direct labels replace the legend.
  theme(legend.position = "none")

Fitting Hierarchical clustering using Euclidean distance

# Drop the first column of ex_per_country (presumably the date/time column --
# confirm) and transpose, so each remaining column becomes one row of `deaths`.
deaths <- t(ex_per_country[-1])
# Pairwise Euclidean distances between the rows of `deaths`.
deaths_dist <- proxy::dist(x = deaths, method = "Euclidean")
# Agglomerative hierarchical clustering on those distances.
# NOTE(review): stats::hclust documents that "ward.D" only implements Ward's
# criterion when given squared distances ("ward.D2" squares them internally).
# Confirm that "ward.D" on unsquared Euclidean distances is intended.
ex_cluster_fit <- hclust(d = deaths_dist, method = "ward.D")

Clustered Excess Deaths Dendrogram (Euclidean)

Plot each cluster

## 
##  1  2  3  4 
## 53  3 12  1

Clustering Excess Deaths using Dynamic Time Warping (DTW)

Fit DTW hierarchical clustering

# `deaths` is reused from the Euclidean step; it was built as:
# deaths <- t(ex_per_country[-1])

# Rescale the series with BBmisc's "standardize" method before comparing them.
deaths.norm <- BBmisc::normalize(x = deaths, method = "standardize")
# Pairwise DTW distance matrix between the rows, converted straight to a
# `dist` object because hclust() expects one.
deaths_dist_norm <- as.dist(dtw::dtwDist(deaths.norm))
# Hierarchical clustering on the DTW distances with the "ward.D" linkage.
ex_cluster_fit2 <- hclust(d = deaths_dist_norm, method = "ward.D")

# Rotated dendrogram of the DTW clustering with blank axis titles,
# then handed to plotly for an interactive rendering.
ex_dendrogram <- ggdendro::ggdendrogram(
  ex_cluster_fit2,
  rotate = TRUE,
  theme_dendro = FALSE
) +
  theme_minimal() +
  labs(x = "", y = "")
ggplotly(ex_dendrogram)

library(cluster)
# Average silhouette width of a PAM partition of the DTW distances for
# k = 2..8 clusters. Slot 1 is left as NA because k = 1 is not fitted.
deaths_sil_width <- rep(NA_real_, 8)
for (i in 2:8) {
  # diss = TRUE: the input is already a dissimilarity object, not raw data.
  deaths_pam_fit <- pam(x = deaths_dist_norm, k = i, diss = TRUE)
  deaths_sil_width[i] <- deaths_pam_fit$silinfo$avg.width
}
# Points first, then a connecting line over the same coordinates.
plot(
  x = 1:8,
  y = deaths_sil_width,
  xlab = "Number of clusters",
  ylab = "Silhouette Width"
)
lines(x = 1:8, y = deaths_sil_width)

Plot each cluster

# Build the faceted plot for a single excess-deaths cluster.
#
# The six cluster plots differ only in the cluster id and the title, so the
# shared pipeline lives here instead of being copy-pasted six times.
#
# cluster_label: cluster id as a string (e.g. "1"); rows whose `cluster`
#   column equals it are kept.
# data: data frame with columns cluster, date, excess_deaths and location.
#   Defaults to joined_clusters2.
# Returns a ggplot object: the series as a grey line with a red smoother on
#   top, one free-scale facet per location.
plot_excess_cluster <- function(cluster_label, data = joined_clusters2) {
  data %>%
    # `!!` forces the function argument, so a column that happens to share
    # its name can never shadow it inside filter().
    filter(cluster == !!cluster_label) %>%
    ggplot(aes(date, excess_deaths)) +
    geom_line(color = "grey") +
    theme_minimal() +
    ylab("excess deaths") + xlab("") +
    # se = FALSE (not F): `F` is an ordinary variable that can be reassigned.
    # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
    # `linewidth`; kept as `size` here to stay compatible with older versions.
    geom_smooth(method = "auto", color = "red", se = FALSE, size = 0.5) +
    facet_wrap(~location, scales = "free") +
    ggtitle(paste("Excess Deaths Cluster", cluster_label))
}

ex_cluster1 <- plot_excess_cluster("1")
ggplotly(ex_cluster1)

ex_cluster2 <- plot_excess_cluster("2")
ggplotly(ex_cluster2)

ex_cluster3 <- plot_excess_cluster("3")
ggplotly(ex_cluster3)

ex_cluster4 <- plot_excess_cluster("4")
ggplotly(ex_cluster4)

ex_cluster5 <- plot_excess_cluster("5")
ggplotly(ex_cluster5)

ex_cluster6 <- plot_excess_cluster("6")
ggplotly(ex_cluster6)

Clustering policy stringency alone

Plotting policy stringency index by country

Fit DTW hierarchical clustering and plot dendrogram

# Long to wide: spread() gives every location its own column of
# monthly_stringency values (one row per time_unit); drop_na() then removes
# any row that still contains a missing value.
# NOTE(review): spread() is superseded by tidyr::pivot_wider(), but the two
# order columns/rows differently, so it is kept to leave the series order
# (and therefore the clustering) untouched.
stringency_per_country <-
  month_stringency[, c("monthly_stringency", "time_unit", "location")] %>%
  spread(key = location, value = monthly_stringency) %>%
  drop_na()
# Drop the first column (time_unit after the reshape) and transpose so that
# each location becomes one row.
policy <- t(stringency_per_country[-1])
# Rescale the series with BBmisc's "standardize" method.
policy.norm <- BBmisc::normalize(x = policy, method = "standardize")
# Pairwise DTW distance matrix, converted straight to a `dist` object.
policy_dist_norm <- as.dist(dtw::dtwDist(policy.norm))
# Hierarchical clustering on the DTW distances with the "ward.D" linkage.
policy_cluster_fit <- hclust(d = policy_dist_norm, method = "ward.D")
# Rotated dendrogram with blank axis titles, rendered interactively.
policy_dendrogram <- ggdendro::ggdendrogram(
  policy_cluster_fit,
  rotate = TRUE,
  theme_dendro = FALSE
) +
  theme_minimal() +
  labs(x = "", y = "")
ggplotly(policy_dendrogram)

# Average silhouette width of a PAM partition of the policy DTW distances for
# k = 2..8 clusters. Slot 1 is left as NA because k = 1 is not fitted.
policy_sil_width <- rep(NA_real_, 8)
for (i in 2:8) {
  # diss = TRUE: the input is already a dissimilarity object, not raw data.
  policy_pam_fit <- pam(x = policy_dist_norm, k = i, diss = TRUE)
  policy_sil_width[i] <- policy_pam_fit$silinfo$avg.width
}
# Points first, then a connecting line over the same coordinates.
plot(
  x = 1:8,
  y = policy_sil_width,
  xlab = "Number of clusters",
  ylab = "Silhouette Width"
)
lines(x = 1:8, y = policy_sil_width)

Plot policy stringency clusters

# Build the faceted plot for a single policy-stringency cluster.
#
# The four cluster plots differ only in the cluster id and the title, so the
# shared pipeline lives here instead of being copy-pasted four times.
#
# cluster_label: cluster id as a string (e.g. "1"); rows whose `cluster`
#   column equals it are kept.
# data: data frame with columns cluster, date, monthly_stringency and
#   location. Defaults to joined_clusters_policy.
# Returns a ggplot object: the series as a grey line with a red smoother on
#   top, one free-scale facet per location.
plot_policy_cluster <- function(cluster_label,
                                data = joined_clusters_policy) {
  data %>%
    # `!!` forces the function argument, so a column that happens to share
    # its name can never shadow it inside filter().
    filter(cluster == !!cluster_label) %>%
    ggplot(aes(date, monthly_stringency)) +
    geom_line(color = "grey") +
    theme_minimal() +
    ylab("policy stringency") + xlab("") +
    # se = FALSE (not F): `F` is an ordinary variable that can be reassigned.
    # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
    # `linewidth`; kept as `size` here to stay compatible with older versions.
    geom_smooth(method = "auto", color = "red", se = FALSE, size = 0.5) +
    facet_wrap(~location, scales = "free") +
    ggtitle(paste("Policy Stringency Cluster", cluster_label))
}

policy_cluster1 <- plot_policy_cluster("1")
ggplotly(policy_cluster1)

policy_cluster2 <- plot_policy_cluster("2")
ggplotly(policy_cluster2)

policy_cluster3 <- plot_policy_cluster("3")
ggplotly(policy_cluster3)

policy_cluster4 <- plot_policy_cluster("4")
ggplotly(policy_cluster4)