Clustering excess deaths alone (using Euclidean distance)

Plotting excess deaths in all countries

  • 69 countries were included after merging with other available datasets

  • The data cutoff (last update date) was set to December 2021

  • All excess deaths by country were aggregated into monthly values

# Excess deaths over time, one line per location. gghighlight is given the
# predicate "peak excess deaths above 90,000" (with max_highlight set to 4)
# and labels the highlighted series directly, so the colour legend is hidden.
ggplot(
  data = ex_country,
  mapping = aes(x = date, y = excess_deaths, color = location)
) +
  geom_line() +
  labs(y = "Projected Excess Deaths") +
  gghighlight(
    max(excess_deaths) > 90000,
    max_highlight = 4,
    use_direct_label = TRUE
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Fitting Hierarchical clustering using Euclidean distance

# Drop the first column of the wide table and transpose it, so that the
# clustered observations are the rows of the resulting matrix.
deaths <- t(ex_per_country[-1])

# Pairwise Euclidean distances between the rows of `deaths`.
deaths_dist <- proxy::dist(x = deaths, method = "Euclidean")

# Agglomerative clustering on those distances.
# NOTE(review): per ?hclust, "ward.D" applied to unsquared distances does not
# implement Ward's criterion ("ward.D2" does) - confirm this choice is intended.
ex_cluster_fit <- hclust(d = deaths_dist, method = "ward.D")

Clustered Excess Deaths Dendrogram (Euclidean)

Plot each cluster

## 
##  1  2  3  4 
## 53  3 12  1

Clustering Excess Deaths using Dynamic Time Warping (DTW)

Fit DTW hierarchical clustering

# `deaths` is reused from the Euclidean section, i.e. t(ex_per_country[-1]);
# it is not rebuilt here.

# Standardize the data before measuring similarity.
deaths.norm <- BBmisc::normalize(deaths, method = "standardize")

# Pairwise DTW distances, converted to a `dist` object for hclust() and pam().
deaths_dist_norm <- as.dist(dtw::dtwDist(deaths.norm))

# Hierarchical clustering on the DTW distances.
ex_cluster_fit2 <- hclust(deaths_dist_norm, method = "ward.D")

# Horizontal dendrogram with blank axis titles, rendered interactively.
ex_dendrogram <-
  ggdendro::ggdendrogram(
    ex_cluster_fit2,
    rotate = TRUE,
    theme_dendro = FALSE
  ) +
  theme_minimal() +
  xlab("") +
  ylab("")
ggplotly(ex_dendrogram)

library(cluster)

# Average silhouette width of a PAM partition for k = 2..8. Slot 1 stays NA
# because k = 1 is never fitted; it only keeps the index aligned with k.
deaths_sil_width <- rep(NA_real_, 8)
for (i in 2:8) {
  deaths_pam_fit <- pam(deaths_dist_norm, diss = TRUE, k = i)
  deaths_sil_width[i] <- deaths_pam_fit$silinfo$avg.width
}

# Points first, then a connecting line over the same coordinates.
plot(
  1:8, deaths_sil_width,
  xlab = "Number of clusters",
  ylab = "Silhouette Width"
)
lines(1:8, deaths_sil_width)

Plot each cluster

# Build the faceted panel plot for one excess-deaths cluster.
#
# cluster_id: cluster label (number or string); it is converted to a string
#   and compared against the `cluster` column.
# data: table to plot; defaults to `joined_clusters2`. It must provide the
#   columns `cluster`, `date`, `excess_deaths` and `location`.
#
# Returns a ggplot object with one free-scaled facet per location: the raw
# series in grey, overlaid with a thin red smoother without confidence band.
plot_excess_cluster <- function(cluster_id, data = joined_clusters2) {
  cluster_label <- as.character(cluster_id)
  data %>%
    # `!!` injects the value so a same-named column can never shadow it.
    filter(cluster == !!cluster_label) %>%
    ggplot(aes(date, excess_deaths)) +
    geom_line(color = "grey") +
    theme_minimal() +
    ylab("excess deaths") + xlab("") +
    # se = FALSE rather than F: `F` is an ordinary, reassignable variable.
    geom_smooth(method = "auto", color = "red", se = FALSE, size = 0.5) +
    facet_wrap(~location, scales = "free") +
    ggtitle(paste("Excess Deaths Cluster", cluster_label))
}

# One plot object per cluster. The ggplotly() calls stay at top level so that
# each interactive widget is auto-printed.
ex_cluster1 <- plot_excess_cluster(1)
ggplotly(ex_cluster1)
ex_cluster2 <- plot_excess_cluster(2)
ggplotly(ex_cluster2)
ex_cluster3 <- plot_excess_cluster(3)
ggplotly(ex_cluster3)
ex_cluster4 <- plot_excess_cluster(4)
ggplotly(ex_cluster4)
ex_cluster5 <- plot_excess_cluster(5)
ggplotly(ex_cluster5)
ex_cluster6 <- plot_excess_cluster(6)
ggplotly(ex_cluster6)

Clustering policy stringency alone

Plotting policy stringency index by country

Fit DTW hierarchical clustering and plot the dendrogram

# Long to wide: one row per time unit, one column per location. Rows with a
# missing value in any column are dropped.
stringency_per_country <-
  month_stringency[, c("monthly_stringency", "time_unit", "location")] %>%
  spread(location, monthly_stringency) %>%
  drop_na()

# Drop the leading time_unit column and transpose: one row per location.
policy <- t(stringency_per_country[-1])

# Standardize the data before measuring similarity.
policy.norm <- BBmisc::normalize(policy, method = "standardize")

# Pairwise DTW distances, converted to a `dist` object for hclust() and pam().
policy_dist_norm <- as.dist(dtw::dtwDist(policy.norm))

# Hierarchical clustering on the DTW distances.
policy_cluster_fit <- hclust(policy_dist_norm, method = "ward.D")

# Horizontal dendrogram with blank axis titles, rendered interactively.
policy_dendrogram <-
  ggdendro::ggdendrogram(
    policy_cluster_fit,
    rotate = TRUE,
    theme_dendro = FALSE
  ) +
  theme_minimal() +
  xlab("") +
  ylab("")
ggplotly(policy_dendrogram)

# Average silhouette width of a PAM partition for k = 2..8. Slot 1 stays NA
# because k = 1 is never fitted; it only keeps the index aligned with k.
# pam() is called unqualified, so the cluster package must already be attached.
policy_sil_width <- rep(NA_real_, 8)
for (i in 2:8) {
  policy_pam_fit <- pam(policy_dist_norm, diss = TRUE, k = i)
  policy_sil_width[i] <- policy_pam_fit$silinfo$avg.width
}

# Points first, then a connecting line over the same coordinates.
plot(
  1:8, policy_sil_width,
  xlab = "Number of clusters",
  ylab = "Silhouette Width"
)
lines(1:8, policy_sil_width)

Plot policy stringency clusters

# Build the faceted panel plot for one policy-stringency cluster.
#
# cluster_id: cluster label (number or string); it is converted to a string
#   and compared against the `cluster` column.
# data: table to plot; defaults to `joined_clusters_policy`. It must provide
#   the columns `cluster`, `date`, `monthly_stringency` and `location`.
#
# Returns a ggplot object with one free-scaled facet per location: the raw
# series in grey, overlaid with a thin red smoother without confidence band.
plot_policy_cluster <- function(cluster_id, data = joined_clusters_policy) {
  cluster_label <- as.character(cluster_id)
  data %>%
    # `!!` injects the value so a same-named column can never shadow it.
    filter(cluster == !!cluster_label) %>%
    ggplot(aes(date, monthly_stringency)) +
    geom_line(color = "grey") +
    theme_minimal() +
    ylab("policy stringency") + xlab("") +
    # se = FALSE rather than F: `F` is an ordinary, reassignable variable.
    geom_smooth(method = "auto", color = "red", se = FALSE, size = 0.5) +
    facet_wrap(~location, scales = "free") +
    ggtitle(paste("Policy Stringency Cluster", cluster_label))
}

# One plot object per cluster. The ggplotly() calls stay at top level so that
# each interactive widget is auto-printed.
policy_cluster1 <- plot_policy_cluster(1)
ggplotly(policy_cluster1)
policy_cluster2 <- plot_policy_cluster(2)
ggplotly(policy_cluster2)
policy_cluster3 <- plot_policy_cluster(3)
ggplotly(policy_cluster3)
policy_cluster4 <- plot_policy_cluster(4)
ggplotly(policy_cluster4)