library(tinytex)
# NOTE(review): hard-coded, machine-specific working directory. It is kept so
# that any relative path used elsewhere in the document keeps resolving, but
# the script will not run on another machine as-is; prefer project-relative
# paths.
setwd("C:/DA/hw3")

# Global knitr chunk options: echo the code, hide messages and warnings, and
# cache chunk results. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  cache = TRUE
)
library(ggplot2)
library(knitr)
library(ggrepel)
library(dendroextras)
library(dplyr)
library(pheatmap)
library(RColorBrewer)
library(kableExtra)

Import Data

library(readxl)

# Read the "debt" worksheet of the workbook into a tibble.
# NOTE(review): the path is absolute and machine-specific.
Data_Shrink_Debt <- read_excel(
  "C:/DA/hw3/Data_Shrink_Debt.xlsx",
  sheet = "debt"
)

# Drop the 19th row of the sheet.
# NOTE(review): the row is removed by position; presumably it is not a
# regular country record -- confirm, and consider filtering on a column value
# so the script keeps working if the sheet's row order changes.
Data_Shrink_Debt <- Data_Shrink_Debt[-19, ]

# Preview the first six rows as a striped table, leaving out columns 10 and 11.
head(Data_Shrink_Debt[, -c(10, 11)]) %>%
  kbl() %>%
  kable_paper(bootstrap_options = "striped", full_width = FALSE)
Country Debt_to_GDP_Ratio_2003 Debt_to_GDP_Ratio_2004 Debt_to_GDP_Ratio_2005 Debt_to_GDP_Ratio_2006 Debt_to_GDP_Ratio_2007 Debt_to_GDP_Ratio_2008 Debt_to_GDP_Ratio_2009 Debt_to_GDP_Ratio_2010
Greece 105.8 108.6 110.6 107.7 105.7 110.6 127.0 147.8
Belgium 95.4 92.8 91.8 87.6 85.3 90.1 94.9 96.8
Austria 60.9 62.2 62.1 60.4 57.8 59.3 64.9 65.8
Portugal 58.3 61.0 66.2 67.7 66.6 68.9 78.7 88.0
Hungary 56.2 55.7 58.1 62.0 61.6 67.7 72.8 73.9
France 51.9 52.6 53.3 52.1 52.1 53.4 61.2 67.4

Visualization

# Scatter plot of each country's 2003 ratio against its 2007 ratio, with a
# repelled text label per country.
# The label is mapped through aes() rather than passed as
# Data_Shrink_Debt$Country, so it is always taken from the same rows the
# layer actually draws.
ggplot(
  Data_Shrink_Debt,
  aes(x = Debt_to_GDP_Ratio_2003, y = Debt_to_GDP_Ratio_2007)
) +
  geom_label_repel(aes(label = Country)) +
  geom_point(color = "red") +
  theme_classic(base_size = 16) +
  labs(
    x = "Debt-to-GDP Ratio 2003",
    y = "Debt-to-GDP Ratio 2007",
    title = "Debt-to-GDP Ratio 2003 vs 2007"
  )

Dendrogram plot

# Hierarchical clustering of the countries on their 2003 and 2007 ratios.
# The selection is converted to a base data frame because setting row names on
# a tibble is deprecated; Data_clust still exposes the Country column.
Data_clust <- Data_Shrink_Debt %>%
  select(Country, Debt_to_GDP_Ratio_2003, Debt_to_GDP_Ratio_2007) %>%
  as.data.frame()
rownames(Data_clust) <- Data_clust$Country

# Compute distances on the numeric columns only. Passing the character Country
# column to dist() turns it into NA (with a coercion warning), and dist() then
# rescales the remaining sum for the "missing" column, inflating every
# distance. The row names carry over and become the dendrogram leaf labels.
clust_vars <- Data_clust[
  ,
  c("Debt_to_GDP_Ratio_2003", "Debt_to_GDP_Ratio_2007")
]
dt <- dist(clust_vars, method = "euclidean") # Euclidean distance matrix

# hclust() with its default linkage; not used by the plot below, kept in case
# later code refers to `clust`.
clust <- hclust(dt)

# Margins on the four sides of the plot and a smaller text size.
# NOTE: par() settings stay in effect for later base-graphics plots.
par(mar = c(2, 10, 2, 10), cex = 0.6)

# Average-linkage tree cut into 3 coloured clusters, labelled with Roman
# numerals.
clst1 <- colour_clusters(hclust(dt, "ave"), 3, groupLabels = as.roman)
plot(clst1, main = "Dendrogram with 3 clusters", horiz = TRUE)

Heatmap

# Heatmap of the column-standardised debt-to-GDP ratios, one row per country
# and one column per year.
heatcolor <- brewer.pal(8, "RdBu")

# Select the year columns by name instead of by position. The previous code
# assigned eight year labels as column names of the whole table (which starts
# with Country), so the labels did not line up with the data, and `[, 2:8]`
# covered only seven of the eight years. Data_Shrink_Debt itself is no longer
# renamed, so its Country column stays available.
years <- 2003:2010
year_cols <- paste0("Debt_to_GDP_Ratio_", years)

# scale() centres and scales each year column so the years are comparable.
gdp <- scale(as.matrix(Data_Shrink_Debt[, year_cols]))
rownames(gdp) <- Data_Shrink_Debt$Country
colnames(gdp) <- as.character(years)

# Clustering of the scaled matrix; not used by the heatmap below (row and
# column clustering are switched off), kept in case later code refers to it.
clst <- hclust(dist(gdp))

# Simple heatmap without row or column clustering.
pheatmap(
  gdp,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  main = "Debt-to-GDP Ratio ",
  color = heatcolor
)