library(tinytex)
# NOTE(review): hard-coded, machine-specific working directory. Kept so that
# any relative paths later in the file still resolve; consider removing it in
# favour of project-relative paths.
setwd("C:/DA/hw3")
# Chunk defaults: echo the code, hide messages and warnings, cache results.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  cache = TRUE
)
library(ggplot2)
library(knitr)
library(ggrepel)
library(dendroextras)
library(dplyr)
library(pheatmap)
library(RColorBrewer)
library(kableExtra)
Import Data
# Read the "debt" worksheet, drop row 19, and show the first rows as a
# striped, non-full-width table (columns 10 and 11 are left out of the preview).
library(readxl)
Data_Shrink_Debt <- read_excel(
  path = "C:/DA/hw3/Data_Shrink_Debt.xlsx",
  sheet = "debt"
)
Data_Shrink_Debt <- Data_Shrink_Debt[-19, ]
kable_paper(
  kbl(head(Data_Shrink_Debt[, -c(10, 11)])),
  bootstrap_options = "striped",
  full_width = FALSE
)
|
Country
|
Debt_to_GDP_Ratio_2003
|
Debt_to_GDP_Ratio_2004
|
Debt_to_GDP_Ratio_2005
|
Debt_to_GDP_Ratio_2006
|
Debt_to_GDP_Ratio_2007
|
Debt_to_GDP_Ratio_2008
|
Debt_to_GDP_Ratio_2009
|
Debt_to_GDP_Ratio_2010
|
|
Greece
|
105.8
|
108.6
|
110.6
|
107.7
|
105.7
|
110.6
|
127.0
|
147.8
|
|
Belgium
|
95.4
|
92.8
|
91.8
|
87.6
|
85.3
|
90.1
|
94.9
|
96.8
|
|
Austria
|
60.9
|
62.2
|
62.1
|
60.4
|
57.8
|
59.3
|
64.9
|
65.8
|
|
Portugal
|
58.3
|
61.0
|
66.2
|
67.7
|
66.6
|
68.9
|
78.7
|
88.0
|
|
Hungary
|
56.2
|
55.7
|
58.1
|
62.0
|
61.6
|
67.7
|
72.8
|
73.9
|
|
France
|
51.9
|
52.6
|
53.3
|
52.1
|
52.1
|
53.4
|
61.2
|
67.4
|
Visualization
# Scatter plot of the 2003 ratio (x) against the 2007 ratio (y), one red point
# per row, with a repelled label showing the country name.
ggplot(
  data = Data_Shrink_Debt,
  mapping = aes(x = Debt_to_GDP_Ratio_2003, y = Debt_to_GDP_Ratio_2007)
) +
  # Labels are added before the points so the points are drawn on top.
  geom_label_repel(label = Data_Shrink_Debt[["Country"]]) +
  geom_point(color = "red") +
  labs(
    title = "Debt-to-GDP Ratio 2003 vs 2007",
    x = "Debt-to-GDP Ratio 2003",
    y = "Debt-to-GDP Ratio 2007"
  ) +
  theme_classic(base_size = 16)

Dendrogram plot
# Hierarchical clustering of countries on their 2003 and 2007 debt-to-GDP
# ratios, drawn as a horizontal dendrogram coloured into 3 clusters.
Data_clust <- Data_Shrink_Debt %>%
  select(Country, Debt_to_GDP_Ratio_2003, Debt_to_GDP_Ratio_2007)

# Build a purely numeric matrix for dist(). Passing the Country text column
# along would have it coerced to NA, and dist() rescales distances to
# compensate for missing values, inflating every distance.
clust_mat <- as.matrix(
  Data_clust[, c("Debt_to_GDP_Ratio_2003", "Debt_to_GDP_Ratio_2007")]
)
# Country names become the leaf labels of the dendrogram.
rownames(clust_mat) <- Data_clust$Country

# Euclidean distance matrix, computed once and reused below.
dt <- dist(clust_mat, method = "euclidean")
# Default-linkage tree; not used by the plot below, retained in case later
# code in the file refers to `clust`.
clust <- hclust(dt)

# Wide left/right margins so the horizontal leaf labels fit; smaller text.
par(mar = c(2, 10, 2, 10), cex = 0.6)
# Average-linkage tree cut into 3 colour groups, labelled I, II, III.
clst1 <- colour_clusters(hclust(dt, "average"), 3, groupLabels = as.roman)
plot(clst1, main = "Dendrogram with 3 clusters", horiz = TRUE)

Heatmap
# Heatmap of column-standardised debt-to-GDP ratios: one row per country,
# one column per year from 2003 to 2010, with no row or column clustering.
heatcolor <- brewer.pal(8, "RdBu")

# Pick the yearly ratio columns by name so that every year (including 2010)
# is included and each column carries its own year as label. The data frame
# itself is left untouched, so its Country column keeps its name.
years <- 2003:2010
year_cols <- paste0("Debt_to_GDP_Ratio_", years)

# Scale each year to mean 0 / sd 1 to get comparable units across columns.
gdp <- as.matrix(scale(Data_Shrink_Debt[, year_cols]))
colnames(gdp) <- years
rownames(gdp) <- Data_Shrink_Debt$Country

# Row clustering of the scaled data; not used by the heatmap below (clustering
# is switched off), retained in case later code in the file refers to `clst`.
clst <- hclust(dist(gdp))

# Simple heatmap
pheatmap(
  gdp,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  main = "Debt-to-GDP Ratio ",
  color = heatcolor
)
