1 - Carregar as bibliotecas necessárias
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
library(conflicted)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
library(readr)
library(scales)
library(ggrepel)
library(patchwork)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.3.1
library(extrafont)
## Registering fonts with R
2 - Carregar o dataset EconomistData.csv
# Read EconomistData.csv from the working directory into a tibble.
df <- readr::read_csv(file = "EconomistData.csv")
## Rows: 173 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Country, Region
## dbl (3): HDI.Rank, HDI, CPI
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows to check that the columns were parsed as expected.
head(df)
## # A tibble: 6 × 5
## Country HDI.Rank HDI CPI Region
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 Afghanistan 172 0.398 1.5 Asia Pacific
## 2 Albania 70 0.739 3.1 East EU Cemt Asia
## 3 Algeria 96 0.698 2.9 MENA
## 4 Angola 148 0.486 2 SSA
## 5 Argentina 45 0.797 3 Americas
## 6 Armenia 86 0.716 2.6 East EU Cemt Asia
# Per-column summary: quartiles for numeric columns, length/class for character ones.
summary(df)
## Country HDI.Rank HDI CPI
## Length:173 Min. : 1.00 Min. :0.2860 Min. :1.500
## Class :character 1st Qu.: 47.00 1st Qu.:0.5090 1st Qu.:2.500
## Mode :character Median : 96.00 Median :0.6980 Median :3.200
## Mean : 95.28 Mean :0.6581 Mean :4.052
## 3rd Qu.:143.00 3rd Qu.:0.7930 3rd Qu.:5.100
## Max. :187.00 Max. :0.9430 Max. :9.500
## Region
## Length:173
## Class :character
## Mode :character
##
##
##
## Parsed with column specification:
## cols(
## Country = col_character(),
## HDI.Rank = col_integer(),
## HDI = col_double(),
## CPI = col_double(),
## Region = col_character()
## )
O gráfico de dispersão é construído em duas fases: ggplot() + geom_point()
# Base scatter plot: CPI on the x axis, HDI on the y axis, coloured by Region.
p1 <- ggplot(data = df, mapping = aes(x = CPI, y = HDI, colour = Region)) +
  geom_point()
p1
3 - Adicionando linha de tendência
# Add a red linear-model trend line (y ~ x + log(x)) without a confidence band.
# The constant linetype mapping "r2" exists only so the line gets its own
# legend entry. The points are added again so they are drawn over the line.
p2 <- p1 +
  geom_smooth(
    mapping = aes(linetype = "r2"),
    method = "lm",
    formula = y ~ x + log(x),
    se = FALSE,
    colour = "red"
  ) +
  geom_point()
p2
4 - Calculando o coeficiente de determinação \(R^2\) e preparando para colar no gráfico.
# R-squared of HDI ~ CPI + log(CPI) (the same formula used for the trend
# line), expressed as a percentage string with 3 significant digits.
mR2 <- paste0(
  format(
    100 * summary(lm(formula = HDI ~ CPI + log(CPI), data = df))[["r.squared"]],
    digits = 3
  ),
  "%"
)
5 - Adicionando o coeficiente de determinação \(R^2\)
# Label the trend line's legend key with the R^2 value stored in mR2.
# override.aes restyles the key as a solid, thick red line. Since ggplot2
# 3.4.0 the thickness of lines is controlled by `linewidth` (`size` no longer
# affects line keys), so `linewidth` is what actually thickens the key.
p3 <- p2 +
  scale_linetype(
    name = "",
    breaks = "r2",
    labels = list(bquote(R^2 == .(mR2))),
    guide = guide_legend(
      override.aes = list(linetype = 1, linewidth = 2, color = "red"),
      order = 2
    )
  )
p3
6 - Adicionando pontos abertos ao gráfico
# Draw open circles (shape 21 with a white fill) on top of the existing points.
p4 <- p3 +
  geom_point(size = 2, stroke = 1.25, shape = 21, fill = "white")
p4
7 - Rotulando os pontos com seus respectivos países.
# Label only a hand-picked set of countries. The label layer receives its own
# data: the rows of df whose Country appears in the list below. Previously
# transform() merely added an unused logical column, so every row was passed
# to the label layer. Names that do not occur in df are silently skipped.
# NOTE(review): "US", "Britain" and "Congo" are kept as written - confirm
# they match the spellings used in EconomistData.csv.
p5 <- p4 +
  geom_text_repel(
    mapping = aes(label = Country),
    color = "gray20",
    data = df[df$Country %in% c(
      "Russia", "Venezuela", "Iraq", "Myanmar", "Sudan", "Afghanistan",
      "Congo", "Greece", "Argentina", "Italy", "Brazil", "India", "China",
      "South Africa", "Spain", "Cape Verde", "Bhutan", "Rwanda", "France",
      "Botswana", "US", "Germany", "Britain", "Barbados", "Japan", "Norway",
      "New Zealand", "Singapore"
    ), ]
  )
p5
## Warning: ggrepel: 137 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
8 - Trocar os rótulos das regiões e ordenar
# Recode Region as a factor: `levels` fixes the display order of the original
# codes and `labels` supplies the display name for each code, position by
# position.
df[["Region"]] <- factor(
  x = df[["Region"]],
  levels = c(
    "EU W. Europe", "Americas", "Asia Pacific",
    "East EU Cemt Asia", "MENA", "SSA"
  ),
  labels = c(
    "OECD", "Americas", "Asia &\nOceania",
    "Central &\nEastern Europe", "Middle East &\nnorth Africa",
    "Sub-Saharan\nAfrica"
  )
)
9 - Aplicando os rótulos trocados das regiões
# Swap the plot's default data for the recoded df so the new Region labels
# are used, then redraw.
p5[["data"]] <- df
p5
## Warning: ggrepel: 134 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
10 - Adicionando títulos nos eixos x e y
# Axis titles, limits and breaks; a manual six-colour palette for Region
# (with an empty legend title); and the plot title.
p6 <- p5 +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2011 (10=least corrupt)",
    breaks = seq_len(10),
    limits = c(0.9, 10.5)
  ) +
  scale_y_continuous(
    name = "Human Development Index, 2011 (1=Best)",
    breaks = seq(0.2, 1.0, by = 0.1),
    limits = c(0.2, 1.0)
  ) +
  scale_colour_manual(
    name = "",
    values = c(
      "#24576D", "#099DD7", "#28AADC",
      "#248E84", "#F2583F", "#96503F"
    )
  ) +
  labs(title = "Corruption and Human development")
p6
## Warning: ggrepel: 153 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
11 - Adicionando temas para formatar os títulos nos eixos.
# Final styling: replace the title, add the source caption, start from
# theme_bw() and then remove the panel border and all grid lines except the
# horizontal major ones, move the legend to the top, and tune text elements.
p7 <- p6 +
  labs(
    title = "Corruption and human development",
    caption = "Sources: Transparency International; UN Human Development Report"
  ) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid = element_blank(),
    panel.grid.major.y = element_line(colour = "gray"),
    text = element_text(colour = "gray20"),
    axis.title.x = element_text(face = "italic"),
    axis.title.y = element_text(face = "italic"),
    legend.position = "top",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.text = element_text(size = 10),
    plot.caption = element_text(hjust = 0),
    plot.title = element_text(size = 16, face = "bold")
  )
p7
## Warning: ggrepel: 159 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps