Neste trabalho irei replicar um modelo de gráfico, recomendado como atividade 1.

1 - Carregar as bibliotecas necessárias

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
library(conflicted)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
library(readr)
library(scales)
library(ggrepel)
library(patchwork)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.3.1
library(extrafont)
## Registering fonts with R

2 - Carregar o dataset EconomistData.csv

# Load the Economist dataset (path is relative to the working directory).
df <- read_csv(file = "EconomistData.csv")
## Rows: 173 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Country, Region
## dbl (3): HDI.Rank, HDI, CPI
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to check that the columns were parsed as expected.
head(df, n = 6L)
## # A tibble: 6 × 5
##   Country     HDI.Rank   HDI   CPI Region           
##   <chr>          <dbl> <dbl> <dbl> <chr>            
## 1 Afghanistan      172 0.398   1.5 Asia Pacific     
## 2 Albania           70 0.739   3.1 East EU Cemt Asia
## 3 Algeria           96 0.698   2.9 MENA             
## 4 Angola           148 0.486   2   SSA              
## 5 Argentina         45 0.797   3   Americas         
## 6 Armenia           86 0.716   2.6 East EU Cemt Asia
# Per-column summary: quantiles for numeric columns, length/class for text.
summary(object = df)
##    Country             HDI.Rank           HDI              CPI       
##  Length:173         Min.   :  1.00   Min.   :0.2860   Min.   :1.500  
##  Class :character   1st Qu.: 47.00   1st Qu.:0.5090   1st Qu.:2.500  
##  Mode  :character   Median : 96.00   Median :0.6980   Median :3.200  
##                     Mean   : 95.28   Mean   :0.6581   Mean   :4.052  
##                     3rd Qu.:143.00   3rd Qu.:0.7930   3rd Qu.:5.100  
##                     Max.   :187.00   Max.   :0.9430   Max.   :9.500  
##     Region         
##  Length:173        
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
## Parsed with column specification:
## cols(
##   Country = col_character(),
##   HDI.Rank = col_integer(),
##   HDI = col_double(),
##   CPI = col_double(),
##   Region = col_character()
## )
3 - Plotando o gráfico de dispersão: x = CPI, y = HDI, cor = Region

O gráfico de dispersão é construído em duas fases: ggplot() define os dados e o mapeamento estético, e geom_point() desenha os pontos.

# Base scatter plot: CPI on the x axis, HDI on the y axis, and one colour per
# Region value.
p1 <- ggplot(data = df, mapping = aes(x = CPI, y = HDI, color = Region)) +
  geom_point()

p1

3 - Adicionando linha de tendência

# Overlay one red trend curve fitted by a linear model in x and log(x), with
# no confidence band. Linetype is mapped to the constant "r2" so the curve can
# later receive its own legend entry. The points are added once more so that
# they are drawn on top of the curve.
p2 <- p1 +
  geom_smooth(
    mapping = aes(linetype = "r2"),
    method = "lm",
    formula = y ~ x + log(x),
    se = FALSE,
    color = "red"
  ) +
  geom_point()
p2

4 - Calculando o coeficiente de determinação \(R^2\) e preparando-o para exibição no gráfico.

# R-squared of the same model used for the trend curve (HDI on CPI and
# log(CPI)), expressed as a percentage string with three significant digits.
mR2 <- paste0(
  format(
    summary(lm(HDI ~ CPI + log(CPI), data = df))$r.squared * 100,
    digits = 3
  ),
  "%"
)

5 - Adicionando o coeficiente de determinação \(R^2\) à legenda

# Turn the constant "r2" linetype mapping into a legend entry whose label is
# the plotmath expression R^2 == <mR2>. The legend title is left empty and the
# key is drawn as a solid red line. `order = 2` sets this guide's position
# relative to the other legends.
p3 <- p2 +
  scale_linetype(
    name = "",
    breaks = "r2",
    labels = list(bquote(R^2 == .(mR2))),
    guide = guide_legend(
      # ggplot2 >= 3.4 sizes lines with `linewidth`; `size` is not applied to
      # line legend keys any more, so the intended thicker key needs
      # `linewidth` here.
      override.aes = list(linetype = 1, linewidth = 2, color = "red"),
      order = 2
    )
  )
p3

6 - Adicionando pontos abertos ao gráfico

# Add a layer of hollow markers: shape 21 takes a fill colour, here white,
# while the outline keeps the Region colour inherited from the plot mapping.
p4 <- p3 +
  geom_point(
    shape = 21,
    fill = "white",
    size = 2,
    stroke = 1.25
  )

p4

7 - Rotulando os pontos com seus respectivos países.

# Countries that should carry a text label.
# NOTE(review): the spelling used in the Country column is not visible here;
# both "US" and "United States" are listed so whichever exists is labelled -
# confirm against the data and drop the unused one.
points_to_label <- c(
  "Russia", "Venezuela", "Iraq", "Myanmar", "Sudan", "Afghanistan", "Congo",
  "Greece", "Argentina", "Italy", "Brazil", "India", "China", "South Africa",
  "Spain", "Cape Verde", "Bhutan", "Rwanda", "France", "Botswana", "US",
  "United States", "Germany", "Britain", "Barbados", "Japan", "Norway",
  "New Zealand", "Singapore"
)

# Label only the selected countries. The label layer gets its own data: the
# rows of df whose Country is in the list. Adding a logical column with
# transform() would leave all rows in place and label every country. Base
# subsetting is used rather than filter() because `conflicted` is attached and
# would flag the unqualified call as ambiguous.
p5 <- p4 +
  geom_text_repel(
    mapping = aes(label = Country),
    color = "gray20",
    data = df[df$Country %in% points_to_label, ]
  )

p5
## Warning: ggrepel: 137 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

8 - Trocar os rótulos das regiões e ordenar

# Convert Region to a factor. `levels` fixes the order of the regions and
# `labels` supplies the display name for each level, matched by position.
df[["Region"]] <- factor(
  x = df[["Region"]],
  levels = c(
    "EU W. Europe",
    "Americas",
    "Asia Pacific",
    "East EU Cemt Asia",
    "MENA",
    "SSA"
  ),
  labels = c(
    "OECD",
    "Americas",
    "Asia &\nOceania",
    "Central &\nEastern Europe",
    "Middle East &\nnorth Africa",
    "Sub-Saharan\nAfrica"
  )
)

9 - Aplicando os rótulos trocados das regiões ao gráfico

# Replace the data stored in the existing plot object with the current df, so
# the plot does not have to be rebuilt from scratch, then redraw it.
p5$data <- df
p5
## Warning: ggrepel: 134 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

10 - Adicionando títulos e escalas nos eixos x e y, cores das regiões e título do gráfico

# Set the axis titles, limits and tick positions, assign one manual colour per
# Region value (the colour legend has no title), and add the plot title.
p6 <- p5 +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2011 (10=least corrupt)",
    limits = c(0.9, 10.5),
    breaks = seq_len(10)
  ) +
  scale_y_continuous(
    name = "Human Development Index, 2011 (1=Best)",
    limits = c(0.2, 1.0),
    breaks = seq(from = 0.2, to = 1.0, by = 0.1)
  ) +
  scale_color_manual(
    name = "",
    values = c(
      "#24576D", "#099DD7", "#28AADC",
      "#248E84", "#F2583F", "#96503F"
    )
  ) +
  ggtitle("Corruption and Human development")

p6
## Warning: ggrepel: 153 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

11 - Adicionando temas para formatar os títulos nos eixos.

# Final styling: reset the title, add the source caption, and start from
# theme_bw() before applying the overrides below.
p7 <- p6 +
  ggtitle("Corruption and human development") +
  labs(
    caption = "Sources: Transparency International; UN Human Development Report"
  ) +
  theme_bw() +
  theme(
    # Remove the border and all grid lines, then restore only the horizontal
    # major grid lines in gray.
    panel.border = element_blank(),
    panel.grid = element_blank(),
    panel.grid.major.y = element_line(color = "gray"),
    # Text: dark gray throughout, italic axis titles, bold 16 pt title,
    # left-aligned caption.
    text = element_text(color = "gray20"),
    axis.title.x = element_text(face = "italic"),
    axis.title.y = element_text(face = "italic"),
    plot.caption = element_text(hjust = 0),
    plot.title = element_text(size = 16, face = "bold"),
    # Legend: placed above the panel and laid out horizontally.
    legend.position = "top",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.text = element_text(size = 10)
  )
p7
## Warning: ggrepel: 159 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps