Desafio

Reproduzir uma cópia do gráfico abaixo com o R.

fonte: The Economist

fonte: The Economist

Resp:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(readxl)
# Read the challenge spreadsheet (path relative to the working directory);
# read_excel() returns it as a tibble.
dataset <- read_excel("dados_desafio.xlsx")
# Preview the first rows to check which columns were read.
head(dataset)
## # A tibble: 6 × 5
##   Country     HDI_Rank HDI   CPI   Region           
##   <chr>          <dbl> <chr> <chr> <chr>            
## 1 Afghanistan      172 0.398 1.5   Asia Pacific     
## 2 Albania           70 0.739 3.1   East EU Cemt Asia
## 3 Algeria           96 0.698 2.9   MENA             
## 4 Angola           148 0.486 2     SSA              
## 5 Argentina         45 0.797 3     Americas         
## 6 Armenia           86 0.716 2.6   East EU Cemt Asia
# Preview the last rows of the table as well.
tail(dataset)
## # A tibble: 6 × 5
##   Country    HDI_Rank HDI   CPI   Region           
##   <chr>         <dbl> <chr> <chr> <chr>            
## 1 Uzbekistan      115 0.641 1.6   East EU Cemt Asia
## 2 Vanuatu         125 0.617 3.5   Asia Pacific     
## 3 Venezuela        73 0.735 1.9   Americas         
## 4 Yemen           154 0.462 2.1   MENA             
## 5 Zambia          164 0.43  3.2   SSA              
## 6 Zimbabwe        173 0.376 2.2   SSA
# Inspect the column types: HDI and CPI are read as character, not numeric,
# which is why they are converted before plotting.
str(dataset)
## tibble [173 × 5] (S3: tbl_df/tbl/data.frame)
##  $ Country : chr [1:173] "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ HDI_Rank: num [1:173] 172 70 96 148 45 86 2 19 91 53 ...
##  $ HDI     : chr [1:173] "0.398" "0.739" "0.698" "0.486" ...
##  $ CPI     : chr [1:173] "1.5" "3.1" "2.9" "2" ...
##  $ Region  : chr [1:173] "Asia Pacific" "East EU Cemt Asia" "MENA" "SSA" ...
# CPI and HDI were read as text; turn both into numeric columns in one
# column-wise pass so they can be mapped to continuous axes.
dataset[c("CPI", "HDI")] <- lapply(dataset[c("CPI", "HDI")], as.numeric)
# Load ggrepel (provides geom_text_repel()). library() stops with an error if
# the package is missing, whereas require() only returns FALSE and lets the
# script fail later, at the first ggrepel call.
library(ggrepel)
# Summarise CPI after the numeric conversion; the printed range (1.5 to 9.5)
# lies inside the x-axis limits used for the plot.
summary(dataset$CPI)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.500   2.500   3.200   4.052   5.100   9.500
# Rebuild The Economist's "Corruption and human development" scatter plot:
# CPI on x, HDI on y, one open circle per country coloured by region, a red
# trend curve, and text labels on a random subset of countries.

# Countries that receive a text label: 18 rows drawn at random from those whose
# CPI lies strictly between 1 and 10. The two bounds must be joined with `&`;
# joined with `|` the test is TRUE for every non-missing CPI and filters
# nothing. The draw is random, so the labelled set differs between runs unless
# a seed is set beforehand.
labelled_countries <- dataset %>%
  filter(CPI > 1 & CPI < 10) %>%
  slice_sample(n = 18)

g <- ggplot(data = dataset, mapping = aes(x = CPI, y = HDI)) +
  # shape 21 with fill = NA draws open circles; the outline takes the colour.
  geom_point(
    aes(colour = factor(Region)),
    shape = 21, stroke = 1.5, size = 3, fill = NA
  ) +
  geom_text_repel(data = labelled_countries, aes(label = Country)) +
  scale_x_continuous(limits = c(1, 10.1), breaks = seq(1, 10.1, 1)) +
  scale_y_continuous(limits = c(0.1, 1), breaks = seq(0.2, 1, 0.1)) +
  # Linear model fitted on a B-spline basis of CPI (splines::bs(x, 3)), drawn
  # without its confidence band. Line thickness is set with `linewidth`;
  # `size` for lines was deprecated in ggplot2 3.4.0.
  geom_smooth(
    method = "lm",
    formula = y ~ splines::bs(x, 3),
    se = FALSE, colour = "red", linewidth = 2
  ) +
  labs(
    title = expression(paste(bold('Corruption and human development'))),
    y = expression(paste(italic('Human Development Index, 2011 (1=best)'))),
    x = expression(paste(italic('Corruption Perception Index, 2011 (10=least corrupt)'))),
    caption = "Sources: Transparency International; UN Human Development Report",
    color = ""
  ) +
  # Labels and colours are matched by position to the levels of
  # factor(Region), i.e. the sorted region codes.
  # NOTE(review): confirm the level order lines up with these six display
  # names; not every region code is visible in the previews above.
  scale_color_manual(
    name = "",
    labels = c(
      "Americas", "Asia &\nOceania", "Central &\nEastern Europe",
      "OECD", "Middle East &\nNorth Africa", "Sub-Saharan\n Africa"
    ),
    values = c("blue", "cyan", "green", "darkgreen", "red", "brown")
  ) +
  theme_classic() +
  # Keep the whole legend on a single row above the plot.
  guides(colour = guide_legend(nrow = 1)) +
  theme(
    axis.title = element_text(size = 8, face = "italic"),
    plot.caption = element_text(hjust = 0.0, size = 8),
    # No y-axis line: the horizontal grid lines carry the y scale instead.
    axis.line.y = element_blank(),
    axis.line.x = element_line(lineend = "round"),
    axis.ticks.length = unit(0.2, "lines"),
    axis.ticks.y = element_line(colour = "gray", linewidth = 1),
    legend.background = element_rect(fill = "white", colour = NA),
    legend.position = 'top',
    legend.justification = 'left',
    legend.direction = 'horizontal',
    legend.title = element_text(size = rel(0.8), face = "bold", hjust = 0),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(colour = "gray", linewidth = 1),
    plot.margin = unit(c(1, 1, 0.5, 0.5), "lines"),
    plot.title = element_text(size = rel(1.2)),
    # strip.* settings style facet strips; this plot defines no facets.
    strip.background = element_rect(fill = "grey90", colour = "grey50"),
    strip.text.y = element_text(size = rel(0.8), angle = -90)
  )

# Print the finished plot.
g