Import libraries

library(ggplot2)
library(ggthemes)
library(GGally)
library(ggrepel)
library(dplyr)
library(datapasta)
library(tibble)

Data Collection

The data used in this plot comes from a PDF, so we can copy the data in using tribble() from the tibble package

# First survey table: one row per country (38 rows), a country name in V1 and
# four integer columns V2..V5. The placeholder names V1..V5 are replaced with
# descriptive ones by the colnames(data) assignment further down.
# NOTE(review): values look like percentages of respondents -- confirm against
# the source PDF.
data<-tibble::tribble(
              ~V1, ~V2, ~V3, ~V4, ~V5,
  "United States", 47L, 58L, 56L, 61L,
         "Canada", 73L, 79L, 78L, 82L,
         "France", 47L, 54L, 62L, 73L,
        "Germany", 72L, 77L, 75L, 85L,
         "Greece", 18L, 25L, 22L, 42L,
        "Hungary", 42L, 49L, 54L, 63L,
          "Italy", 36L, 46L, 45L, 60L,
    "Netherlands", 74L, 82L, 82L, 89L,
         "Poland", 40L, 43L, 43L, 59L,
          "Spain", 33L, 48L, 48L, 63L,
         "Sweden", 66L, 78L, 78L, 86L,
             "UK", 52L, 64L, 63L, 74L,
         "Russia", 55L, 68L, 60L, 79L,
      "Australia", 48L, 60L, 57L, 65L,
          "India", 65L, 72L, 80L, 72L,
      "Indonesia", 77L, 85L, 85L, 89L,
          "Japan", 55L, 55L, 65L, 74L,
    "Philippines", 78L, 83L, 86L, 87L,
    "South Korea", 27L, 26L, 36L, 44L,
        "Vietnam", 78L, 78L, 80L, 85L,
         "Israel", 42L, 50L, 63L, 78L,
         "Jordan", 46L, 47L, 55L, 60L,
        "Lebanon", 36L, 62L, 48L, 73L,
        "Tunisia", 48L, 52L, 56L, 58L,
         "Turkey", 57L, 73L, 65L, 74L,
          "Ghana", 68L, 70L, 77L, 81L,
          "Kenya", 70L, 75L, 80L, 82L,
        "Nigeria", 67L, 68L, 71L, 74L,
        "Senegal", 75L, 75L, 72L, 84L,
   "South Africa", 65L, 69L, 73L, 76L,
       "Tanzania", 83L, 89L, 93L, 92L,
      "Argentina", 37L, 38L, 45L, 56L,
         "Brazil", 45L, 54L, 57L, 66L,
          "Chile", 36L, 36L, 42L, 55L,
       "Colombia", 38L, 43L, 41L, 57L,
         "Mexico", 58L, 55L, 62L, 68L,
           "Peru", 42L, 42L, 51L, 59L,
      "Venezuela", 52L, 50L, 58L, 62L
  )

# Second survey table: one row per country (37 rows). V1 is the country name,
# V2 is the survey wave label ("Spring, 2017" on every row), V3..V7 are the
# integer counts per answer category and V8 is the row total (100 on every
# row). The placeholder names are replaced by the colnames(data2) assignment
# further down.
# NOTE(review): this table spells the country "United Kingdom" while `data`
# uses "UK", and it has no row for Turkey -- both matter for any join on the
# country column.
data2<-tibble::tribble(
               ~V1,            ~V2, ~V3, ~V4, ~V5, ~V6, ~V7,  ~V8,
   "United States", "Spring, 2017", 15L, 36L, 24L, 23L,  2L, 100L,
          "Canada", "Spring, 2017", 20L, 47L, 18L, 14L,  1L, 100L,
          "France", "Spring, 2017",  3L, 17L, 36L, 42L,  1L, 100L,
         "Germany", "Spring, 2017", 26L, 43L, 19L, 10L,  1L, 100L,
          "Greece", "Spring, 2017",  1L, 12L, 26L, 60L,  1L, 100L,
         "Hungary", "Spring, 2017",  9L, 48L, 25L, 16L,  2L, 100L,
           "Italy", "Spring, 2017",  1L, 25L, 47L, 26L,  2L, 100L,
     "Netherlands", "Spring, 2017", 24L, 47L, 20L,  9L,  1L, 100L,
          "Poland", "Spring, 2017", 14L, 28L, 31L, 24L,  4L, 100L,
           "Spain", "Spring, 2017",  5L, 12L, 40L, 41L,  1L, 100L,
          "Sweden", "Spring, 2017", 15L, 52L, 23L, 10L,  0L, 100L,
  "United Kingdom", "Spring, 2017", 14L, 35L, 28L, 20L,  4L, 100L,
          "Russia", "Spring, 2017", 20L, 47L, 24L,  6L,  3L, 100L,
       "Australia", "Spring, 2017",  7L, 41L, 29L, 21L,  2L, 100L,
           "India", "Spring, 2017", 39L, 46L,  7L,  4L,  4L, 100L,
       "Indonesia", "Spring, 2017", 30L, 53L, 15L,  1L,  0L, 100L,
           "Japan", "Spring, 2017",  6L, 51L, 33L,  7L,  3L, 100L,
     "Philippines", "Spring, 2017", 36L, 44L, 16L,  4L,  0L, 100L,
     "South Korea", "Spring, 2017",  2L, 21L, 50L, 25L,  2L, 100L,
         "Vietnam", "Spring, 2017", 31L, 51L, 16L,  1L,  1L, 100L,
          "Israel", "Spring, 2017", 14L, 37L, 33L, 14L,  2L, 100L,
          "Jordan", "Spring, 2017", 13L, 35L, 30L, 18L,  3L, 100L,
         "Lebanon", "Spring, 2017",  2L, 13L, 42L, 43L,  1L, 100L,
         "Tunisia", "Spring, 2017", 11L, 18L, 14L, 56L,  2L, 100L,
           "Ghana", "Spring, 2017", 51L, 19L, 13L, 13L,  4L, 100L,
           "Kenya", "Spring, 2017", 28L, 40L, 20L, 12L,  1L, 100L,
         "Nigeria", "Spring, 2017", 34L, 20L, 21L, 24L,  1L, 100L,
         "Senegal", "Spring, 2017", 37L, 23L, 22L, 16L,  2L, 100L,
    "South Africa", "Spring, 2017", 22L, 22L, 28L, 24L,  3L, 100L,
        "Tanzania", "Spring, 2017", 48L, 41L,  5L,  4L,  1L, 100L,
       "Argentina", "Spring, 2017", 10L, 12L, 41L, 34L,  3L, 100L,
          "Brazil", "Spring, 2017",  2L, 22L, 18L, 56L,  3L, 100L,
           "Chile", "Spring, 2017",  5L, 10L, 51L, 32L,  2L, 100L,
        "Colombia", "Spring, 2017",  7L,  5L, 51L, 36L,  1L, 100L,
          "Mexico", "Spring, 2017",  2L, 15L, 39L, 43L,  1L, 100L,
            "Peru", "Spring, 2017",  4L,  8L, 55L, 31L,  2L, 100L,
       "Venezuela", "Spring, 2017", 14L, 15L, 24L, 45L,  2L, 100L
  )


# Swap the V1..Vn placeholder headers for descriptive column names.
names(data) <- c(
  "Country",
  "Political_issues",
  "News about govt. leaders and officials",
  "News accurately",
  "Most important news events"
)

# "x" holds the survey wave label; the remaining columns are the answer
# categories followed by the row total.
names(data2) <- c(
  "Country",
  "x",
  "A lot",
  "Somewhat",
  "Not much",
  "Not at all",
  "DK/Refused",
  "Total"
)

Feature Selection

# Harmonise the join key first: `data` spells the country "UK" while `data2`
# spells it "United Kingdom", so an exact-match merge would silently drop it.
# A copy is recoded so the original `data2` table stays untouched.
data2_keyed <- data2
data2_keyed$Country[data2_keyed$Country == "United Kingdom"] <- "UK"

# Inner join on Country: only countries present in both tables survive
# (Turkey, for example, has no row in `data2` and is therefore excluded).
dataset <- merge(data, data2_keyed, by = "Country")

# gov = respondents answering "A lot" or "Somewhat"; this is the value plotted
# on the x axis (trust in government) later on.
dataset$gov <- dataset$`A lot` + dataset$Somewhat

# Keep only the columns the plot needs.
dataf <- dataset %>% select(Country, Political_issues, gov)


# Region membership lists: one character vector of country names per region.
Asia_Oceania <- c(
  "Australia", "India", "Indonesia", "Japan", "Philippines", "South Korea",
  "Vietnam"
)
Europe <- c(
  "France", "Germany", "Greece", "Hungary", "Italy", "Netherlands", "Poland",
  "Spain", "Sweden", "UK", "Russia"
)
MiddleEast_NorthAfrica <- c("Israel", "Jordan", "Lebanon", "Tunisia", "Turkey")
SouthAmerica <- c(
  "Argentina", "Brazil", "Chile", "Colombia", "Mexico", "Peru", "Venezuela"
)
SubSaharaAfrica <- c(
  "Ghana", "Kenya", "Nigeria", "Senegal", "South Africa", "Tanzania"
)
NorthAmerica <- c("United States", "Canada")

# Add the region column: the first matching membership list wins, and a
# country that appears in none of the lists gets NA.
dataf <- dataf %>%
  mutate(
    region = case_when(
      Country %in% Asia_Oceania ~ "Asia & Oceania",
      Country %in% Europe ~ "Europe",
      Country %in% MiddleEast_NorthAfrica ~ "Middle East & North Africa",
      Country %in% SouthAmerica ~ "South America",
      Country %in% SubSaharaAfrica ~ "Sub-Sahara Africa",
      Country %in% NorthAmerica ~ "North America"
    )
  )
head(dataf, n = 5)
##     Country Political_issues gov         region
## 1 Argentina               37  22  South America
## 2 Australia               48  48 Asia & Oceania
## 3    Brazil               45  24  South America
## 4    Canada               73  67  North America
## 5     Chile               36  15  South America

Create Basic Plot

# Both axes share the same 0-100 scale with a tick every 20 units.
pct_breaks <- seq(0, 100, by = 20)
pct_limits <- c(0, 100)

# Base scatter plot: one point per country, coloured by region.
g1 <- ggplot(dataf, aes(x = gov, y = Political_issues, color = region)) +
  geom_point(shape = 19, size = 3, alpha = 0.8, stroke = 1) +
  scale_x_continuous(breaks = pct_breaks, limits = pct_limits) +
  scale_y_continuous(breaks = pct_breaks, limits = pct_limits) +
  theme_bw()
g1

Add Label and Title

# Attach axis titles, title, subtitle and source caption to the base plot.
# The misspellings "goverment" and "satifaction" in the displayed text are
# corrected here.
g2 <- g1 + labs(
  x = "Trust the national government to do what is right for our country",
  y = "Satisfied with the news media in our country*, % of respondents",
  title = "I can't get no...",
  subtitle = "Trust in government and satisfaction with media, 2017",
  caption = "Source: Pew Research Centre"
)
g2

Add a linear regression line

# Overlay a single least-squares line across all countries. aes(group = 1)
# overrides the per-region colour grouping so one line is fitted to the whole
# data set rather than one per region.
# The formula is y ~ x: the earlier `y+1 ~ x` regressed y + 1 on x, which drew
# the line one unit above the actual best fit.
g3 <- g2 +
  geom_smooth(
    aes(group = 1),
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "#77200b"
  )
g3

Edit Theme

# Theme overrides, grouped by the part of the chart they affect.
chart_theme <- theme(
  # Legend: compact, horizontal, anchored at position (0, 1).
  legend.position = c(0, 1),
  legend.justification = "left",
  legend.direction = "horizontal",
  legend.margin = margin(b = 1, unit = "pt"),
  legend.key = element_rect(size = 5),
  legend.key.size = unit(0.1, "lines"),
  legend.text = element_text(size = 7),
  # Text: italic axis titles, bold title, smaller subtitle, left-aligned caption.
  axis.title.x = element_text(face = "italic"),
  axis.title.y = element_text(face = "italic", size = 9),
  plot.title = element_text(face = "bold", size = 13),
  plot.subtitle = element_text(size = 9),
  plot.caption = element_text(hjust = 0, vjust = 0.5),
  # Panel: no border, and only thin grey major horizontal grid lines.
  panel.border = element_blank(),
  panel.grid.major.x = element_blank(),
  panel.grid.minor.x = element_blank(),
  panel.grid.major.y = element_line(size = .1, color = "grey66"),
  panel.grid.minor.y = element_blank()
)

# Apply the theme and force the colour legend onto a single row.
g4 <- g3 + chart_theme + guides(color = guide_legend(nrow = 1))

# Label every point with its country name; ggrepel nudges labels apart.
g5 <- g4 +
  geom_text_repel(aes(label = Country), color = "grey20", force = 2)
g5

Edit legend

# Region palette keyed by region name. Naming the values ties each colour to
# its region explicitly, instead of relying on the positional order of the
# levels (and a parallel positional `labels` vector), which would silently
# mislabel regions if a level were added, removed or reordered.
region_colours <- c(
  "Asia & Oceania" = "#f4c379",
  "Europe" = "#6ed0f6",
  "Middle East & North Africa" = "#188c81",
  "North America" = "#f25a3e",
  "South America" = "#6fbbbf",
  "Sub-Sahara Africa" = "#5e92a8"
)

# Apply the palette (with an empty legend title) and annotate the fitted line.
# NOTE(review): the 87% figure is hard-coded rather than computed from `dataf`
# -- confirm it still matches the plotted data.
g6 <- g5 +
  scale_color_manual(name = "", values = region_colours) +
  annotate(
    "text",
    x = 70, y = 10,
    label = "Line of best fit, \n correlation coefficient=87%",
    color = "#77200b",
    hjust = 0
  )
g6