# Import libraries ----
library(ggplot2)
library(ggthemes)
library(GGally)
library(ggrepel)
library(dplyr)
library(datapasta)
library(tibble)

# Data Collection ----
# The data used in this plot comes from a PDF, so we enter it by hand as a
# tibble using tribble() from the tibble package.
# First survey table: one row per country, four integer scores per row.
# The placeholder headers V1..V5 are replaced with descriptive column names by
# the colnames() call further down. The values look like percentages of
# respondents -- TODO confirm against the source PDF.
data <- tibble::tribble(
  ~V1, ~V2, ~V3, ~V4, ~V5,
  # North America
  "United States", 47L, 58L, 56L, 61L,
  "Canada", 73L, 79L, 78L, 82L,
  # Europe
  "France", 47L, 54L, 62L, 73L,
  "Germany", 72L, 77L, 75L, 85L,
  "Greece", 18L, 25L, 22L, 42L,
  "Hungary", 42L, 49L, 54L, 63L,
  "Italy", 36L, 46L, 45L, 60L,
  "Netherlands", 74L, 82L, 82L, 89L,
  "Poland", 40L, 43L, 43L, 59L,
  "Spain", 33L, 48L, 48L, 63L,
  "Sweden", 66L, 78L, 78L, 86L,
  "UK", 52L, 64L, 63L, 74L,
  "Russia", 55L, 68L, 60L, 79L,
  # Asia & Oceania
  "Australia", 48L, 60L, 57L, 65L,
  "India", 65L, 72L, 80L, 72L,
  "Indonesia", 77L, 85L, 85L, 89L,
  "Japan", 55L, 55L, 65L, 74L,
  "Philippines", 78L, 83L, 86L, 87L,
  "South Korea", 27L, 26L, 36L, 44L,
  "Vietnam", 78L, 78L, 80L, 85L,
  # Middle East & North Africa
  "Israel", 42L, 50L, 63L, 78L,
  "Jordan", 46L, 47L, 55L, 60L,
  "Lebanon", 36L, 62L, 48L, 73L,
  "Tunisia", 48L, 52L, 56L, 58L,
  "Turkey", 57L, 73L, 65L, 74L,
  # Sub-Sahara Africa
  "Ghana", 68L, 70L, 77L, 81L,
  "Kenya", 70L, 75L, 80L, 82L,
  "Nigeria", 67L, 68L, 71L, 74L,
  "Senegal", 75L, 75L, 72L, 84L,
  "South Africa", 65L, 69L, 73L, 76L,
  "Tanzania", 83L, 89L, 93L, 92L,
  # South America (as grouped by this script; includes Mexico)
  "Argentina", 37L, 38L, 45L, 56L,
  "Brazil", 45L, 54L, 57L, 66L,
  "Chile", 36L, 36L, 42L, 55L,
  "Colombia", 38L, 43L, 41L, 57L,
  "Mexico", 58L, 55L, 62L, 68L,
  "Peru", 42L, 42L, 51L, 59L,
  "Venezuela", 52L, 50L, 58L, 62L
)
# Second survey table: one row per country with the survey wave label (V2),
# five integer response shares (V3..V7) and a total column (V8, always 100).
# The placeholder headers V1..V8 are renamed by the colnames() call further
# down. Note: this table spells "United Kingdom" in full and has no row for
# Turkey, unlike the first table.
data2 <- tibble::tribble(
  ~V1, ~V2, ~V3, ~V4, ~V5, ~V6, ~V7, ~V8,
  # North America
  "United States", "Spring, 2017", 15L, 36L, 24L, 23L, 2L, 100L,
  "Canada", "Spring, 2017", 20L, 47L, 18L, 14L, 1L, 100L,
  # Europe
  "France", "Spring, 2017", 3L, 17L, 36L, 42L, 1L, 100L,
  "Germany", "Spring, 2017", 26L, 43L, 19L, 10L, 1L, 100L,
  "Greece", "Spring, 2017", 1L, 12L, 26L, 60L, 1L, 100L,
  "Hungary", "Spring, 2017", 9L, 48L, 25L, 16L, 2L, 100L,
  "Italy", "Spring, 2017", 1L, 25L, 47L, 26L, 2L, 100L,
  "Netherlands", "Spring, 2017", 24L, 47L, 20L, 9L, 1L, 100L,
  "Poland", "Spring, 2017", 14L, 28L, 31L, 24L, 4L, 100L,
  "Spain", "Spring, 2017", 5L, 12L, 40L, 41L, 1L, 100L,
  "Sweden", "Spring, 2017", 15L, 52L, 23L, 10L, 0L, 100L,
  "United Kingdom", "Spring, 2017", 14L, 35L, 28L, 20L, 4L, 100L,
  "Russia", "Spring, 2017", 20L, 47L, 24L, 6L, 3L, 100L,
  # Asia & Oceania
  "Australia", "Spring, 2017", 7L, 41L, 29L, 21L, 2L, 100L,
  "India", "Spring, 2017", 39L, 46L, 7L, 4L, 4L, 100L,
  "Indonesia", "Spring, 2017", 30L, 53L, 15L, 1L, 0L, 100L,
  "Japan", "Spring, 2017", 6L, 51L, 33L, 7L, 3L, 100L,
  "Philippines", "Spring, 2017", 36L, 44L, 16L, 4L, 0L, 100L,
  "South Korea", "Spring, 2017", 2L, 21L, 50L, 25L, 2L, 100L,
  "Vietnam", "Spring, 2017", 31L, 51L, 16L, 1L, 1L, 100L,
  # Middle East & North Africa
  "Israel", "Spring, 2017", 14L, 37L, 33L, 14L, 2L, 100L,
  "Jordan", "Spring, 2017", 13L, 35L, 30L, 18L, 3L, 100L,
  "Lebanon", "Spring, 2017", 2L, 13L, 42L, 43L, 1L, 100L,
  "Tunisia", "Spring, 2017", 11L, 18L, 14L, 56L, 2L, 100L,
  # Sub-Sahara Africa
  "Ghana", "Spring, 2017", 51L, 19L, 13L, 13L, 4L, 100L,
  "Kenya", "Spring, 2017", 28L, 40L, 20L, 12L, 1L, 100L,
  "Nigeria", "Spring, 2017", 34L, 20L, 21L, 24L, 1L, 100L,
  "Senegal", "Spring, 2017", 37L, 23L, 22L, 16L, 2L, 100L,
  "South Africa", "Spring, 2017", 22L, 22L, 28L, 24L, 3L, 100L,
  "Tanzania", "Spring, 2017", 48L, 41L, 5L, 4L, 1L, 100L,
  # South America (as grouped by this script; includes Mexico)
  "Argentina", "Spring, 2017", 10L, 12L, 41L, 34L, 3L, 100L,
  "Brazil", "Spring, 2017", 2L, 22L, 18L, 56L, 3L, 100L,
  "Chile", "Spring, 2017", 5L, 10L, 51L, 32L, 2L, 100L,
  "Colombia", "Spring, 2017", 7L, 5L, 51L, 36L, 1L, 100L,
  "Mexico", "Spring, 2017", 2L, 15L, 39L, 43L, 1L, 100L,
  "Peru", "Spring, 2017", 4L, 8L, 55L, 31L, 2L, 100L,
  "Venezuela", "Spring, 2017", 14L, 15L, 24L, 45L, 2L, 100L
)
# Replace the placeholder V* headers of both tables with descriptive names.
names(data) <- c(
  "Country",
  "Political_issues",
  "News about govt. leaders and officials",
  "News accurately",
  "Most important news events"
)
names(data2) <- c(
  "Country",
  "x",
  "A lot",
  "Somewhat",
  "Not much",
  "Not at all",
  "DK/Refused",
  "Total"
)

# Feature Selection ----
# Join the two survey tables on Country and derive the two plotted measures.
#
# data2 spells the country "United Kingdom" while data (and the Europe vector
# below) use "UK". merge() is an inner join, so without harmonising the key
# the UK row would be dropped silently. The recode is done on a copy so data2
# itself is left untouched.
data2_keyed <- data2
data2_keyed$Country[data2_keyed$Country == "United Kingdom"] <- "UK"

dataset <- merge(data, data2_keyed, by = "Country")

# Report any country that exists in only one table (e.g. Turkey has no row in
# data2) instead of letting the inner join discard it without notice.
unmatched <- setdiff(union(data$Country, data2_keyed$Country), dataset$Country)
if (length(unmatched) > 0) {
  warning(
    "Countries dropped by the merge (not present in both tables): ",
    paste(unmatched, collapse = ", "),
    call. = FALSE
  )
}

# gov = share answering "A lot" or "Somewhat" (used as the x variable in the
# plots).
dataset$gov <- dataset$`A lot` + dataset$Somewhat
dataf <- dataset %>% select(Country, Political_issues, gov)

# Region membership used to colour the points.
Asia_Oceania <- c(
  "Australia", "India", "Indonesia", "Japan", "Philippines", "South Korea",
  "Vietnam"
)
Europe <- c(
  "France", "Germany", "Greece", "Hungary", "Italy", "Netherlands", "Poland",
  "Spain", "Sweden", "UK", "Russia"
)
MiddleEast_NorthAfrica <- c("Israel", "Jordan", "Lebanon", "Tunisia", "Turkey")
SouthAmerica <- c(
  "Argentina", "Brazil", "Chile", "Colombia", "Mexico", "Peru", "Venezuela"
)
SubSaharaAfrica <- c(
  "Ghana", "Kenya", "Nigeria", "Senegal", "South Africa", "Tanzania"
)
NorthAmerica <- c("United States", "Canada")

# Add the region column; a country in none of the vectors gets NA.
dataf <- dataf %>%
  mutate(
    region = case_when(
      Country %in% Asia_Oceania ~ "Asia & Oceania",
      Country %in% Europe ~ "Europe",
      Country %in% MiddleEast_NorthAfrica ~ "Middle East & North Africa",
      Country %in% SouthAmerica ~ "South America",
      Country %in% SubSaharaAfrica ~ "Sub-Sahara Africa",
      Country %in% NorthAmerica ~ "North America"
    )
  )

head(dataf, 5)
## Country Political_issues gov region
## 1 Argentina 37 22 South America
## 2 Australia 48 48 Asia & Oceania
## 3 Brazil 45 24 South America
## 4 Canada 73 67 North America
## 5 Chile 36 15 South America
# Create Basic Plot ----
# Base scatter plot: gov on x, Political_issues on y, one colour per region.
# Both axes are fixed to 0-100 with a tick every 20.
g1 <- ggplot(dataf, aes(x = gov, y = Political_issues, colour = region)) +
  geom_point(size = 3, shape = 19, stroke = 1, alpha = 0.8) +
  scale_x_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 20)) +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 20)) +
  theme_bw()
g1

# Add Label and Title ----
# Attach axis labels, title, subtitle and caption to the base scatter plot.
# The displayed text previously misspelled "government" and "satisfaction";
# both are corrected here.
g2 <- g1 +
  labs(
    x = "Trust the national government to do what is right for our country",
    y = "Satisfied with the news media in our country*, % of respondents",
    title = "I can't get no...",
    subtitle = "Trust in government and satisfaction with media, 2017",
    caption = "Source: Pew Research Centre"
  )
g2

# Add linear line ----
# Overlay a single least-squares line fitted across all countries.
# aes(group = 1) overrides the per-region colour grouping so one line is drawn
# instead of one per region. The model is y ~ x; fitting y + 1 ~ x would draw
# the line one unit above the actual best fit.
g3 <- g2 +
  geom_smooth(
    aes(group = 1),
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "#77200b"
  )
g3

# Edit Theme ----
# Restyle the plot: compact horizontal legend anchored at the top-left, no
# panel border, horizontal major grid lines only, and smaller italic/bold text.
g4 <- g3 +
  theme(
    # Legend
    legend.position = c(0, 1),
    legend.justification = "left",
    legend.direction = "horizontal",
    legend.margin = margin(b = 1, unit = "pt"),
    legend.key = element_rect(size = 5),
    legend.key.size = unit(0.1, 'lines'),
    legend.text = element_text(size = 7),
    # Panel and grid
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_line(size = .1, color = "grey66"),
    # Text
    axis.title.x = element_text(face = "italic"),
    axis.title.y = element_text(face = "italic", size = 9),
    plot.title = element_text(face = "bold", size = 13),
    plot.subtitle = element_text(size = 9),
    plot.caption = element_text(hjust = 0, vjust = 0.5)
  ) +
  # Keep all legend entries on a single row.
  guides(color = guide_legend(nrow = 1))

# Label every point with its country; ggrepel pushes labels apart.
g5 <- g4 +
  geom_text_repel(aes(label = Country), color = "grey20", force = 2)
g5

# Edit legend ----
# Final plot: manual region palette (colours and labels are matched to the
# regions by position, in alphabetical order) plus a text note for the fitted
# line. The correlation figure in the note is a fixed string, not computed.
g6 <- g5 +
  scale_colour_manual(
    name = "",
    values = c(
      "#f4c379",
      "#6ed0f6",
      "#188c81",
      "#f25a3e",
      "#6fbbbf",
      "#5e92a8"
    ),
    labels = c(
      "Asia & Oceania",
      "Europe",
      "Middle East & North Africa",
      "North America",
      "South America",
      "Sub-Sahara Africa"
    )
  ) +
  annotate(
    "text",
    x = 70,
    y = 10,
    label = "Line of best fit, \n correlation coefficient=87%",
    color = "#77200b",
    hjust = 0
  )
g6