# Download the laureate table from the Nobel Prize API.
df <- "http://api.nobelprize.org/v1/laureate.csv"
data <- read_csv(df)

# Drop organisations and rows without a category, compute the age in the
# award year, and turn the lower-case category codes into display names.
# Any non-missing code that is not listed below is labelled "Physics".
df1 <- data %>%
  filter(gender != "org", !is.na(category)) %>%
  mutate(
    age = year - year(born),
    category = recode(
      category,
      chemistry = "Chemistry",
      economics = "Economics",
      literature = "Literature",
      medicine = "Medicine",
      peace = "Peace",
      .default = "Physics"
    )
  )
head(df1, 3)
## # A tibble: 3 x 21
## id firstname surname born died bornCountry bornCountryCode
## <dbl> <chr> <chr> <date> <date> <chr> <chr>
## 1 1 Wilhelm ~ Rontgen 1845-03-27 1923-02-10 Prussia (n~ DE
## 2 2 Hendrik ~ Lorentz 1853-07-18 1928-02-04 the Nether~ NL
## 3 3 Pieter Zeeman 1865-05-25 1943-10-09 the Nether~ NL
## # ... with 14 more variables: bornCity <chr>, diedCountry <chr>,
## # diedCountryCode <chr>, diedCity <chr>, gender <chr>, year <dbl>,
## # category <chr>, overallMotivation <chr>, share <dbl>, motivation <chr>,
## # name <chr>, city <chr>, country <chr>, age <dbl>
# Facet order (left to right) for the six prize categories.
my_lines <- c("Medicine", "Physics", "Chemistry", "Economics", "Literature", "Peace")

# Plotting data: year, age and category with incomplete rows removed and
# category stored as a factor in the facet order above.
mydata <- df1 %>%
  select(year, age, category) %>%
  na.omit() %>%
  mutate(category = factor(category, levels = my_lines))

# Panel captions: identical to my_lines except that Economics gets an asterisk.
label2 <- if_else(str_detect(my_lines, "Econ"), "Economics *", my_lines)

# One row per panel, positioning its caption at the horizontal middle of the
# observed year range and at age 105.
midPoint <- (min(mydata$year) + max(mydata$year)) / 2
date_text <- tibble(
  category = factor(my_lines, levels = my_lines),
  year = rep(midPoint, length(my_lines)),
  age = rep(105, length(my_lines))
)

# Colour palette, one entry per category.
col <- c("#04536e", "#7c2817", "#f15c42", "#3d6a51", "#eca324", "#12a4dc")
# Typeface used for the text drawn on the chart.
my_font <- "Ubuntu Condensed"

# One-row data frames that place annotations inside a single facet. Each one
# takes the caption row of the target category from date_text and overrides
# its year/age coordinates.

# Anchor of the "oldest winner" text (Economics panel).
text1 <- date_text %>%
  filter(category == "Economics") %>%
  mutate(year = 1945, age = 95)

# Anchor of the "youngest winner" text (Peace panel).
text2 <- date_text %>%
  filter(category == "Peace") %>%
  mutate(year = 1945, age = 23)

# Start point of the curve drawn in the Economics panel.
text3 <- date_text %>%
  filter(category == "Economics") %>%
  mutate(year = 1980, age = 97)

# Start point of the curve drawn in the Peace panel.
text4 <- date_text %>%
  filter(category == "Peace") %>%
  mutate(year = 1980, age = 25)
# Basic plot
# Basic plot: age against award year, one facet per category, with a loess
# trend line. theme_economist() is a complete theme, so it is applied first;
# adding it after the theme() tweaks (as the script used to do at the very
# end) replaces every one of them.
p <- mydata %>%
  ggplot(aes(year, age, color = category)) +
  geom_point(show.legend = FALSE, size = 2, alpha = 0.5) +
  geom_smooth(method = "loess", show.legend = FALSE, se = FALSE, size = 1.8) +
  scale_color_manual(values = col) +
  facet_wrap(~ category, ncol = 6) +
  theme_economist() +
  # The facet strips are hidden because the captions are drawn inside the panels.
  theme(strip.text.x = element_blank())

# Annotations: panel captions, the two call-outs and the curves leading to them.
p1 <- p +
  geom_text(data = date_text, label = label2, family = my_font, fontface = "bold", size = 5) +
  guides(color = "none") +
  geom_text(data = text1, label = "Oldest Winner", family = my_font, size = 3) +
  geom_text(data = text2, label = "Youngest Winner", family = my_font, size = 3) +
  geom_curve(data = text3, xend = 2007, yend = 90, curvature = -0.5) +
  geom_curve(data = text4, xend = 2014, yend = 17, curvature = -0.5)

# Final chart: axis formatting, theme tweaks and titles.
p1 +
  scale_x_continuous(breaks = seq(1900, 2010, 25), labels = c("1900", " ", "50", " ", "2000")) +
  # The secondary axis duplicates the age axis on the right-hand side.
  scale_y_continuous(sec.axis = sec_axis(~ . * 1), breaks = seq(0, 105, 25), limits = c(15, 105)) +
  theme(panel.grid.minor = element_blank()) +
  theme(panel.grid.major.x = element_blank()) +
  theme(panel.grid.major.y = element_line(size = 0.8)) +
  theme(axis.text.y.left = element_blank()) +
  theme(axis.ticks.y = element_blank()) +
  theme(axis.ticks.length = unit(0.15, "cm")) +
  theme(axis.text.x = element_text(size = 13, color = "grey20", family = my_font)) +
  theme(axis.text.y = element_text(size = 13, color = "grey20", family = my_font)) +
  theme(plot.margin = unit(c(0.7, 0.7, 0.7, 1), "cm")) +
  labs(x = NULL, y = NULL,
       title = "Senescience",
       subtitle = "Age of Nobel laureates, at the date of award",
       caption = "Data Source: Nobelprize.org") +
  theme(plot.title = element_text(face = "bold", size = 21, family = my_font, hjust = 0, color = "grey10")) +
  theme(plot.subtitle = element_text(size = 15, margin = margin(b = 20), hjust = 0, family = my_font, color = "grey20")) +
  theme(plot.caption = element_text(size = 11, family = my_font, color = "grey20"))
## Building the graph
# Sector names in the order they should appear in the chart.
cates <- c(
  "oil, gas, coal", "Metals", "Finance", "Other",
  "IT", "Real estate", "Chemicals"
)
cates
## [1] "oil, gas, coal" "Metals" "Finance" "Other"
## [5] "IT" "Real estate" "Chemicals"

# One row per sector with its value in 2005 and in 2015.
Bycate <- tibble(
  cates = cates,
  year2005 = c(57, 49, 30, 5, 4, 2, 1),
  year2015 = c(28, 20, 32, 25, 6, 19, 11)
)
# Store the sector as a factor whose levels keep the row order above.
Bycate$cates <- factor(Bycate$cates, levels = Bycate$cates)
## Preparing the inputs for the graph
my_font <- "Ubuntu Condensed"
colors <- c(
  "#68382C", "#D6777E", "#00A4E6", "grey40",
  "#eca324", "#8abbd0", "#9ae5de"
)
# Sector names; the same vector supplies the text of the hand-drawn legend.
cates <- c("oil, gas, coal", "Metals", "Finance", "Other", "IT", "Real estate", "Chemicals")
legend_label <- cates
# y positions of the three lowest legend entries: the first three of four
# evenly spaced values between 4 and 20.
pointPosition_y <- head(seq(4, 20, length.out = 4), 3)
# Slope chart of the sector values in 2005 and 2015 with a hand-drawn legend
# in the left margin (x = 2000).
ggplot() +
  # Horizontal grid lines at 0, 20, 40 and 60.
  geom_segment(aes(x = rep(2005, 4), xend = rep(2016, 4),
                   y = seq(0, 60, 20), yend = seq(0, 60, 20)),
               color = "grey70", size = 0.7) +
  # Vertical guides at the two years. The 2005 guide previously ran from
  # (2015, 0) to (2005, 60), i.e. diagonally across the panel.
  geom_segment(aes(x = 2005, xend = 2005, y = 0, yend = 60), color = "grey70", size = 0.7) +
  geom_segment(aes(x = 2015, xend = 2015, y = 0, yend = 60), color = "grey70", size = 0.7) +
  geom_point(data = Bycate, aes(x = 2005, y = year2005, color = cates), size = 4,
             show.legend = FALSE) +
  geom_point(data = Bycate, aes(x = 2015, y = year2015, color = cates), size = 4,
             show.legend = FALSE) +
  # Slopes joining each sector's two values. The automatic legend is turned
  # off here as well, because the legend is drawn by hand further down.
  geom_segment(data = Bycate,
               aes(x = 2005, xend = 2015, y = year2005, yend = year2015, color = cates),
               show.legend = FALSE) +
  theme(panel.background = element_rect(fill = "white")) +
  theme(plot.background = element_rect(fill = "white")) +
  scale_color_manual(values = colors) +
  scale_x_continuous(limits = c(2000, 2016.5)) +
  scale_y_continuous(limits = c(-1, 63)) +
  # Year labels under the guides; annotate() builds its own data, so the text
  # does not depend on any layer data being present.
  annotate("text", x = c(2005, 2015), y = c(-1, -1), label = c("2005", "2015"),
           family = my_font, color = "grey90") +
  # Value labels to the right of the grid lines.
  geom_text(aes(x = rep(2016.5, 4), y = seq(0, 60, 20), label = seq(0, 60, 20)),
            family = my_font, color = "grey30", size = 6) +
  # Hand-drawn legend, upper four entries: aligned with the 2005 values of the
  # first three sectors, plus y = 20 for the fourth.
  geom_point(aes(x = 2000, y = c(Bycate$year2005[1:3], 20)), color = colors[1:4], size = 4) +
  geom_text(aes(x = 2000 + 0.3, y = c(Bycate$year2005[1:3], 20)), label = legend_label[1:4],
            hjust = 0, family = my_font, size = 6, color = "grey30") +
  # Hand-drawn legend, lower three entries (listed bottom-up, hence 7:5).
  geom_point(aes(x = 2000, y = pointPosition_y), color = colors[7:5], size = 4) +
  geom_text(aes(x = 2000 + 0.3, y = pointPosition_y), label = legend_label[7:5],
            hjust = 0, family = my_font, size = 6, color = "grey30") +
  geom_text(aes(x = 2000, y = 63, label = "by sector*, %"), size = 6, color = "black",
            family = my_font, hjust = 0.1, vjust = -0.5) +
  theme(plot.margin = unit(c(1.2, 1, 0.7, 0.7 / 2), "cm")) +
  labs(caption = "*some billionaires were involved in multi sectors", title = "", subtitle = "") +
  theme(plot.caption = element_text(family = my_font, size = 8, vjust = -3)) +
  theme(plot.title = element_text(size = 22, face = "bold", vjust = 4)) +
  theme(plot.subtitle = element_text(size = 18, vjust = 5))

# Load the Penn World Table release that the following code works on
# (pwt9.1); the script previously loaded pwt9.0 here.
data("pwt9.1")
# Countries shown in the working-hours chart (display names).
some_countries <- c(
  "France", "Vietnam", "Japan", "Singapore",
  "United States", "South Korea"
)

# Average annual hours worked (avh) per country and year from 1986 onwards,
# restricted to the countries above. Three long official names are shortened
# first so that they match the display names.
df_plot <- pwt9.1 %>%
  mutate(
    country = recode(
      as.character(country),
      "United States of America" = "United States",
      "Republic of Korea" = "South Korea",
      "Viet Nam" = "Vietnam"
    )
  ) %>%
  select(year, country, avh) %>%
  na.omit() %>%
  filter(year >= 1986, country %in% some_countries)
# Make a draft graph:
my_colors <- c("#04536e", "#7c2817", "#f15c42", "#3d6a51", "#eca324", "#12a4dc")
my_font <- "Ubuntu Condensed"

# Axis labels for the years 1986-2017: every fourth year from 1986 to 2014
# plus 2017 is printed, all other years get an empty label.
label_y <- ifelse(
  1986:2017 %in% c(seq(1986, 2014, 4), 2017),
  as.character(1986:2017),
  ""
)

# Anchor rows (year 1988) for the country names written next to the lines.
# Singapore and South Korea are kept apart because their labels are offset
# differently from the others.
df_text1 <- df_plot %>%
  filter(year == 1988, !country %in% c("South Korea", "Singapore"))
df_sin <- df_plot %>%
  filter(year == 1988, country %in% c("Singapore"))
df_kr <- df_plot %>%
  filter(year == 1988, country %in% c("South Korea"))
# Line chart of average annual hours worked, one line per country, with the
# country names written next to the lines instead of a legend.
df_plot %>%
  ggplot(aes(year, avh, group = country, color = country)) +
  geom_line(size = 1.5, show.legend = FALSE) +
  theme_economist(base_family = my_font) +
  scale_color_manual(values = my_colors) +
  # One break per year; label_y leaves most of them blank.
  scale_x_continuous(limits = c(1986, 2017), breaks = seq(1986, 2017, 1),
                     labels = label_y, expand = c(0, 0)) +
  scale_y_continuous(limits = c(1400, 3000)) +
  theme(panel.grid.minor = element_blank()) +
  # Country names, shifted vertically so they do not sit on the lines.
  geom_text(data = df_text1, aes(year, avh + 50, label = country),
            size = 4, hjust = 0, family = my_font, show.legend = FALSE) +
  geom_text(data = df_sin, aes(year, avh + 100, label = country),
            size = 4, hjust = 0, family = my_font, show.legend = FALSE) +
  geom_text(data = df_kr, aes(year, avh - 40, label = country),
            size = 4, hjust = 0, family = my_font, show.legend = FALSE) +
  labs(x = "Year", y = "Average annual hours",
       title = "Average annual hours by countries")
# Data for plotting:
# Yearly counts for Russia and for the world, 1996-2015, entered by hand.
# All three columns are integers.
dat <- data.frame(
  Year = 1996:2015,
  Russia = c(
    0L, 4L, 1L, 0L, 0L, 8L, 6L, 17L, 25L, 27L,
    33L, 53L, 87L, 32L, 62L, 101L, 96L, 110L, 111L, 88L
  ),
  World = c(
    423L, 220L, 221L, 298L, 322L, 530L, 466L, 459L, 562L, 664L,
    760L, 893L, 1038L, 761L, 949L, 1109L, 1130L, 1317L, 1535L, 1738L
  )
)
dat
## Year Russia World
## 1 1996 0 423
## 2 1997 4 220
## 3 1998 1 221
## 4 1999 0 298
## 5 2000 0 322
## 6 2001 8 530
## 7 2002 6 466
## 8 2003 17 459
## 9 2004 25 562
## 10 2005 27 664
## 11 2006 33 760
## 12 2007 53 893
## 13 2008 87 1038
## 14 2009 32 761
## 15 2010 62 949
## 16 2011 101 1109
## 17 2012 96 1130
## 18 2013 110 1317
## 19 2014 111 1535
## 20 2015 88 1738
colors <- c(
  "#68382C", "#D6777E", "#00A4E6", "grey40",
  "#eca324", "#8abbd0", "#9ae5de"
)
my_font <- "Ubuntu Condensed"

# Draft of the chart: the Russia series and the World series divided by ten,
# drawn over four hand-made horizontal guide lines.
ggplot() +
  # Guide lines at 50, 100, 150 and 200 (curves with zero curvature).
  annotate(
    "curve",
    curvature = 0,
    x = 1995.8, xend = 2015.5,
    y = seq(50, 200, 50), yend = seq(50, 200, 50),
    color = "gray70", size = 0.7
  ) +
  geom_line(data = dat, aes(Year, Russia), size = 1.7, colour = "#04536e") +
  geom_line(data = dat, aes(Year, World / 10), size = 1.7, colour = "#f15c42") +
  scale_y_continuous(limits = c(0, 215)) +
  scale_x_continuous(limits = c(1995.5, 2016.5)) +
  # Axis captions written above the top-left and top-right corners.
  annotate(
    "text", x = 1995.8, y = 210, color = "#7c2817",
    label = "Number in Russia", fontface = "bold",
    vjust = -1, hjust = 0.13, size = 3
  ) +
  annotate(
    "text", x = 2016.5, y = 210, color = "#7c2817",
    label = "World", fontface = "bold",
    vjust = -1, hjust = 1.5, size = 3
  ) +
  labs(
    title = "Off the rich list",
    subtitle = "Russia billionaires",
    caption = "Source: Russia’s billionaires\nD.Treisman, American Economic Review(2016)",
    x = "Year", y = "Number"
  ) +
  # All theme adjustments in one place: no grid, white panel on a grey page,
  # and the title/subtitle/caption typography.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = unit(c(1.2, 0.7, 1, 0.7), "cm"),
    panel.background = element_rect(fill = "white"),
    plot.background = element_rect(fill = "grey90"),
    plot.title = element_text(family = my_font, size = 22, vjust = 4, color = "#04536e"),
    plot.subtitle = element_text(family = my_font, size = 14, vjust = 6, color = "grey20"),
    plot.caption = element_text(family = my_font, size = 12, vjust = -5, color = "grey30", hjust = 0)
  )
## [1] "C:/Users/Administrator/Desktop/BIG DATA"
# NOTE(review): the working directory and the input path are hard-coded to one
# machine; the file is read through its absolute path.
setwd("C:/Users/Administrator/Desktop/BIG DATA")
df <- read.table("C:/Users/Administrator/Desktop/BIG DATA/NCD_RisC_Lancet_2016.txt", sep = ",")

# Keep the first five columns only.
df <- df %>% select(1:5)
# colSums(is.na(df)) - no missing data

# Renaming columns
new_names <- c("Country", "ISO", "Gender", "Year", "Prevalance")
names(df) <- new_names

# Remove the first row and retain observations in 2015:
df_2015 <- df %>%
  slice(-1) %>%
  filter(Year == "2015")

# Wide layout: one row per country with the men's and the women's value
# side by side.
df2015plot <- full_join(
  select(filter(df_2015, Gender == "Men"), Country, PrevalenceMen = Prevalance),
  select(filter(df_2015, Gender == "Women"), Country, PrevalenceWomen = Prevalance),
  by = "Country"
)
library(rvest)

# Country names scraped from the Wikipedia article on Central and Eastern
# Europe: the text of the first link of each matching list item.
central_easternCountry <- read_html("https://en.wikipedia.org/wiki/Central_and_Eastern_Europe") %>%
  html_nodes("p+ ul li > a:nth-child(1)") %>%
  html_text()

# Drop the 5th and 6th scraped entries.
# NOTE(review): positional removal depends on the page layout at scrape time.
central_easternCountry <- central_easternCountry[-c(5, 6)]

# Any name containing "North" is replaced by "Macedonia", and "Russia" is
# put at the front of the list.
central_easternCountry <- c(
  "Russia",
  if_else(str_detect(central_easternCountry, "North"), "Macedonia", central_easternCountry)
)
library(wbstats)

# Country metadata cached inside wbstats; the first element of the cache list
# is used as the country table (columns region, income and country are read
# below).
general_information <- wb_cachelist
m <- general_information[[1]]

# High-income countries of the "Europe & Central Asia" region.
Highincomecountry <- m %>%
  filter(region == "Europe & Central Asia") %>%
  filter(income == "High income") %>%
  pull(country)

# Sub-Saharan African countries. The region name was misspelled
# ("Sub-Saharna Africa"), so this list was always empty. The region is
# trimmed before comparing in case the cached value carries surrounding
# whitespace.
subSahAfri <- m %>%
  filter(trimws(region) == "Sub-Saharan Africa") %>%
  pull(country)

# Rename the two Congos to the spellings used in the rest of the script.
subSahAfri <- case_when(
  str_detect(subSahAfri, "Congo, Dem. Rep.") ~ "DR Congo",
  str_detect(subSahAfri, "Congo, Rep.") ~ "Congo",
  TRUE ~ subSahAfri
)
# Final table: shortened country names, numeric values and a Region label.
# Region is assigned by the first matching list, in the order written below.
df_final <- df2015plot %>%
  mutate_all(as.character) %>%
  mutate(
    Country = recode(
      Country,
      "United States of America" = "United States",
      "Russian Federation" = "Russia",
      "United Kingdom" = "Britain"
    )
  ) %>%
  mutate_at(c("PrevalenceMen", "PrevalenceWomen"), .funs = as.numeric) %>%
  mutate(
    Region = case_when(
      Country %in% central_easternCountry ~ "Central and Eastern Europe",
      Country %in% Highincomecountry ~ "High-Income Western",
      Country %in% subSahAfri ~ "Sub-Saharan Africa",
      TRUE ~ "Others"
    )
  )

levels <- c("Central and Eastern Europe", "High-Income Western", "Sub-Saharan Africa", "Others")
my_colors <- c("#f15b40", "#eca221", "#00526d", "#b0c6d2")
my_font <- "Ubuntu Condensed"

# Fix the legend order and rescale every numeric column so that its observed
# range maps onto 0-40.
df_final <- df_final %>%
  mutate(Region = factor(Region, levels = levels)) %>%
  mutate_if(is.numeric, rescale, to = c(0, 40))

# Points along the diagonal, used to shade the area under it.
df_ground <- tibble(PrevalenceWomen = 0:40, PrevalenceMen = 0:40)

# Countries that are outlined and labelled in the chart.
some_countries <- c(
  "Russia", "Viet Nam", "Croatia", "India", "Nigeria",
  "Germany", "Thailand", "United States", "Singapore"
)
df_mini <- filter(df_final, Country %in% some_countries)
# Scatter plot of the men's value against the women's value per country,
# coloured by region, with a dashed diagonal and a shaded area below it.
df_final %>%
  ggplot(aes(PrevalenceWomen, PrevalenceMen)) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = my_colors[3],
              size = 0.8, alpha = 0.6) +
  geom_area(data = df_ground, aes(x = PrevalenceWomen, y = PrevalenceMen),
            fill = "#E9F2F7", alpha = 0.6) +
  geom_point(size = 3, aes(color = Region), alpha = 0.5) +
  # Black outline around the highlighted countries. The colour is a constant,
  # so the former color = Region mapping on this layer had no effect.
  geom_point(data = df_mini, shape = 21, size = 3, color = "black") +
  theme_minimal(base_family = my_font) +
  scale_y_continuous(limits = c(0, 40)) +
  scale_x_continuous(limits = c(0, 40)) +
  scale_color_manual(values = my_colors) +
  geom_text_repel(data = df_mini, family = my_font, color = "grey20", size = 3, force = 19,
                  aes(x = PrevalenceWomen, y = PrevalenceMen, label = Country)) +
  theme(
    panel.grid.major = element_line(colour = "#dbe2e7", size = 0.5),
    panel.grid.minor = element_blank(),
    legend.title = element_blank(),
    legend.position = "top",
    plot.margin = unit(rep(1, 4), "cm"),
    plot.title = element_text(size = 15, color = "grey20"),
    plot.subtitle = element_text(size = 5, color = "grey30"),
    plot.caption = element_text(size = 5, color = "grey30"),
    axis.title = element_text(size = 10, color = "grey15"),
    axis.text = element_text(size = 10, color = "grey15"),
    legend.text = element_text(color = "grey30", size = 10)
  ) +
  # Spelling fixed in the subtitle ("Prevalence") and in the source name
  # ("NCD", matching the NCD_RisC input file).
  labs(title = "Matters of the heart",
       subtitle = "Prevalence of raised blood pressure*, by sex, 2015, %",
       caption = "Source: NCD Risk Factor Collaboration",
       x = "Woman",
       y = "Man")
#devtools::install_github("expersso/WHO")
library(WHO)

# Two WHO indicators: WHS7_108 is used as the government figure and WHS7_105
# as the total figure in the calculations below.
gov <- get_data("WHS7_108")
total <- get_data("WHS7_105")
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 1995 2000 2005 2005 2010 2014
#colSums(is.na(gov))

# 2014 values per country for each indicator.
df_gov <- gov %>%
  filter(year == "2014", !is.na(country)) %>%
  select(gov_exp = value, country)
df_total <- total %>%
  filter(year == "2014", !is.na(country)) %>%
  select(total_exp = value, country)

# Countries present in both tables.
nations <- intersect(df_gov$country, df_total$country)

# Joined table with the private remainder, the government share and the rank
# of that share in ascending order. row_number() replaces 1:nrow(.), which
# yields c(1, 0) and fails when the table is empty.
df_plot <- full_join(df_gov %>% filter(country %in% nations),
                     df_total %>% filter(country %in% nations),
                     by = "country") %>%
  select(country, everything()) %>%
  mutate(private_exp = total_exp - gov_exp) %>%
  mutate(gov_rate = gov_exp / total_exp) %>%
  arrange(gov_rate) %>%
  mutate(rank = row_number())
# Rename: shorten the long official country names.
dfPlot <- df_plot %>%
  mutate(
    country = recode(
      country,
      "Brunei Darussalam" = "Brunei",
      "Republic of Korea" = "South Korea",
      "United Kingdom of Great Britain and Northern Ireland" = "United Kingdom",
      "United States of America" = "United States",
      "Russian Federation" = "Russia",
      "Viet Nam" = "Vietnam"
    )
  )

# Country selection, in four groups.
northernCon <- c("Denmark", "Finland", "Norway", "Sweden")
specialCon <- c("Qatar", "Brunei", "Cuba")
aseanCon <- c("Singapore", "Thailand", "Malaysia", "Vietnam")
others <- c(
  "India", "China", "United States", "United Kingdom", "France",
  "Germany", "Canada", "Japan", "South Korea"
)

# Selected countries ordered by the government share, lowest first.
df <- dfPlot %>%
  filter(country %in% c(northernCon, specialCon, aseanCon, others)) %>%
  arrange(gov_rate)
head(df, 2)
## # A tibble: 2 x 6
## country gov_exp total_exp private_exp gov_rate rank
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 India 80.3 267. 187. 0.300 17
## 2 Singapore 1689. 4047. 2358. 0.417 34
# Country order of df (ascending government share).
my_levels <- df %>%
  pull(country)

# Long layout: one row per country and spending type. The closing parenthesis
# of factor() used to sit before `levels =`, which created a column called
# `levels` instead of ordering the factor.
df1 <- df %>%
  mutate(country = factor(country, levels = my_levels)) %>%
  select(country, private_exp, gov_exp) %>%
  gather(type, value, -country)

# Table behind the bar chart: shares in percent plus ready-made text labels,
# sorted by the government share in descending order.
df_2 <- df %>%
  mutate(gov_rate = gov_rate * 100, total_rate = 100) %>%
  mutate(country = as.factor(country)) %>%
  mutate(label = paste0(round(gov_rate, 1), "%")) %>%
  mutate(label_money = scales::dollar(round(total_exp, 0))) %>%
  # The private-share label is now a named column; it was previously created
  # without a name and ended up as a column called `paste0(private_rate, "%")`.
  mutate(private_rate = round(private_exp / total_exp * 100, 1),
         private_label = paste0(private_rate, "%")) %>%
  arrange(desc(gov_rate))

# Open the data viewer only in interactive sessions.
if (interactive()) {
  View(df_2)
}
my_colors <- c("#014d64", "#01a2d9")
my_font <- "Ubuntu Condensed"
colorLevels <- c("private_exp", "gov_exp")

## Reordering countries: factor levels follow the government share,
## highest first.
df_2$counties <- factor(df_2$country, levels = df_2$country[order(-df_2$gov_rate)])

# Axis-label styling that highlights South Korea. The colour column used to be
# created as `color_lavel_y` but read as `color_label_y`, so the colour passed
# to the theme was NULL.
df_korea <- df_2 %>%
  mutate(color_label_y = case_when(country == "South Korea" ~ "red", TRUE ~ "grey20")) %>%
  mutate(bold_y = case_when(country == "South Korea" ~ "bold", TRUE ~ "plain"))

# Look the styling up in the order of the axis categories instead of relying
# on the row order of df_korea.
axis_order <- match(levels(df_2$counties), as.character(df_korea$country))

# Horizontal bars: a full-width "Private" bar per country with the government
# share drawn on top of it and printed as text.
df_2 %>%
  ggplot() +
  coord_flip() +
  theme_minimal() +
  geom_col(aes(x = counties, y = total_rate, fill = "Private"), width = 0.9) +
  geom_col(aes(x = counties, y = gov_rate, fill = "Government"), width = 0.9) +
  scale_y_continuous(limits = c(0, 100)) +
  scale_fill_manual(values = my_colors, name = "Type") +
  # Same x variable as the bars, so labels and bars share one category order.
  geom_text(data = df_2, aes(x = counties, y = 10, label = label), color = "white", family = my_font) +
  labs(x = NULL, y = NULL,
       title = "Share of Korea government spending on health care, 2014",
       caption = "Source: World Health Organization (WHO)") +
  theme(plot.title = element_text(size = 13, colour = "grey20", family = my_font),
        plot.caption = element_text(color = "grey30", face = "italic", size = 11)) +
  theme(axis.text.y = element_text(color = df_korea$color_label_y[axis_order], size = 12,
                                   face = df_korea$bold_y[axis_order]))
# Extract data from link:
link <- "https://en.wikipedia.org/wiki/List_of_countries_by_life_expectancy?fbclid=IwAR0jRtf0RacPLxVPgcuu4zgYJl9HDk01cNy6u48vvyTuoK9CC1jZH8_pZPQ"

# Second table of the article's content area.
data_raw <- link %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/div/table[2]') %>%
  html_table(fill = TRUE) %>%
  .[[1]]

# Inspect the scraped table (previously this opened the unrelated `data`
# object created earlier in the script).
if (interactive()) {
  View(data_raw)
}

# Keep the country plus the next four columns and drop the first row, which
# holds the table's sub-header. The sub-header names columns 2 and 3 "Male"
# and "Female" and column 4 "M"; column 5 is taken to be the matching female
# figure, as in the later part of this script that names columns 4-6
# Male/Female/Both. The previous names had Female and Male swapped for
# columns 4 and 5.
data <- data_raw[-1, 1:5]
names(data) <- c("Country", "Male_1", "Female_1", "Male", "Female")
data <- na.omit(data)

# Convert to factor / numeric and remove missing data. Columns 2 and 3 are
# dropped after the conversion; only the Male/Female pair is kept.
df_life <- data %>%
  mutate(Country = factor(Country)) %>%
  na.omit()
df_life[, 2:5] <- lapply(df_life[, 2:5], as.numeric)
df_life <- na.omit(df_life[, -c(2:3)])

# The 20 countries with the highest female value; diff is the female-minus-male
# gap (the same numbers the column held before the names were corrected).
df_plot <- df_life[order(df_life$Female, decreasing = TRUE), ] %>%
  slice(1:20) %>%
  mutate(diff = Female - Male)
if (interactive()) {
  View(df_plot)
}
#df_2$counties <-factor(df_2$country, levels=df_2$country[order(-df_2$gov_rate)])

# Layout constants and colours. Only h is used by the chart below.
h <- 0.8
h1 <- 84.2
h2 <- h1 + 1
h3 <- -1.8
maleColor <- "#2859C9"
femaleColor <- "#B92F77"
my_font <- "Ubuntu Condensed"

# Row used to position the "Male"/"Female" column headers.
swits <- df_plot %>%
  filter(Country == "Switzerland")
swits
## With the corrected column names this row reads Male 80.3, Female 85.1, diff 4.8.

# Dumbbell chart: male value at the left end, female value at the right end.
# NOTE(review): Country is a factor with default (alphabetical) levels, so the
# rows are not drawn in the order the subtitle states - confirm the intent.
df_plot %>%
  ggplot() +
  geom_dumbbell(aes(y = Country, x = Male, xend = Female),
                size = 1.5, color = "grey60",
                colour_x = "#F7BC08", colour_xend = "#395B74",
                size_x = 4.2, size_xend = 4.2) +
  # Value labels: female to the right of its point, male to the left of its
  # point. The left-hand label used to print the other column's value.
  geom_text(aes(x = Female - h, y = Country, label = Female), vjust = 0.5, hjust = -1.2) +
  geom_text(aes(x = Male - h, y = Country, label = Male), vjust = 0.5, hjust = 0.8) +
  scale_x_continuous(limits = c(75, 100)) +
  scale_y_discrete(expand = c(0.075, 0)) +
  # Column headers above the Switzerland row.
  geom_text(data = swits, aes(label = "Female", x = Female, y = Country),
            hjust = -1.2, vjust = -1.5, fontface = "bold") +
  geom_text(data = swits, aes(label = "Male", x = Male, y = Country),
            hjust = 0.8, vjust = -1.5, fontface = "bold") +
  theme_minimal(base_family = my_font) +
  theme(axis.title = element_blank()) +
  theme(axis.text.x = element_blank()) +
  theme(panel.grid.major = element_line(colour = "#dbe2e7", size = 0.5)) +
  theme(plot.margin = unit(rep(1, 4), "cm")) +
  theme(axis.text.y = element_text(size = 13, family = my_font)) +
  theme(plot.title = element_text(size = 15, family = my_font, color = "gray20")) +
  theme(plot.caption = element_text(size = 10, face = "italic", color = "grey30")) +
  theme(plot.subtitle = element_text(size = 12, color = "grey30")) +
  labs(x = NULL, y = NULL,
       title = "Gender gap in life expectancy across 20 countries",
       subtitle = "Note: By descending order of female life expectancy",
       caption = "Data Source: United Nations Development Programme")
# Extract data from link:
# Scrape the life-expectancy article again and keep the second table of its
# content area as data_raw.
link <- "https://en.wikipedia.org/wiki/List_of_countries_by_life_expectancy?fbclid=IwAR0jRtf0RacPLxVPgcuu4zgYJl9HDk01cNy6u48vvyTuoK9CC1jZH8_pZPQ"
data_raw <- link %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/div/table[2]') %>%
  html_table(fill = TRUE) %>%
  .[[1]]
## # A tibble: 241 x 1
## data_raw$Country $`GBD 2010[9][1~ $`GBD 2010[9][1~ $`CIA 2017[11]`
## <chr> <chr> <chr> <chr>
## 1 Country Male Female M
## 2 Afghanistan 58.2+4.6<U+2212>4.0 57.3+4.4<U+2212>5.1 50.3
## 3 Albania 72.0+2.9<U+2212>2.8 78.1+2.1<U+2212>2.2 75.8
## 4 Algeria 74.3+1.1<U+2212>1.1 76.5+1.0<U+2212>1.0 75.6
## 5 American Samoa N/A N/A 71.3
## 6 Andorra 79.8+1.2<U+2212>1.0 85.2+1.0<U+2212>1.0 80.7
## 7 Angola 57.9+8.6<U+2212>8.4 63.9+8.1<U+2212>7.9 58.2
## 8 Anguilla N/A N/A 78.9
## 9 Antigua and Bar~ 74.1+1.8<U+2212>1.9 79.0+1.5<U+2212>1.7 74.6
## 10 Argentina 72.5+0.1<U+2212>0.1 79.3+0.1<U+2212>0.1 74.2
## # ... with 231 more rows, and 11 more variables: $`CIA 2017[11]` <chr>, $`CIA
## # 2017[11]` <chr>, $`UN 2015[3]` <chr>, $`UN 2015[3]` <chr>, $`UN
## # 2015[3]` <chr>, $`WHO 2015[12][13]` <chr>, $`WHO 2015[12][13]` <chr>, $`WHO
## # 2015[12][13]` <chr>, $`OECD 2016[14]` <chr>, $`OECD 2016[14]` <chr>, $`OECD
## # 2016[14]` <chr>
# Drop columns 2 and 3, then keep the country and the next three columns and
# remove the sub-header row.
data_one <- data_raw[, -c(2, 3)]
data_one <- data_one[, c(1:4)]
data <- data_one[-1, ]
names(data) <- c("Country", "Male", "Female", "Both")

# Country is turned into a factor for one step so that mutate_if() converts
# only the three value columns to numbers. Footnote markers such as "[b]" are
# then stripped from the names: entries like "Canada[b]" and "France[c]"
# would otherwise never match a plain country name in later %in% filters.
df_one <- data %>%
  mutate(Country = factor(Country)) %>%
  mutate_if(is.character, as.numeric) %>%
  mutate(Country = as.character(Country)) %>%
  mutate(Country = trimws(gsub("\\[[^]]*\\]", "", Country))) %>%
  mutate(Country = case_when(str_detect(Country, "South Korea") ~ "South Korea",
                             TRUE ~ Country)) %>%
  na.omit()
df_one
## Country Male Female Both
## 1 Afghanistan 50.3 53.2 51.7
## 2 Albania 75.8 81.4 78.5
## 3 Algeria 75.6 78.4 77.0
## 4 American Samoa 71.3 75.6 73.4
## 5 Andorra 80.7 85.2 82.9
## 6 Angola 58.2 62.3 60.2
## 7 Anguilla 78.9 84.2 81.5
## 8 Antigua and Barbuda 74.6 79.0 76.7
## 9 Argentina 74.2 80.6 77.3
## 10 Armenia 71.6 78.5 74.9
## 11 Aruba 73.9 80.1 76.9
## 12 Australia 79.8 84.9 82.3
## 13 Austria 78.9 84.4 81.6
## 14 Azerbaijan 69.7 76.1 72.8
## 16 Bahrain 76.8 81.3 79.0
## 17 Bangladesh 71.3 75.6 73.4
## 18 Barbados 73.2 77.9 75.5
## 19 Belarus 67.5 78.8 73.0
## 20 Belgium 78.5 83.8 81.1
## 21 Belize 67.3 70.6 68.9
## 22 Benin 60.9 63.8 62.3
## 23 Bermuda 78.2 84.6 81.4
## 24 Bhutan 69.6 71.7 70.6
## 25 Bolivia 66.7 72.4 69.5
## 26 Bosnia and Herzegovina 73.9 80.2 76.9
## 27 Botswana 61.2 65.5 63.3
## 28 Brazil 70.5 77.7 74.0
## 29 British Virgin Islands 77.4 80.3 78.8
## 30 Brunei 75.0 79.8 77.3
## 31 Bulgaria 71.4 78.2 74.7
## 32 Burkina Faso 53.8 58.0 55.9
## 33 Burma 66.6 69.9 68.2
## 34 Burundi 59.2 62.7 60.9
## 35 Cabo Verde 70.1 74.8 72.4
## 36 Cambodia 62.4 67.5 64.9
## 37 Cameroon 57.6 59.9 59.0
## 38 Canada[b] 79.3 84.7 81.9
## 40 Cayman Islands 78.6 84.1 81.3
## 41 Central African Republic 51.4 54.2 52.8
## 42 Chad 49.4 51.9 50.6
## 43 Chile 75.9 82.1 78.9
## 45 Colombia 72.8 79.3 75.9
## 46 Comoros 62.3 67.0 64.6
## 47 Republic of the Congo 58.6 61.1 59.8
## 48 Cook Islands 73.2 79.0 76.0
## 49 Costa Rica 76.1 81.5 78.7
## 50 Cote d'Ivoire 57.8 60.2 59.0
## 51 Croatia 72.9 79.4 76.1
## 52 Cuba 76.5 81.3 78.8
## 53 Curacao 76.2 80.9 78.5
## 54 Cyprus 76.0 81.8 78.8
## 55 Czech Republic 75.8 81.9 78.8
## 56 Denmark 77.1 82.1 79.5
## 57 Djibouti 61.0 66.2 63.6
## 58 Dominica 74.2 80.3 77.2
## 59 Dominican Republic 76.0 80.6 78.3
## 60 DR Congo 56.1 59.3 57.7
## 61 Ecuador 74.0 80.1 77.0
## 62 Egypt 71.6 74.4 73.0
## 63 El Salvador 71.6 78.3 74.9
## 64 Equatorial Guinea 63.4 65.8 64.6
## 65 Eritrea 62.7 67.8 65.2
## 66 Estonia 72.1 81.9 76.9
## 67 Eswatini 52.7 51.5 52.1
## 68 Ethiopia 60.1 64.7 62.6
## 69 Faroe Islands 78.0 83.2 80.5
## 70 Federated States of Micronesia 71.1 75.3 73.1
## 71 Fiji 70.3 75.8 73.0
## 72 Finland 78.0 84.1 81.0
## 73 France[c] 78.8 85.2 81.9
## 75 French Polynesia 75.1 79.8 77.4
## 76 Gabon 51.7 52.5 52.1
## 78 Gaza Strip 72.5 75.9 74.2
## 79 Georgia 72.3 80.7 76.4
## 80 Germany 78.5 83.3 80.8
## 81 Ghana 64.5 69.6 67.0
## 82 Gibraltar 76.7 82.6 79.6
## 83 Greece 78.0 83.4 80.7
## 84 Greenland 69.9 75.5 72.6
## 85 Grenada 71.9 77.4 74.5
## 87 Guam 73.6 78.6 76.0
## 88 Guatemala 70.6 74.7 72.6
## 89 Guernsey 79.9 85.4 82.6
## 90 Guinea 59.5 62.6 61.0
## 91 Guinea-Bissau 48.9 53.1 51.0
## 92 Guyana 65.6 71.8 68.6
## 93 Haiti 61.6 66.8 64.2
## 94 Honduras 69.5 72.9 71.2
## 95 Hong Kong 80.4 85.9 83.0
## 96 Hungary 72.4 80.0 76.1
## 97 Iceland 80.9 85.4 83.1
## 98 India 67.6 70.1 68.8
## 99 Indonesia 70.4 75.7 73.0
## 100 Iran 72.7 75.5 74.0
## 101 Iraq 72.6 77.2 74.9
## 102 Ireland 78.6 83.4 80.9
## 103 Isle of Man 79.6 83.2 81.3
## 104 Israel 80.7 84.5 82.5
## 105 Italy 79.6 85.1 82.3
## 106 Jamaica 72.1 75.4 73.7
## 107 Japan 81.9 88.8 85.3
## 108 Jersey 79.5 84.6 81.9
## 109 Jordan 73.4 76.3 74.8
## 110 Kazakhstan 65.9 76.0 71.1
## 111 Kenya 62.8 65.8 64.3
## 112 Kiribati 64.0 69.1 66.5
## 113 Kuwait 76.8 79.6 78.2
## 114 Kyrgyzstan 66.8 75.4 70.9
## 115 Laos 62.6 66.7 64.6
## 116 Latvia 70.1 79.5 74.7
## 117 Lebanon 76.5 79.1 77.8
## 118 Lesotho 53.0 53.1 53.0
## 119 Liberia 61.2 65.5 63.3
## 120 Libya 74.9 78.5 76.7
## 121 Liechtenstein 79.7 84.7 81.9
## 122 Lithuania 69.7 80.7 75.0
## 123 Luxembourg 79.9 84.9 82.3
## 124 Macau 81.6 87.7 84.6
## 125 Madagascar 64.7 67.8 66.3
## 126 Malawi 59.7 63.8 61.7
## 127 Malaysia 72.4 78.2 75.2
## 128 Maldives 73.5 78.3 75.8
## 129 Mali 58.2 62.5 60.3
## 130 Malta 78.1 83.0 80.5
## 131 Marshall Islands 71.2 75.7 73.4
## 133 Mauritania 61.1 65.8 63.4
## 134 Mauritius 72.4 79.5 75.8
## 136 Mexico 73.3 79.0 76.1
## 138 Moldova 67.1 75.1 71.0
## 139 Monaco 85.6 93.5 89.4
## 140 Mongolia 65.7 74.4 69.9
## 142 Montserrat 75.9 73.2 74.6
## 143 Morocco 74.0 80.3 77.1
## 144 Mozambique 52.9 54.5 53.7
## 146 Namibia 62.4 65.6 64.0
## 147 Nauru 63.3 70.9 67.4
## 148 Nepal 70.4 71.6 71.0
## 149 Netherlands 79.3 83.7 81.4
## 150 New Caledonia 73.9 82.0 77.9
## 151 New Zealand 79.1 83.5 81.3
## 152 Nicaragua 71.3 75.8 73.4
## 153 Niger 54.7 57.3 55.9
## 154 Nigeria 52.8 55.0 53.8
## 155 North Korea 66.9 74.8 70.7
## 156 North Macedonia 73.8 79.2 76.4
## 157 Northern Mariana Islands 73.4 77.8 75.4
## 158 Norway 79.8 84.0 81.9
## 159 Oman 73.7 77.7 75.7
## 160 Pakistan 66.1 70.1 68.1
## 161 Palau 70.2 76.8 73.4
## 163 Panama 76.0 81.7 78.8
## 164 Papua New Guinea 65.1 69.7 67.3
## 165 Paraguay 74.7 80.2 77.4
## 166 People's Republic of China 74.6 79.0 76.7
## 167 Peru 71.9 76.1 74.0
## 168 Philippines 65.9 73.1 69.4
## 169 Poland 73.9 81.8 77.8
## 170 Portugal 76.2 82.9 79.4
## 171 Puerto Rico 77.6 84.4 80.9
## 172 Qatar 77.7 82.2 80.0
## 174 Romania 71.9 79.0 75.4
## 175 Russia 65.3 77.1 71.0
## 176 Rwanda 62.3 66.3 64.3
## 177 Sahrawi Arab Democratic Republic 61.1 65.8 63.4
## 178 Saint Helena, Ascension and Tristan da Cunha 76.7 82.7 79.6
## 179 Saint Kitts and Nevis 73.5 78.4 75.9
## 180 Saint Lucia 75.2 80.8 77.9
## 181 Saint Pierre and Miquelon 78.3 83.1 80.6
## 183 Samoa 71.1 77.0 74.0
## 184 San Marino 80.8 86.1 83.3
## 185 Sao Tome and Principe 63.9 66.7 65.3
## 186 Saudi Arabia 73.4 77.7 75.5
## 187 Senegal 60.0 64.3 62.1
## 188 Serbia 72.8 78.8 75.7
## 189 Seychelles 70.4 79.6 74.9
## 190 Sierra Leone 56.0 61.3 58.6
## 191 Singapore 82.6 88.1 85.2
## 192 Sint Maarten 76.0 80.8 78.3
## 193 Slovakia 73.7 81.1 77.3
## 194 Slovenia 74.8 82.2 78.3
## 195 Solomon Islands 72.9 78.3 75.6
## 196 Somalia 50.7 54.9 52.8
## 197 South Africa 62.4 65.3 63.8
## 198 South Korea 79.3 85.8 82.5
## 200 Spain 78.8 84.9 81.8
## 201 Sri Lanka 73.5 80.6 76.9
## 202 Sudan 62.3 66.7 64.4
## 203 Suriname 70.1 75.1 72.5
## 204 Sweden 80.2 84.2 82.1
## 205 Switzerland 80.3 85.1 82.6
## 206 Syria 72.7 77.6 75.1
## 207 Taiwan 77.1 83.6 80.2
## 208 Tajikistan 64.9 71.4 68.1
## 209 Tanzania 61.2 64.1 62.6
## 210 Thailand 71.7 78.3 74.9
## 211 Bahamas 70.2 75.1 72.6
## 212 Gambia 62.8 67.5 65.1
## 213 Timor-Leste 66.8 70.1 68.4
## 214 Togo 62.8 68.1 65.4
## 215 Tonga 74.9 78.1 76.4
## 216 Trinidad and Tobago 70.2 76.2 73.1
## 217 Tunisia 74.1 77.4 75.7
## 218 Turkey 72.7 77.5 75.0
## 219 Turkmenistan 67.4 73.6 70.4
## 220 Turks and Caicos Islands 77.2 82.9 80.0
## 221 Tuvalu 64.7 69.2 66.9
## 222 U.S. Virgin Islands 76.2 82.8 79.4
## 223 Uganda 54.4 57.3 55.9
## 224 Ukraine 67.4 77.1 72.1
## 225 United Arab Emirates 75.0 80.4 77.7
## 226 United Kingdom 78.6 83.1 80.8
## 227 United States 76.8 81.0 78.8
## 228 Uruguay 74.2 80.6 77.4
## 229 Uzbekistan 71.0 77.3 74.0
## 230 Vanuatu 72.1 75.4 73.7
## 231 Venezuela 73.0 79.1 76.0
## 232 Vietnam 71.2 76.4 73.7
## 233 Wallis and Futuna 76.8 83.0 79.8
## 234 West Bank 73.2 77.4 75.2
## 236 Yemen 63.7 68.2 65.9
## 237 Zambia 51.1 54.4 52.7
## 238 Zimbabwe 58.3 62.5 60.4
## 239 European Union 77.4 83.2 80.2
## 240 World 69.0 67.0 71.1
# ASEAN member table: second table of the article's content area.
asean_link <- "https://en.wikipedia.org/wiki/Association_of_Southeast_Asian_Nations"
asean_names <- asean_link %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/div/table[2]') %>%
  html_table(fill = TRUE) %>%
  .[[1]]

# G7 member table: fifth table of the article's content area.
g7_link <- "https://en.wikipedia.org/wiki/Group_of_Seven"
g7_names <- g7_link %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/div/table[5]') %>%
  html_table(fill = TRUE) %>%
  .[[1]]

# Rows of df_one for the ASEAN members, the G7 members and South Korea, with
# the axis-label colour that highlights South Korea.
df_two <- df_one %>%
  filter(Country %in% c(asean_names$Country, g7_names$Member, "South Korea")) %>%
  mutate(text_color = case_when(Country == "South Korea" ~ "firebrick",
                                TRUE ~ "gray20"))

# Order the countries by the male value (ascending) and freeze that order in
# the factor levels so the chart keeps it.
Asean_G7 <- df_two %>%
  arrange(Male) %>%
  mutate(Country = factor(Country, levels = Country))
DT::datatable(Asean_G7)
## [1] 62.4 62.6 65.9 70.4 71.2 71.7 72.4 75.0 76.8 77.4 78.5 78.6 79.3 79.6 81.9
## [16] 82.6
# End-point colours of the dumbbells.
maleColor <- "#2859C9"
femaleColor <- "#B92F77"

# Dumbbell chart of the male and female values per country, with the values
# printed next to the end points and the axis labels coloured per country.
Asean_G7 %>%
  ggplot(aes(y = Country)) +
  geom_dumbbell(
    aes(x = Male, xend = Female),
    size = 1.5, color = "black",
    colour_x = maleColor, colour_xend = femaleColor
  ) +
  geom_text(aes(x = Male, label = Male), hjust = 1.2, size = 3.5) +
  geom_text(aes(x = Female, label = Female), hjust = -0.5, size = 3.5) +
  scale_x_continuous(limits = c(40, 100)) +
  # All theme settings in a single call; the title properties that were spread
  # over two calls are combined into one element.
  theme(
    axis.text.y = element_text(color = Asean_G7$text_color, size = 8),
    plot.title = element_text(hjust = 0.5, face = "bold", color = "gray30", size = 13),
    plot.subtitle = element_text(color = "gray15"),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_line(),
    axis.ticks = element_blank(),
    legend.position = "top",
    panel.border = element_blank()
  ) +
  labs(
    x = NULL, y = NULL,
    title = "Gender gap in life expectation of Asean G7 Countries",
    subtitle = "By descending order of Male life expection"
  )
## List of 1
## $ axis.title: list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
## [1] Cambodia Laos Philippines Indonesia Vietnam
## [6] Thailand Malaysia Brunei United States European Union
## [11] Germany United Kingdom South Korea Italy Japan
## [16] Singapore
## 16 Levels: Cambodia Laos Philippines Indonesia Vietnam Thailand ... Singapore
# Classify every country by its combined value: up to 60 is "The Shortest",
# above 60 and below 80 is "Average", 80 and above is "The Longest".
# The second test only needs the upper bound because the first branch has
# already taken every value up to 60.
df_one <- df_one %>%
  mutate(cate_conturies = ifelse(Both <= 60, "The Shortest",
                                 ifelse(Both < 80, "Average", "The Longest")))

my_colors <- c("#f15b40", "#eca221", "#00526d", "#b0c6d2")

# Female against male value for all countries, coloured by class, with the
# ASEAN/G7 selection outlined and labelled.
p <- df_one %>%
  ggplot(aes(x = Male, y = Female)) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = my_colors[3],
              size = 0.8, alpha = 0.8) +
  geom_point(size = 2, aes(color = cate_conturies), alpha = 0.7) +
  # Black outline for the highlighted countries. The colour is a constant, so
  # the former color = Country mapping on this layer had no effect.
  geom_point(data = Asean_G7, aes(x = Male, y = Female), shape = 21,
             size = 3, color = "black") +
  geom_text_repel(data = Asean_G7, aes(x = Male, y = Female, label = Country),
                  size = 3.5, col = "gray20") +
  scale_x_continuous(limits = c(50, 90)) +
  scale_y_continuous(limits = c(50, 90)) +
  scale_color_manual(values = my_colors, name = "Levels of Life Expectation") +
  # The complete theme goes first: added after theme(legend.position = "top"),
  # it reset the legend position to the theme's default.
  theme_minimal() +
  theme(legend.position = "top") +
  theme(plot.title = element_text(size = 15, color = "grey20")) +
  theme(plot.subtitle = element_text(size = 5, color = "grey30")) +
  theme(plot.caption = element_text(size = 5, color = "grey30")) +
  theme(axis.title = element_text(size = 10, color = "grey15")) +
  theme(axis.text = element_text(size = 10, color = "grey15")) +
  theme(legend.text = element_text(color = "grey30", size = 10)) +
  labs(title = "Life Expectation",
       caption = "Source: wikipedia",
       x = "Male",
       y = "Female")
p