#devtools::install_github("ricardo-bion/ggradar", dependencies = TRUE)
library(tidyverse)
library(fmsb)
library(showtext)
library(scales)
library(ggradar)
library(ggplot2)
library(rnaturalearth)
library(countrycode)
library(plotly)
library(leaflet)
library(wordcloud)
library(SnowballC)
library(tidytext)
library(maps)
library(geosphere)
# ---- Colour palette ----
# Named hex colours reused by every chart below.
grey <- "#EFEFEF"
green <- "#16DB93"
cyan <- "#01BAEF"
yellow <- "#F7B267"
blue <- "#255C99"
pink <- "#D36582"
purple <- "#E83151"

# Gradient generators: call them with the number of colours wanted.
colfunc <- colorRampPalette(c(purple, blue, green, cyan))
colfunc2 <- colorRampPalette(c(blue, cyan, grey, pink, purple))

# ---- Typography ----
# Register the Google font "Lato" under the family name kept in `font`.
font <- "lato"
font_add_google("Lato", font)
# From here on showtext renders plot text automatically when needed.
showtext_auto()
# ---- Load the OECD data ----
# The two .RData files are expected to provide `sjd1` and `sjd2`, which are
# kept under more explicit names and then removed from the workspace.
load("OECD skill job level 1.RData")
load("OECD skill job level 2.RData")
OCDE_macro <- sjd1
OCDE_micro <- sjd2
rm(list = c("sjd1", "sjd2"))

# Harmonise two country names (same replacement in both tables); the macro
# names are later compared with the `admin` column of ne_countries().
OCDE_macro$country <- replace(
  OCDE_macro$country,
  OCDE_macro$country == "United States",
  "United States of America"
)
OCDE_macro$country <- replace(
  OCDE_macro$country,
  OCDE_macro$country == "Slovak Republic",
  "Slovakia"
)
OCDE_micro$country <- replace(
  OCDE_micro$country,
  OCDE_micro$country == "United States",
  "United States of America"
)
OCDE_micro$country <- replace(
  OCDE_micro$country,
  OCDE_micro$country == "Slovak Republic",
  "Slovakia"
)

# Multiply the indicator by 100 in both tables (the charts print it with "%").
OCDE_macro$value <- 100 * OCDE_macro$value
OCDE_micro$value <- 100 * OCDE_micro$value

# Distinct countries and macro skills found in the level-1 table.
Countries_list <- unique(OCDE_macro$country)
Skills <- unique(OCDE_macro$skill1)
# Country attributes (population, GDP, region) for the countries present in
# the OECD data, taken from the Natural Earth country table.
world <- ne_countries() %>%
  as.data.frame() %>%
  filter(admin %in% Countries_list) %>%
  select(country = admin, population = pop_est, GDP = gdp_md_est) %>%
  mutate(
    # Region label derived from the country name by countrycode().
    Region = countrycode(
      sourcevar = country,
      origin = "country.name",
      destination = "region"
    )
  )
# ---- Bar chart: imbalance of one macro skill, country by country ----
# Skill displayed by this chart and by the map that follows.
Skill_to_analyse <- Skills[3]

# One row per country for the chosen skill; the country factor is ordered by
# value so that the bars come out sorted.
Skill_data <- merge(OCDE_macro, world) %>%
  filter(skill1 == Skill_to_analyse) %>%
  mutate(country = fct_reorder(country, value))

# Mean across countries, computed once so that the reference line and its
# label are drawn a single time instead of once per data row.
Skill_data_mean <- mean(Skill_data$value)

Skill_graph <- ggplot(Skill_data, aes(x = country, y = value, fill = Region)) +
  geom_col() +
  # Value label placed just inside the end of each bar.
  geom_text(
    aes(
      y = ifelse(value < 0, value + 1.5, value - 1.5),
      label = paste0(round(value, 0), "%")
    ),
    size = 2.5, colour = "white", fontface = "bold", family = font
  ) +
  # One colour per region actually present (a fixed count fails as soon as
  # the data contains more regions than colours).
  scale_fill_manual(
    values = colfunc(n_distinct(Skill_data$Region, na.rm = TRUE))
  ) +
  #geom_flag(x = 0, aes(image = Flag)) +
  coord_flip() +
  labs(
    title = paste0(
      Skill_to_analyse,
      " imbalances :\n",
      nrow(Skill_data %>% filter(value > 0)),
      " countries in need"
    ),
    # Region with the lowest average value.
    subtitle = paste0(
      Skill_data %>%
        group_by(Region) %>%
        summarise(value = mean(value)) %>%
        arrange(value) %>%
        head(1) %>%
        select(Region) %>%
        as.character(),
      " in surplus"
    ),
    y = "Value", x = NULL,
    caption = "Source : OCDE, 2015"
  ) +
  geom_hline(yintercept = Skill_data_mean) +
  annotate(
    "text",
    x = 2,
    y = Skill_data_mean + 1.1,
    label = paste0("Mean : ", round(Skill_data_mean, 0), "%"),
    vjust = 1.2,
    hjust = 0.1,
    size = 4,
    colour = purple
  ) +
  scale_x_discrete(guide = guide_axis(n.dodge = 1)) +
  theme(
    legend.position = c(0.85, 0.2),
    legend.background = element_blank(),
    panel.background = element_blank(),
    legend.text = element_text(size = 8, family = font),
    axis.line = element_line(),
    axis.text.y = element_text(angle = 20),
    plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
    plot.caption = element_text(family = font, size = 10, color = blue)
  )
Skill_graph

# ---- Interactive world map of the selected skill ----
map <- ne_countries()
#map <- map[map$admin %in% Countries_list,]
# Attach each country's value; countries absent from Skill_data get NA.
map$value <- Skill_data$value[match(map$admin, Skill_data$country)]

# 5-point bins covering at least [-20, 40], widened when the data goes
# beyond that interval so that no observed value falls back to the NA colour.
map_value_span <- range(map$value, -20, 40, na.rm = TRUE)
map_bins <- seq(
  floor(map_value_span[1] / 5) * 5,
  ceiling(map_value_span[2] / 5) * 5,
  by = 5
)
pal <- colorBin(
  palette = c(blue, grey, grey, purple),
  domain = map$value,
  na.color = "white",
  bins = map_bins
)
#map$value %>% max(na.rm = T)

# HTML hover label for every polygon: country name and rounded value.
map$labels <- paste0(
  "<strong> Country: </strong> ",
  map$admin, "<br/> ",
  "<strong> Skills: </strong> ",
  round(map$value, 2), "<br/> "
) %>%
  lapply(htmltools::HTML)

#Building the leaflet map
LeafMap <- leaflet(map) %>%
  setView(lng = 0, lat = 30, zoom = 1.3) %>%
  addPolygons(
    fillColor = ~ pal(value),
    color = blue,
    weight = 1,
    opacity = 1,
    fillOpacity = 1,
    label = ~labels,
    # Argument spelled out in full rather than relying on partial matching.
    highlightOptions = highlightOptions(
      color = cyan,
      bringToFront = TRUE,
      fill = TRUE,
      fillOpacity = 1
    )
  ) %>%
  leaflet::addLegend(
    pal = pal, values = ~value,
    opacity = 1, title = "Skills imbalances"
  )
LeafMap
# ---- Scatter plot: average imbalance vs GDP or population ----
# Country-level averages; the United States is left out of this chart.
Country_graph <- merge(OCDE_macro, world) %>%
  filter(country != "United States of America") %>%
  group_by(country) %>%
  summarise(
    value = mean(value),
    GDP = mean(GDP),
    population = mean(population)
  ) %>%
  # NOTE(review): `GDP` comes from Natural Earth's `gdp_md_est`, which looks
  # like it is already expressed in millions; after this division the axis
  # label "(million)" may be wrong for GDP - confirm the unit.
  mutate(
    GDP = GDP / 1000000,
    population = population / 1000000
  )

# Which indicator goes on the y axis: "GDP" or "Population".
Choice_GDP_Pop <- "GDP"
# Plain if/else: the condition is a single value and the branches assign.
if (Choice_GDP_Pop == "Population") {
  Country_graph <- Country_graph %>% select(-GDP)
} else {
  Country_graph <- Country_graph %>% select(-population)
}
# The remaining third column is the indicator that was kept.
colnames(Country_graph)[3] <- "infos"

Country_plot <- ggplot(Country_graph, aes(x = value, y = infos, color = value)) +
  geom_vline(xintercept = 0, col = grey) +
  geom_point() +
  geom_text(
    aes(label = country),
    family = font,
    hjust = -0.1,
    vjust = -0.5,
    size = 3,
    check_overlap = TRUE
  ) +
  geom_smooth(color = blue, method = lm, se = FALSE) +
  scale_color_gradient(low = purple, high = blue) +
  guides(color = guide_colorsteps(
    barwidth = 15,
    barheight = 0.5,
    title.position = "top",
    title.hjust = 0.5,
    title = "Scale where -100 represents a surplus\n of skills and 100 a lack"
  )) +
  ylim(0, 0.5 + max(Country_graph$infos)) +
  scale_x_continuous(
    limits = c(-15, 45),
    breaks = c(-15, 0, 15, 30, 45)
  ) +
  labs(
    title = paste0("The larger the ", Choice_GDP_Pop, ", the greater the skills shortage"),
    # Countries with the lowest and the highest average value.
    subtitle = paste0(
      "The extremes : ",
      Country_graph %>% arrange(value) %>% head(1) %>% select(1) %>% as.character(),
      " in surplus and ",
      Country_graph %>% arrange(desc(value)) %>% head(1) %>% select(1) %>% as.character(),
      " in shortage"
    ),
    x = NULL, y = paste0(Choice_GDP_Pop, " (million)"),
    caption = "Source : OCDE, 2015"
  ) +
  theme(
    legend.background = element_blank(),
    legend.position = "bottom",
    panel.background = element_blank(),
    legend.text = element_text(size = 8, family = font),
    axis.line = element_line(),
    axis.text.y = element_text(angle = 0, family = font),
    plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
    plot.caption = element_text(family = font, size = 10, color = blue)
  )
Country_plot

# ---- Radar chart comparing two countries on the macro skills ----
# Wide table: one row per country, one column per macro skill (xtabs sums
# `value` for each country/skill pair).
Skill_radar <- xtabs(formula = value ~ country + skill1, data = OCDE_macro) %>%
  as.data.frame.matrix()

# Move the country names from the row names into a leading `Country` column
# and rescale every skill column across countries.
Skill_radar <- Skill_radar %>%
  rownames_to_column("Country") %>%
  mutate(across(-Country, rescale))

# Shorten the axis labels: parentheses become spaces, "Skills" and "Basic"
# are removed, spaces become line breaks and double line breaks are dropped.
radar_labels <- gsub("[()]", " ", colnames(Skill_radar))
radar_labels <- str_remove_all(radar_labels, "Skills")
radar_labels <- str_remove_all(radar_labels, "Basic")
radar_labels <- str_replace_all(radar_labels, " ", "\n")
colnames(Skill_radar) <- str_remove_all(radar_labels, "\n\n")

# Countries for comparison
Country1 <- "France" #Countries_list[2]
Country2 <- "Spain" #Countries_list[16]
Comparison <- c(Country1, Country2)

# Radar chart restricted to the two selected countries.
Skill_radar_graph <- ggradar(
  filter(Skill_radar, Country %in% Comparison),
  font.radar = font,
  grid.label.size = 4, # Affects the grid annotations (0%, 50%, etc.)
  axis.label.size = 4,
  group.point.size = 5, # Simply the size of the point
  group.colours = c(blue, purple)
) +
  labs(
    title = paste("Skills imbalances between", Country1, "and", Country2),
    caption = "Source : OCDE, 2015"
  ) +
  theme(
    legend.position = c(-0.1, 0.2),
    legend.justification = "left",
    legend.text = element_text(size = 10, family = font),
    legend.key = element_rect(fill = NA, color = NA),
    legend.background = element_blank(),
    plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
    plot.caption = element_text(family = font, size = 10, color = blue)
  )
Skill_radar_graph

# ---- Bar chart: average imbalance of every detailed skill ----
# Mean value of each skill2 (within its skill1 family) over all rows of the
# level-2 table, with skill2 ordered by that mean.
Skill_micro <- OCDE_micro %>%
  group_by(skill1, skill2) %>%
  summarise(value = mean(value)) %>%
  ungroup() %>%
  mutate(skill2 = fct_reorder(skill2, value))

# Overall mean, computed once so that the reference line and its label are
# drawn a single time instead of once per data row.
Skill_micro_mean <- mean(Skill_micro$value)

Skill_micro_graph <- ggplot(Skill_micro, aes(x = skill2, y = value, fill = skill1)) +
  geom_col() +
  # Value label placed just inside the end of each bar.
  geom_text(
    aes(
      y = ifelse(value < 0, value + 1.5, value - 1.5),
      label = paste0(round(value, 0), "%")
    ),
    size = 2.5, colour = "white", fontface = "bold", family = font
  ) +
  # One colour per skill family actually present in the data.
  scale_fill_manual(values = colfunc2(n_distinct(Skill_micro$skill1))) +
  coord_flip() +
  # NOTE(review): title and subtitle are built from Skill_to_analyse and
  # Skill_data (one macro skill, by country) while the bars show Skill_micro
  # (all detailed skills) - confirm this wording is intended.
  labs(
    title = paste0(
      Skill_to_analyse,
      " imbalances : ",
      nrow(Skill_data %>% filter(value > 0)),
      " countries in need"
    ),
    subtitle = paste0(
      Skill_data %>%
        group_by(Region) %>%
        summarise(value = mean(value)) %>%
        arrange(value) %>%
        head(1) %>%
        select(Region) %>%
        as.character(),
      " in surplus"
    ),
    y = "Value", x = NULL,
    caption = "Source : OCDE, 2015"
  ) +
  geom_hline(yintercept = Skill_micro_mean) +
  annotate(
    "text",
    x = 2,
    y = Skill_micro_mean + 1.2,
    label = paste0("Mean : ", round(Skill_micro_mean, 0), "%"),
    vjust = 1.2,
    hjust = 0.1,
    size = 4,
    colour = purple
  ) +
  scale_x_discrete(guide = guide_axis(n.dodge = 1)) +
  guides(fill = guide_legend(title = "Skills")) +
  theme(
    legend.position = "right",
    legend.background = element_blank(),
    panel.background = element_blank(),
    legend.text = element_text(size = 8, family = font),
    axis.line = element_line(),
    axis.text.y = element_text(angle = 20),
    plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
    plot.caption = element_text(family = font, size = 10, color = blue)
  )
Skill_micro_graph

# ---- Dumbbell chart: detailed skills of Country1 vs Country2 ----
# One row per detailed skill with the value of each country side by side.
# The columns are literally named "Country1" / "Country2"; inside the dplyr
# and ggplot calls below those names refer to the columns, not to the
# global variables holding the country names.
Comparison_micro <- merge(
  OCDE_micro %>%
    filter(country == Country1) %>%
    select(Skill = skill2, Country1 = value),
  OCDE_micro %>%
    filter(country == Country2) %>%
    select(Skill = skill2, Country2 = value)
)

# Row-wise mean and maximum, computed with vectorised arithmetic (no
# rowwise() grouping left on the result); rows are sorted by the maximum.
Comparison_micro <- Comparison_micro %>%
  mutate(
    Mean = (Country1 + Country2) / 2,
    Max = pmax(Country1, Country2)
  ) %>%
  arrange(Max)
# Freeze the sorted order on the axis.
Comparison_micro$Skill <- factor(Comparison_micro$Skill, levels = unique(Comparison_micro$Skill))

colors <- c("Country1" = purple, "Country2" = blue)

# One row per country: mean value and position of its label, so that each
# mean line and label is drawn once instead of once per data row.
Comparison_means <- data.frame(
  series = c("Country1", "Country2"),
  mean_value = c(mean(Comparison_micro$Country1), mean(Comparison_micro$Country2)),
  label_x = c(4, 2)
)

ggplot(Comparison_micro) +
  geom_hline(yintercept = 0, col = grey) +
  geom_hline(
    data = Comparison_means,
    aes(yintercept = mean_value, color = series),
    alpha = 0.5
  ) +
  geom_segment(aes(x = Skill, xend = Skill, y = Country1, yend = Country2), color = "grey", size = .5) +
  geom_point(aes(x = Skill, y = Country1, color = "Country1"), size = 2) +
  geom_point(aes(x = Skill, y = Country2, color = "Country2"), size = 2) +
  coord_flip() +
  geom_text(
    data = Comparison_means,
    aes(
      x = label_x,
      y = mean_value + 1.2,
      label = paste0("Mean : ", round(mean_value, 0), "%"),
      color = series
    ),
    size = 3.5, vjust = 1.2, hjust = 0.1
  ) +
  # Exactly one label per colour key (keys sort as Country1, Country2).
  scale_color_manual(values = colors, labels = c(Country1, Country2)) +
  labs(
    title = paste("Skills imbalances between", Country1, "and", Country2),
    subtitle = paste0(
      "With a ",
      abs(round(Comparison_means$mean_value[1] - Comparison_means$mean_value[2], 0)),
      "-point difference, ",
      if (isTRUE(Comparison_means$mean_value[1] > Comparison_means$mean_value[2])) Country1 else Country2,
      " stands out for its need for skills"
    ),
    color = "Countries",
    caption = "Source : OCDE, 2015", x = NULL, y = NULL
  ) +
  theme(
    legend.position = c(0.85, 0.2),
    legend.background = element_blank(),
    panel.background = element_blank(),
    legend.text = element_text(size = 8, family = font),
    axis.line = element_line(),
    axis.text.y = element_text(angle = 20),
    plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
    plot.caption = element_text(family = font, size = 10, color = blue)
  )

# ---- Word cloud of the detailed skills of one country ----
Country1 <- "France"

# Split the skill names into words and sum the country's values per word.
# Note: the data frame is called `wordcloud`, like the plotting function
# called further down; R still resolves the call to the function.
wordcloud <- OCDE_micro %>%
  filter(country == Country1) %>%
  select(skill2, value) %>%
  unnest_tokens(output = "word", input = skill2, token = "words") %>%
  group_by(word) %>%
  summarise(value = sum(value)) %>%
  anti_join(stop_words, by = c("word")) %>%
  # Drop both words. The former test `word != a | word != b` was TRUE for
  # every word and therefore removed nothing.
  filter(!word %in% c("management", "resources")) %>%
  as.data.frame()

# Add the country name as the largest entry (30 above the current maximum).
# A typed one-row data frame keeps `value` numeric, so no conversion back
# from character is needed.
wordcloud <- rbind(
  wordcloud,
  data.frame(word = toupper(Country1), value = max(wordcloud$value) + 30)
)
# Map the values onto 1..21, rounded to one decimal, for use as frequencies.
wordcloud$value <- 1 + round(BBmisc::normalize(wordcloud$value, method = "range") * 20, 1)

wordcloud(
  words = wordcloud$word, freq = wordcloud$value, min.freq = 0,
  max.words = 50, random.order = FALSE, rot.per = 0, scale = c(4, .3), family = font,
  colors = rev(c(purple, pink, pink, blue, blue, cyan))
)

# ---- Faceted bars: detailed skills of Country1 grouped by skill family ----
Imbalance <- OCDE_micro %>%
  #filter(skill1 != "Complex Problem Solving Skills") %>%
  mutate(
    # Positive values are shortages, as in the rest of this script (title
    # below, colour-bar legend of the scatter plot, circle-packing groups).
    balance = ifelse(value > 0, "Shortage", "Surplus"),
    skill2 = fct_reorder(skill2, value)
  )
# Show "Complex Problem Solving Skills" inside the "Systems Skills" facet.
Imbalance$skill1[Imbalance$skill1 == "Complex Problem Solving Skills"] <- "Systems Skills"

# Facet label: parentheses become spaces, "Skills" is removed and spaces
# become line breaks.
Imbalance$skill1_clean <- gsub("[()]", " ", Imbalance$skill1) %>%
  str_remove_all(" Skills ") %>%
  str_remove_all(" Skills") %>%
  str_replace_all(" ", "\n")
# Labels holding more than one line break lose their last character.
Imbalance_long_label <- str_count(Imbalance$skill1_clean, "\n") > 1
Imbalance$skill1_clean[Imbalance_long_label] <-
  str_sub(Imbalance$skill1_clean[Imbalance_long_label], end = -2)

Imbalance <- Imbalance %>% filter(country == Country1)

# Average value per skill family, used to name the extremes in the title.
Imbalance_by_family <- Imbalance %>%
  group_by(skill1) %>%
  summarise(value = mean(value))

ggplot(data = Imbalance, aes(x = value, y = reorder(skill2, value), group = skill1_clean)) +
  geom_bar(aes(fill = balance), stat = "identity", position = position_dodge()) +
  # Fills pinned by name: positive bars stay blue and the others purple,
  # exactly as before the labels were corrected.
  scale_fill_manual(values = c(Shortage = blue, Surplus = purple)) +
  facet_grid(skill1_clean ~ ., scales = "free", space = "free", switch = "x") +
  labs(
    title = paste0(
      top_n(Imbalance_by_family, -1, value)$skill1,
      " surplus and\n",
      top_n(Imbalance_by_family, 1, value)$skill1,
      " shortage for ",
      Country1
    ),
    caption = "Source : OCDE, 2015", y = NULL, x = NULL
  ) +
  theme(
    strip.placement = "outside",
    strip.background = element_rect(fill = blue),
    strip.text = element_text(colour = 'white', face = "bold", family = font),
    strip.text.y.right = element_text(angle = 0, family = font),
    axis.text.y = element_text(angle = 20),
    legend.position = "none",
    legend.background = element_blank(),
    panel.background = element_blank(),
    legend.text = element_text(size = 8, family = font),
    axis.line = element_line(),
    plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
    plot.caption = element_text(family = font, size = 10, color = blue)
  )

# ---- Top 5 / Flop 5 countries for a selection of detailed skills ----
# Skills of interest, picked by position in the list of distinct skill2.
Skill_search <- (OCDE_micro$skill2 %>% unique())[c(4, 5, 2, 17)]

# Rows for those skills, without the "OECD" and "European Union" entries.
Skill_search_data <- OCDE_micro %>%
  filter(
    skill2 %in% Skill_search,
    country != "OECD",
    country != "European Union"
  )

# Rounded mean of the selected skills vs rounded mean of the whole table.
Skill_perso <- round(mean(Skill_search_data$value), 0)
Skill_mean <- round(mean(OCDE_micro$value), 0)

# Score of a country = sum of its values over the selected skills.
Skill_search_scores <- Skill_search_data %>%
  group_by(country) %>%
  summarise(Score = sum(value))
Skill_search_Flop <- Skill_search_scores %>%
  arrange(Score) %>%
  head(5)
Skill_search_Top <- Skill_search_scores %>%
  arrange(desc(Score)) %>%
  head(5)
# Top and Flop together; unique() avoids duplicated countries when fewer
# than ten countries are available.
Skill_search_Top <- unique(rbind(Skill_search_Top, Skill_search_Flop))

# Keep only the selected countries and attach their score.
Skill_search_data <- merge(Skill_search_data, Skill_search_Top)
Skill_search_data$country <- Skill_search_data$country %>% str_replace(" ", "\n")
# Order the axis by Score, the quantity that defines Top and Flop, so the
# separator drawn at 5.5 really splits the two groups.
Skill_search_data <- Skill_search_data %>%
  mutate(country = fct_reorder(country, Score))

ggplot(Skill_search_data, aes(x = country, y = value, fill = skill2)) +
  geom_col(position = "dodge") +
  geom_vline(xintercept = 5.5, col = blue) +
  scale_fill_manual(values = colfunc2(Skill_search_data$skill2 %>% unique() %>% length())) +
  annotate(
    geom = "text", x = 2,
    y = max(Skill_search_data$value) - 5,
    col = purple,
    fontface = 2,
    family = font,
    label = "Flop5\nSkills in surplus"
  ) +
  annotate(
    geom = "text", x = 9,
    y = min(Skill_search_data$value) + 5,
    col = blue,
    fontface = 2,
    family = font,
    label = "Top5\nSkills in shortage"
  ) +
  labs(
    title = "Top & Flop 5 most relevant countries according to selected skills",
    subtitle = if (Skill_perso > Skill_mean) {
      paste0("Skills lacking because ", abs(Skill_perso - Skill_mean), " points above average")
    } else {
      paste0("Skills in surplus because ", abs(Skill_perso - Skill_mean), " points below average")
    },
    caption = "Source : OCDE, 2015",
    y = NULL,
    x = NULL,
    fill = "Skills"
  ) +
  #coord_flip()+
  theme(
    legend.position = "right",
    legend.background = element_blank(),
    panel.background = element_blank(),
    legend.text = element_text(size = 8, family = font),
    axis.line = element_line(),
    axis.text.x = element_text(angle = 20, vjust = .35),
    plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
    plot.caption = element_text(family = font, size = 10, color = blue)
  )

# ---- Circle-packing view of the detailed skills of Country1 ----
library(data.tree)
library(circlepackeR)

# Mean value (one decimal) of every skill2 within its skill1 family.
population <- OCDE_micro %>%
  filter(country == Country1) %>%
  group_by(skill1, skill2) %>%
  summarise(value = round(mean(value), 1)) %>%
  ungroup()

# Build the hierarchy columns. Order matters inside mutate(): the leaf label
# and the path use the unshifted value, which is only overwritten last.
population <- population %>%
  mutate(
    root = "root",
    surgroup = ifelse(value > 0, "Shortage", "Surplus"),
    group = skill1,
    # Leaf label carries the value in parentheses.
    subgroup = paste0(skill2, " (", value, ")"),
    # Path world/surgroup/group/subgroup with every space replaced by "\u000d".
    pathString = str_replace_all(
      paste("world", surgroup, group, subgroup, sep = "/"),
      " ",
      "\u000d"
    ),
    # Shift the values so that the smallest one becomes 1 (used as size).
    value = value + 1 + abs(min(value))
  ) %>%
  select(root, surgroup, group, subgroup, value, pathString)

population <- as.Node(population)
p <- circlepackeR(
  population,
  size = "value",
  color_min = pink,
  color_max = "hsl(0, 0%, 0%)"
)
p
# ---- World map linking, per skill, the two extreme countries ----
Skill_search <- (OCDE_micro$skill2 %>% unique())[c(4, 5)]

# Capital cities of the countries present in the OECD data.
# NOTE(review): maps::world.cities appears to use short names such as "USA"
# and "UK" in `country.etc`; they are mapped to the long names used in this
# script so those capitals are not silently lost - verify against the data.
Capital_alias <- c("USA" = "United States of America", "UK" = "United Kingdom")
Capital <- world.cities %>%
  mutate(
    country = as.character(country.etc),
    country = ifelse(
      country %in% names(Capital_alias),
      unname(Capital_alias[country]),
      country
    )
  ) %>%
  filter(
    country %in% (OCDE_micro$country %>% unique()),
    capital == 1
  )

# For every selected skill keep the rows holding its minimum and maximum.
Skills_exchange <- OCDE_micro %>%
  filter(country != "OECD", country != "European Union") %>%
  filter(skill2 %in% Skill_search) %>%
  group_by(skill2) %>%
  filter(value == max(value) | value == min(value)) %>%
  ungroup()

# Attach the capital coordinates, then keep exactly two end points per
# skill: lowest value (ID FALSE) and highest value (ID TRUE). Pairing is
# done inside each skill, so a capital missing from the lookup or tied
# values cannot shift the pairs; skills without two distinct located
# countries are dropped.
Skills_exchange <- merge(Skills_exchange, Capital) %>%
  select(country, skill2, value, lat, long) %>%
  arrange(skill2, value) %>%
  group_by(skill2) %>%
  filter(n_distinct(country) >= 2) %>%
  slice(c(1, n())) %>%
  mutate(ID = row_number() == 2L) %>%
  ungroup() %>%
  as.data.frame()

# Split the two end points and put them side by side (suffix "1" marks the
# lowest-value end); both halves are in the same skill order.
Skills_exchange1 <- Skills_exchange[Skills_exchange$ID, ]
Skills_exchange2 <- Skills_exchange[!Skills_exchange$ID, ]
rownames(Skills_exchange1) <- NULL
rownames(Skills_exchange2) <- NULL
colnames(Skills_exchange2) <- paste0(colnames(Skills_exchange2), 1)
Skills_exchange_line <- cbind(Skills_exchange1, Skills_exchange2) %>%
  select(country, country1, long, lat, long1, lat1) %>%
  unique()
Skills_exchange_text <- Skills_exchange %>%
  select(country, lat, long) %>%
  unique()

# Background map.
map(
  'world',
  col = "#f2f2f2", fill = TRUE, bg = "white", lwd = 0.05,
  mar = rep(0, 4), border = 0, ylim = c(-80, 80)
)
points(x = Skills_exchange$long, y = Skills_exchange$lat, col = blue, cex = 3, pch = 20)
# One great-circle arc per pair; seq_len() skips the loop when there is none.
for (i in seq_len(nrow(Skills_exchange_line))) {
  inter <- gcIntermediate(
    c(Skills_exchange_line$long[i], Skills_exchange_line$lat[i]),
    c(Skills_exchange_line$long1[i], Skills_exchange_line$lat1[i]),
    n = 50, addStartEnd = TRUE, breakAtDateLine = FALSE
  )
  lines(inter, col = blue, lwd = 2)
}
# Country names next to the points (skipped when there is nothing to label).
if (nrow(Skills_exchange_text) > 0) {
  text(
    x = Skills_exchange_text$long,
    y = Skills_exchange_text$lat,
    labels = Skills_exchange_text$country,
    col = blue, cex = 1, pos = 4
  )
}
