#devtools::install_github("ricardo-bion/ggradar", dependencies = TRUE)

library(tidyverse)
library(fmsb)
library(showtext)
library(scales)
library(ggradar)
library(ggplot2)
library(rnaturalearth)
library(countrycode)
library(plotly)
library(leaflet)
library(wordcloud)
library(SnowballC)
library(tidytext)
library(maps)
library(geosphere)

# ---- Shared palette ---------------------------------------------------------
# Hex colours reused by every chart below. Note that the value bound to
# `purple` (#E83151) is a crimson red; the name is kept because the rest of the
# script refers to it.
grey <- "#EFEFEF"
green <- "#16DB93"
cyan <- "#01BAEF"
yellow <- "#F7B267"
blue <- "#255C99"
pink <- "#D36582"
purple <- "#E83151"

# Palette generators: call with the number of colours wanted, e.g. colfunc(4).
colfunc <- colorRampPalette(c(purple, blue, green, cyan))
colfunc2 <- colorRampPalette(c(blue, cyan, grey, pink, purple))

# ---- Typography -------------------------------------------------------------
# Register the Google font "Lato" under the family name stored in `font`, then
# let showtext render text automatically whenever a plot is drawn.
font <- "lato"
font_add_google("Lato", font)
showtext_auto()

# ---- Load the OECD tables ---------------------------------------------------
# Each .RData file restores one object into the workspace (`sjd1`, `sjd2`);
# they are copied to descriptive names and the originals are removed.
load("OECD skill job level 1.RData")
load("OECD skill job level 2.RData")

OCDE_macro <- sjd1
OCDE_micro <- sjd2
rm(sjd1, sjd2)

# Rename countries so they match the `admin` names used when joining with the
# rnaturalearth metadata further down.
country_renames <- c(
  "United States" = "United States of America",
  "Slovak Republic" = "Slovakia"
)
for (old_name in names(country_renames)) {
  OCDE_micro$country[OCDE_micro$country == old_name] <- country_renames[[old_name]]
  OCDE_macro$country[OCDE_macro$country == old_name] <- country_renames[[old_name]]
}

# Distinct countries present in the macro table (after renaming).
Countries_list <- unique(OCDE_macro$country)

# Multiply the stored values by 100 so they read as percentage points.
OCDE_macro$value <- OCDE_macro$value * 100
OCDE_micro$value <- OCDE_micro$value * 100

# Distinct level-1 skill categories.
Skills <- unique(OCDE_macro$skill1)

# Country metadata (population, GDP estimate, region) for the countries that
# appear in the OECD data. The result has exactly four columns:
# country, population, GDP, Region.
world <- ne_countries() %>%
  as.data.frame() %>%
  filter(admin %in% Countries_list) %>%
  transmute(
    country = admin,
    population = pop_est,
    GDP = gdp_md_est,
    Region = countrycode(sourcevar = admin,
                         origin = "country.name",
                         destination = "region")
  )

# Skill category analysed by the first bar chart and by the map.
Skill_to_analyse <- Skills[3]

# ---- Country ranking for the selected skill ---------------------------------
# Join the macro-level values with the country metadata, keep the skill under
# analysis and order the countries by value so the flipped bars form a ranking.
Skill_data <- merge(OCDE_macro, world) %>%
  filter(skill1 == Skill_to_analyse) %>%
  mutate(country = fct_reorder(country, value))

# Figures quoted in the title, subtitle and mean annotation. They are computed
# once here rather than inside aes(): a summary placed in aes() is drawn once
# per data row, which stacks identical lines/labels on top of each other.
Skill_graph_mean <- mean(Skill_data$value)
Skill_graph_in_need <- sum(Skill_data$value > 0, na.rm = TRUE)
Skill_graph_surplus_region <- Skill_data %>%
  group_by(Region) %>%
  summarise(value = mean(value)) %>%
  arrange(value) %>%
  head(1) %>%
  pull(Region) %>%
  as.character()

# One colour per region. At least four colours are requested so the output is
# unchanged for up to four regions, while more regions no longer exhaust the
# manual scale.
Skill_graph_colours <- colfunc(max(4, n_distinct(Skill_data$Region)))

Skill_graph <- ggplot(Skill_data, aes(x = country, y = value, fill = Region)) +
  geom_col() +
  # Value label placed just inside the end of each bar.
  geom_text(size = 2.5, colour = "white", fontface = "bold", family = font,
            aes(y = ifelse(value < 0, value + 1.5, value - 1.5),
                label = paste0(value %>% round(0), "%"))) +
  scale_fill_manual(values = Skill_graph_colours) +
  #geom_flag(x = 0, aes(image = Flag)) +
  coord_flip() +
  labs(title = paste0(Skill_to_analyse,
                      " imbalances :\n",
                      Skill_graph_in_need,
                      " countries in need"),
       subtitle = paste0(Skill_graph_surplus_region, " in surplus"),
       y = "Value", x = NULL,
       caption = "Source : OCDE, 2015") +
  # Single reference line and single label for the overall mean.
  geom_hline(yintercept = Skill_graph_mean) +
  annotate("text",
           x = 2,
           y = Skill_graph_mean + 1.1,
           label = paste0("Mean : ", round(Skill_graph_mean, 0), "%"),
           size = 4,
           vjust = 1.2,
           hjust = 0.1,
           colour = purple) +
  scale_x_discrete(guide = guide_axis(n.dodge = 1)) +
  theme(legend.position = c(0.85, 0.2),
        legend.background = element_blank(),
        panel.background = element_blank(),
        legend.text = element_text(size = 8, family = font),
        axis.line = element_line(),
        axis.text.y = element_text(angle = 20),
        plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
        plot.caption = element_text(family = font, size = 10, color = blue))

Skill_graph

# ---- Interactive choropleth of the selected skill ---------------------------
# World polygons with the selected skill's value attached by country name;
# countries that are not in Skill_data get NA.
map <- ne_countries()
#map <- map[map$admin %in% Countries_list,]

map$value <- Skill_data$value[match(map$admin, Skill_data$country)]

# Binned colour scale from blue (low values) through grey to the `purple`
# colour (high values), in 5-point steps over [-20, 40]. NA is drawn in white.
pal <- colorBin(
  palette = c(blue, grey, grey, purple), domain = map$value, na.color = "white",
  bins = seq(-20, 40, by = 5)
)

#map$value %>% max(na.rm = T)

# HTML hover label for each polygon (country name and rounded value).
map$labels <- paste0(
  "<strong> Country: </strong> ",
  map$admin, "<br/> ",
  "<strong> Skills: </strong> ",
  round(map$value, 2), "<br/> "
) %>%
  lapply(htmltools::HTML)

# Build the leaflet map.
LeafMap <- leaflet(map) %>%
  setView(lng = 0, lat = 30, zoom = 1.3) %>%
  addPolygons(
    fillColor = ~ pal(value),
    color = blue,
    weight = 1,
    opacity = 1,
    fillOpacity = 1,
    label = ~labels,
    # Argument spelled out in full (the original relied on partial matching of
    # `highlight`), and `fill` given as a logical rather than the number 1.
    highlightOptions = highlightOptions(
      color = cyan,
      bringToFront = TRUE,
      fill = TRUE, fillOpacity = 1
    )
  ) %>%
  leaflet::addLegend(
    pal = pal, values = ~value,
    opacity = 1, title = "Skills imbalances"
  )
LeafMap
# ---- Imbalance versus country size ------------------------------------------
# One row per country (United States excluded): mean value across skills plus
# GDP and population, both divided by one million.
# NOTE(review): the GDP column comes from `gdp_md_est`, which presumably is
# already expressed in millions; confirm that the "(million)" axis label below
# is the intended unit after this division.
Country_graph <- merge(OCDE_macro, world) %>%
  filter(country != "United States of America") %>%
  group_by(country) %>%
  summarise(value = mean(value),
            GDP = mean(GDP),
            population = mean(population)) %>%
  mutate(GDP = GDP / 1000000,
         population = population / 1000000)

# Which size measure goes on the y axis: "GDP" or "Population".
Choice_GDP_Pop <- "GDP"

# Keep only the chosen measure. A plain if/else is used because the condition
# is a scalar and the branches are assignments.
if (Choice_GDP_Pop == "Population") {
  Country_graph <- Country_graph %>% select(-GDP)
} else {
  Country_graph <- Country_graph %>% select(-population)
}

# The remaining measure is the third column; give it a neutral name for aes().
colnames(Country_graph)[3] <- "infos"

# Countries at both ends of the scale, quoted in the subtitle.
Country_graph_surplus <- Country_graph %>%
  arrange(value) %>%
  head(1) %>%
  pull(country) %>%
  as.character()
Country_graph_shortage <- Country_graph %>%
  arrange(desc(value)) %>%
  head(1) %>%
  pull(country) %>%
  as.character()

Country_plot <- ggplot(Country_graph, aes(x = value, y = infos, color = value)) +
  geom_vline(xintercept = 0, col = grey) +
  geom_point() +
  geom_text(aes(label = country),
            family = font,
            hjust = -0.1,
            vjust = -0.5,
            size = 3,
            check_overlap = TRUE) +
  geom_smooth(color = blue, method = lm, se = FALSE) +
  scale_color_gradient(low = purple, high = blue) +
  guides(color = guide_colorsteps(barwidth = 15,
                                  barheight = 0.5,
                                  title.position = "top",
                                  title.hjust = 0.5,
                                  title = "Scale where -100 represents a surplus\n of skills and 100 a lack")) +
  ylim(0, 0.5 + Country_graph$infos %>% max()) +
  scale_x_continuous(
    limits = c(-15, 45),
    breaks = c(-15, 0, 15, 30, 45)) +
  labs(title = paste0("The larger the ", Choice_GDP_Pop, ", the greater the skills shortage"),
       subtitle = paste0("The extremes : ",
                         Country_graph_surplus,
                         " in surplus and ",
                         Country_graph_shortage,
                         " in shortage"),
       x = NULL, y = paste0(Choice_GDP_Pop, " (million)"),
       caption = "Source : OCDE, 2015") +
  theme(legend.background = element_blank(),
        legend.position = "bottom",
        panel.background = element_blank(),
        legend.text = element_text(size = 8, family = font),
        axis.line = element_line(),
        axis.text.y = element_text(angle = 0, family = font),
        plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
        plot.caption = element_text(family = font, size = 10, color = blue))

Country_plot

# ---- Radar chart comparing two countries ------------------------------------
# Pivot the macro table to one row per country and one column per level-1
# skill, move the country names into a leading `Country` column, and rescale
# every skill column with scales::rescale (0-1 by default).
Skill_radar <- xtabs(value ~ country + skill1, data = OCDE_macro) %>%
  as.data.frame.matrix() %>%
  rownames_to_column(var = "Country") %>%
  mutate_at(vars(-Country), rescale)

# Shorten the axis labels: drop parentheses and the words "Skills"/"Basic",
# then put each remaining word on its own line.
radar_labels <- colnames(Skill_radar) %>%
  str_replace_all("[()]", " ") %>%
  str_remove_all("Skills") %>%
  str_remove_all("Basic") %>%
  str_replace_all(" ", "\n") %>%
  str_remove_all("\n\n")
colnames(Skill_radar) <- radar_labels

# Countries for comparison
Country1 <- "France" #Countries_list[2]
Country2 <- "Spain" #Countries_list[16]
Comparison <- c(Country1, Country2)

# Plot the radar chart for the two selected countries.
Skill_radar_selected <- Skill_radar %>%
  filter(Country %in% Comparison)

Skill_radar_graph <- ggradar(Skill_radar_selected,
                             font.radar = font,
                             grid.label.size = 4,  # size of the grid annotations
                             axis.label.size = 4,
                             group.point.size = 5, # size of the points
                             group.colours = c(blue, purple)) +
  labs(title = paste("Skills imbalances between", Country1, "and", Country2),
       caption = "Source : OCDE, 2015") +
  theme(legend.position = c(-0.1, 0.2),
        legend.justification = "left",
        legend.text = element_text(size = 10, family = font),
        legend.key = element_rect(fill = NA, color = NA),
        legend.background = element_blank(),
        plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
        plot.caption = element_text(family = font, size = 10, color = blue))


Skill_radar_graph

# ---- Mean imbalance per detailed skill --------------------------------------
# Average value of every level-2 skill over all rows of OCDE_micro, ordered by
# value and coloured by the level-1 category it belongs to.
Skill_micro <- OCDE_micro %>%
  group_by(skill1, skill2) %>%
  summarise(value = mean(value)) %>%
  ungroup() %>%
  mutate(skill2 = fct_reorder(skill2, value))

# Overall mean, computed once so the reference line and its label are drawn a
# single time instead of once per row (which is what a summary inside aes()
# produces).
Skill_micro_mean <- mean(Skill_micro$value)

# One colour per level-1 category. At least seven colours are requested so the
# output is unchanged for up to seven categories, while more categories no
# longer exhaust the manual scale.
Skill_micro_colours <- colfunc2(max(7, n_distinct(Skill_micro$skill1)))

# NOTE(review): the title and subtitle below are built from Skill_data (the
# per-country table for `Skill_to_analyse`), not from Skill_micro, so they
# describe the country chart rather than this one. The text is left unchanged;
# confirm whether that is intended.
Skill_micro_graph <- ggplot(Skill_micro, aes(x = skill2, y = value, fill = skill1)) +
  geom_col() +
  # Value label placed just inside the end of each bar.
  geom_text(size = 2.5, colour = "white", fontface = "bold", family = font,
            aes(y = ifelse(value < 0, value + 1.5, value - 1.5),
                label = paste0(value %>% round(0), "%"))) +
  scale_fill_manual(values = Skill_micro_colours) +
  coord_flip() +
  labs(title = paste0(Skill_to_analyse,
                      " imbalances : ",
                      nrow(Skill_data %>% filter(value > 0)),
                      " countries in need"),
       subtitle = paste0(Skill_data %>%
                           group_by(Region) %>%
                           summarise(value = mean(value)) %>%
                           arrange(value) %>%
                           head(1) %>%
                           select(Region) %>%
                           as.character(),
                         " in surplus"),
       y = "Value", x = NULL,
       caption = "Source : OCDE, 2015") +
  geom_hline(yintercept = Skill_micro_mean) +
  annotate("text",
           x = 2,
           y = Skill_micro_mean + 1.2,
           label = paste0("Mean : ", round(Skill_micro_mean, 0), "%"),
           size = 4,
           vjust = 1.2,
           hjust = 0.1,
           colour = purple) +
  scale_x_discrete(guide = guide_axis(n.dodge = 1)) +
  guides(fill = guide_legend(title = "Skills")) +
  theme(legend.position = "right",
        legend.background = element_blank(),
        panel.background = element_blank(),
        legend.text = element_text(size = 8, family = font),
        axis.line = element_line(),
        axis.text.y = element_text(angle = 20),
        plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
        plot.caption = element_text(family = font, size = 10, color = blue))

Skill_micro_graph

# ---- Detailed-skill comparison of two countries (dumbbell chart) ------------
# One row per level-2 skill with the value of each country side by side. The
# columns are named after the roles ("Country1"/"Country2"), not the country
# names, so the plotting code does not depend on the selection.
Comparison_micro <- merge(
  OCDE_micro %>%
    filter(country == Country1) %>%
    select(skill2, value) %>%
    `colnames<-`(c("Skill", "Country1")),
  OCDE_micro %>%
    filter(country == Country2) %>%
    select(skill2, value) %>%
    `colnames<-`(c("Skill", "Country2"))
)

# Per-skill mean and maximum of the two countries, computed with vectorised
# arithmetic (no rowwise() grouping is left attached to the result). Rows are
# sorted by the larger of the two values.
Comparison_micro <- Comparison_micro %>%
  mutate(Mean = (Country1 + Country2) / 2,
         Max = pmax(Country1, Country2)) %>%
  arrange(Max)

# Freeze the sorted order as factor levels so the axis follows it.
Comparison_micro$Skill <- factor(Comparison_micro$Skill, levels = unique(Comparison_micro$Skill))

colors <- c("Country1" = purple, "Country2" = blue)

# One row per country holding its overall mean, label and label position.
# Drawing the mean lines/labels from this table renders each of them once;
# putting mean() inside aes() on the full data draws one copy per skill row,
# which also cancels out the alpha of the lines.
Comparison_means <- data.frame(
  series = c("Country1", "Country2"),
  mean_value = c(mean(Comparison_micro$Country1), mean(Comparison_micro$Country2)),
  label_x = c(4, 2),
  stringsAsFactors = FALSE
)
Comparison_means$label <- paste0("Mean : ", round(Comparison_means$mean_value, 0), "%")

# Subtitle figures: absolute rounded gap between the two means, and the
# country with the higher mean.
Comparison_gap <- abs(round(Comparison_means$mean_value[1] - Comparison_means$mean_value[2], 0))
Comparison_leader <- if (Comparison_means$mean_value[1] > Comparison_means$mean_value[2]) {
  Country1
} else {
  Country2
}

ggplot(Comparison_micro) +
  geom_hline(yintercept = 0, col = grey) +
  geom_hline(data = Comparison_means,
             aes(yintercept = mean_value, color = series), alpha = 0.5) +
  geom_segment(aes(x = Skill, xend = Skill, y = Country1, yend = Country2), color = "grey", size = .5) +
  geom_point(aes(x = Skill, y = Country1, color = "Country1"), size = 2) +
  geom_point(aes(x = Skill, y = Country2, color = "Country2"), size = 2) +
  coord_flip() +
  geom_text(data = Comparison_means,
            aes(x = label_x, y = mean_value + 1.2, label = label, color = series),
            size = 3.5, vjust = 1.2, hjust = 0.1) +
  # Labels are matched to the colour keys by name; the unused third label
  # ("Range") of the original has been dropped.
  scale_color_manual(values = colors,
                     labels = c("Country1" = Country1, "Country2" = Country2)) +
  labs(title = paste("Skills imbalances between", Country1, "and", Country2),
       subtitle = paste0("With a ",
                         Comparison_gap, "-point difference, ",
                         Comparison_leader,
                         " stands out for its need for skills"),
       color = "Countries",
       caption = "Source : OCDE, 2015", x = NULL, y = NULL) +
  theme(legend.position = c(0.85, 0.2),
        legend.background = element_blank(),
        panel.background = element_blank(),
        legend.text = element_text(size = 8, family = font),
        axis.line = element_line(),
        axis.text.y = element_text(angle = 20),
        plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
        plot.caption = element_text(family = font, size = 10, color = blue))

# ---- Word cloud of skill vocabulary for one country -------------------------
Country1 <- "France"

# Split every level-2 skill name into words, sum the country's values per word
# and drop stop words plus the two generic terms "management" and "resources".
# (The original condition `word != "management" | word != "resources"` is true
# for every word, so it never removed anything.)
wordcloud <- OCDE_micro %>%
  filter(country == Country1) %>%
  select(skill2, value) %>%
  unnest_tokens(output = "word", input = skill2, token = "words") %>%
  group_by(word) %>%
  summarise(value = sum(value)) %>%
  anti_join(stop_words, by = c("word")) %>%
  filter(!word %in% c("management", "resources")) %>%
  as.data.frame()

# Add the country name as the heaviest entry (30 above the current maximum) so
# it shows up as the largest word. The row is appended as a data frame so the
# numeric column is not coerced to character on the way.
wordcloud <- rbind(
  wordcloud,
  data.frame(word = toupper(Country1),
             value = max(wordcloud$value) + 30,
             stringsAsFactors = FALSE)
)
wordcloud$value <- as.numeric(wordcloud$value)

# Map the weights to the range 1-21: range-normalise, scale by 20, round to
# one decimal, then add 1.
wordcloud$value <- 1 + round(BBmisc::normalize(wordcloud$value, method = "range") * 20, 1)

# `wordcloud` is both the data frame above and the plotting function; R skips
# non-function objects when resolving a call, so this calls the function.
wordcloud(words = wordcloud$word, freq = wordcloud$value, min.freq = 0,
          max.words = 50, random.order = FALSE, rot.per = 0, scale = c(4, .3), family = font,
          colors = rev(c(purple, pink, pink, blue, blue, cyan)))

# ---- Shortage / surplus per detailed skill for one country ------------------
# Positive values are labelled "Shortage" and the rest "Surplus", matching the
# convention used by the other charts in this script (positive = lack of
# skills). The original labels were swapped; the colours are pinned by name
# below so the rendered bars keep the same colours as before.
Imbalance <- OCDE_micro %>%
  #filter(skill1 != "Complex Problem Solving Skills") %>%
  mutate(balance = ifelse(value > 0, "Shortage", "Surplus"),
         skill2 = fct_reorder(skill2, value))

# Fold the "Complex Problem Solving Skills" category into "Systems Skills".
Imbalance$skill1[Imbalance$skill1 == "Complex Problem Solving Skills"] <- "Systems Skills"

# Compact facet labels: drop parentheses and the word "Skills", one word per
# line.
Imbalance$skill1_clean <- gsub("[()]", " ", Imbalance$skill1) %>%
  str_remove_all(" Skills ") %>%
  str_remove_all(" Skills") %>%
  str_replace_all(" ", "\n")

# Labels with more than one line break lose their last character (the trailing
# line break left behind by a closing parenthesis).
Imbalance_long_label <- str_count(Imbalance$skill1_clean, "\n") > 1
Imbalance$skill1_clean[Imbalance_long_label] <-
  Imbalance$skill1_clean[Imbalance_long_label] %>% str_sub(end = -2)

Imbalance <- Imbalance %>% filter(country == Country1)

# Level-1 categories with the lowest and highest mean value for this country.
# Sorting and taking exactly one row from each end always yields a single
# name, even when several categories tie.
Imbalance_skill_means <- Imbalance %>%
  group_by(skill1) %>%
  summarise(value = mean(value)) %>%
  arrange(value)
Imbalance_surplus_skill <- as.character(head(Imbalance_skill_means$skill1, 1))
Imbalance_shortage_skill <- as.character(tail(Imbalance_skill_means$skill1, 1))

ggplot(data = Imbalance, aes(x = value, y = reorder(skill2, value), group = skill1_clean)) +
  geom_bar(aes(fill = balance), stat = "identity", position = position_dodge()) +
  scale_fill_manual(values = c("Shortage" = blue, "Surplus" = purple)) +
  facet_grid(skill1_clean ~ ., scales = "free", space = "free", switch = "x") +
  labs(title = paste0(Imbalance_surplus_skill,
                      " surplus and\n",
                      Imbalance_shortage_skill,
                      " shortage for ",
                      Country1),
       caption = "Source : OCDE, 2015", y = NULL, x = NULL) +
  theme(strip.placement = "outside",
        strip.background = element_rect(fill = blue),
        strip.text = element_text(colour = 'white', face = "bold", family = font),
        strip.text.y.right = element_text(angle = 0, family = font),
        axis.text.y = element_text(angle = 20),
        legend.position = "none",
        legend.background = element_blank(),
        panel.background = element_blank(),
        legend.text = element_text(size = 8, family = font),
        axis.line = element_line(),
        plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
        plot.caption = element_text(family = font, size = 10, color = blue))

# ---- Top / Flop 5 countries for a set of selected skills --------------------
# Selected level-2 skills, picked by position among the distinct skill names.
Skill_search <- (OCDE_micro$skill2 %>% unique())[c(4, 5, 2, 17)]

# Rows for the selected skills, without the two aggregate entries.
Skill_search_data <- OCDE_micro[OCDE_micro$skill2 %in% Skill_search, ]

Skill_search_data <- Skill_search_data %>%
  filter(country != "OECD", country != "European Union")

# Rounded mean of the selected skills versus the rounded mean of all skills.
# NOTE(review): Skill_mean is taken over all of OCDE_micro, including the
# "OECD" and "European Union" rows excluded above; confirm this is intended.
Skill_perso <- (Skill_search_data$value %>% mean() %>% round(0))
Skill_mean <- (OCDE_micro$value %>% mean() %>% round(0))

# Score of a country = sum of its values over the selected skills.
Skill_search_scores <- Skill_search_data %>%
  group_by(country) %>%
  summarise(Score = sum(value))

Skill_search_Top <- Skill_search_scores %>%
  arrange(desc(Score)) %>%
  head(5)

Skill_search_Flop <- Skill_search_scores %>%
  arrange(Score) %>%
  head(5)

# Keep the ten selected countries and attach their score. distinct() guards
# against a country appearing in both lists when there are fewer than ten
# countries, which would duplicate its rows in the merge.
Skill_search_Top <- rbind(Skill_search_Top, Skill_search_Flop) %>% distinct()
Skill_search_data <- Skill_search_data[Skill_search_data$country %in% Skill_search_Top$country, ]
Skill_search_data <- merge(Skill_search_data, Skill_search_Top)

# Break two-word country names over two lines (first space only).
Skill_search_data$country <- Skill_search_data$country %>% str_replace(" ", "\n")

# Order the countries by Score, the quantity that defines Top and Flop, so the
# five lowest scores sit left of the separator at x = 5.5 and the five highest
# right of it. (Ordering by `value` uses the per-country median of the
# individual skills, which need not agree with the ranking by Score.)
Skill_search_data <- Skill_search_data %>%
  mutate(country = fct_reorder(country, Score))

# Subtitle: how far the selected skills sit from the overall mean.
Skill_search_gap <- abs(Skill_perso - Skill_mean)
Skill_search_subtitle <- if (Skill_perso > Skill_mean) {
  paste0("Skills lacking because ", Skill_search_gap, " points above average")
} else {
  paste0("Skills in surplus because ", Skill_search_gap, " points below average")
}

ggplot(Skill_search_data, aes(x = country, y = value, fill = skill2)) +
  geom_col(position = "dodge") +
  geom_vline(xintercept = 5.5, col = blue) +
  scale_fill_manual(values = colfunc2(Skill_search_data$skill2 %>% unique() %>% length())) +
  annotate(geom = "text", x = 2,
           y = max(Skill_search_data$value) - 5,
           col = purple,
           fontface = 2,
           family = font,
           label = "Flop5\nSkills in surplus") +
  annotate(geom = "text", x = 9,
           y = min(Skill_search_data$value) + 5,
           col = blue,
           fontface = 2,
           family = font,
           label = "Top5\nSkills in shortage") +
  labs(title = "Top & Flop 5 most relevant countries according to selected skills",
       subtitle = Skill_search_subtitle,
       caption = "Source : OCDE, 2015",
       y = NULL,
       x = NULL,
       fill = "Skills") +
  #coord_flip()+
  theme(legend.position = "right",
        legend.background = element_blank(),
        panel.background = element_blank(),
        legend.text = element_text(size = 8, family = font),
        axis.line = element_line(),
        axis.text.x = element_text(angle = 20, vjust = .35),
        plot.title = element_text(family = font, size = 16, face = "bold", color = blue),
        plot.caption = element_text(family = font, size = 10, color = blue))

library(data.tree)
library(circlepackeR)

# ---- Circle packing of one country's skills ---------------------------------
# Mean value per (skill1, skill2) for the selected country, rounded to one
# decimal, arranged as a hierarchy: Shortage/Surplus > level-1 > level-2.
population <- OCDE_micro %>%
  filter(country == Country1) %>%
  group_by(skill1, skill2) %>%
  summarise(value = round(mean(value), 1)) %>%
  ungroup() %>%
  transmute(
    root = "root",
    surgroup = ifelse(value > 0, "Shortage", "Surplus"),
    group = skill1,
    # The leaf label carries the rounded value, taken before the shift below.
    subgroup = paste0(skill2, " (", value, ")"),
    value = value
  )

# Build the node path for each leaf, then shift all values so the smallest one
# becomes strictly positive before it is used as a circle size.
population <- population %>%
  mutate(
    pathString = paste("world", surgroup, group, subgroup, sep = "/"),
    value = value + 1 + abs(min(value)),
    # Spaces in the path are replaced by a carriage-return character.
    pathString = str_replace_all(pathString, " ", "\u000d")
  )

population <- as.Node(population)

p <- circlepackeR(population, size = "value", color_min = pink, color_max = "hsl(0, 0%, 0%)")

p
# ---- Map linking the extreme countries of each selected skill ---------------
# Selected level-2 skills, picked by position among the distinct skill names.
Skill_search <- (OCDE_micro$skill2 %>% unique())[c(4, 5)]

# Capital city (with coordinates) of every country present in the OECD data.
# NOTE(review): the match is on exact country names; `world.cities` appears to
# use short names for some countries (e.g. "USA", "UK"), so those would find
# no capital here. Verify against the data.
Capital <- world.cities %>%
  filter(country.etc %in% unique(OCDE_micro$country),
         capital == 1) %>%
  mutate(country = country.etc)

# For each selected skill, keep the rows holding its maximum and its minimum
# value (aggregate entries excluded).
Skills_exchange <- OCDE_micro %>%
  filter(country != "OECD", country != "European Union") %>%
  filter(skill2 %in% Skill_search) %>%
  group_by(skill2) %>%
  filter(value == max(value) | value == min(value)) %>%
  ungroup()

# Attach the capital coordinates. Within each skill the rows are sorted by
# value, so the first row is the minimum and the last row is the maximum.
Skills_exchange <- merge(Skills_exchange, Capital) %>%
  select(country, skill2, value, lat, long) %>%
  arrange(skill2, value)

# Pair the two endpoints per skill explicitly instead of by row parity: a
# skill is only linked when both of its endpoints have a capital, so a missing
# match or a tie can no longer shift the pairing of the following skills.
Skills_exchange_pairs <- Skills_exchange %>%
  group_by(skill2) %>%
  filter(n() >= 2)

Skills_exchange1 <- Skills_exchange_pairs %>%
  slice(n()) %>%
  ungroup() %>%
  as.data.frame()
Skills_exchange2 <- Skills_exchange_pairs %>%
  slice(1) %>%
  ungroup() %>%
  as.data.frame()

# Suffix the second endpoint's columns with "1" so both fit in one table.
colnames(Skills_exchange2) <- paste0(colnames(Skills_exchange2), 1)

Skills_exchange_line <- cbind(Skills_exchange1, Skills_exchange2) %>%
  select(country, country1, long, lat, long1, lat1) %>%
  unique()

Skills_exchange_text <- Skills_exchange %>% select(country, lat, long) %>% unique()

# Base map. The function is called with its namespace because `map` is also
# the name of an object created earlier in this script and of a purrr function.
maps::map("world",
          col = "#f2f2f2", fill = TRUE, bg = "white", lwd = 0.05,
          mar = rep(0, 4), border = 0, ylim = c(-80, 80)
)
points(x = Skills_exchange$long, y = Skills_exchange$lat, col = blue, cex = 3, pch = 20)

# One great-circle arc per pair; seq_len() makes the loop a no-op when there
# is nothing to link.
for (i in seq_len(nrow(Skills_exchange_line))) {
  inter <- gcIntermediate(c(Skills_exchange_line$long[i], Skills_exchange_line$lat[i]),
                          c(Skills_exchange_line$long1[i], Skills_exchange_line$lat1[i]),
                          n = 50, addStartEnd = TRUE, breakAtDateLine = FALSE)
  lines(inter, col = blue, lwd = 2)
}

# text() rejects zero-length labels, so only label when there are endpoints.
if (nrow(Skills_exchange_text) > 0) {
  text(Skills_exchange_text$country, x = Skills_exchange_text$long, y = Skills_exchange_text$lat, col = blue, cex = 1, pos = 4)
}