Init notebook

# Bootstrap: install whichever package is missing, then attach the three
# libraries used by the notebook. requireNamespace() asks for one package
# directly instead of scanning the full installed.packages() matrix.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
  install.packages("RColorBrewer")
}
if (!requireNamespace("ggsankey", quietly = TRUE)) {
  # ggsankey is installed from GitHub through devtools, so devtools itself
  # has to be available before install_github() can be called.
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github("davidsjoberg/ggsankey")
}
library("tidyverse")
library("RColorBrewer")
library("ggsankey")

Leer fichero

# Download the advisers table and add `rango h-index`, a text label for the
# 5-point h-index band each adviser falls into.
asesores <- read_csv(
  "https://raw.githubusercontent.com/congosto/congosto.github.io/refs/heads/master/asesores_cientificos.csv"
) %>%
  mutate(
    `rango h-index` = dplyr::if_else(
      # Advisers without an h-index get their own category.
      is.na(hindex),
      "Desconocido",
      # Right-closed bands: (-Inf, 5], (5, 10], ..., (35, 40].
      # An h-index above 40 matches no band and is left as NA.
      # NOTE(review): the last label reads "35-40" although the band starts
      # at 36; it is kept verbatim because the h-index colour palette is
      # keyed on this exact string.
      as.character(
        cut(
          hindex,
          breaks = c(-Inf, seq(5, 40, by = 5)),
          labels = c(
            "h-index 00-05",
            "h-index 06-10",
            "h-index 11-15",
            "h-index 16-20",
            "h-index 21-25",
            "h-index 26-30",
            "h-index 31-35",
            "h-index 35-40"
          )
        )
      )
    )
  )

Distribución de asesores científicos ministeriales por tipo de ciencia

# Sankey diagram linking each adviser's field of science to their ministry.

# Long format for ggsankey. color_flow holds the science category: science
# rows use their own node, ministry rows take the node of the preceding row,
# so the fill mapped below follows the science of origin.
# NOTE(review): dplyr::lag(node) assumes make_long() emits each adviser's
# "ministerio" row right after their "ciencia" row -- confirm against the
# installed ggsankey version.
sankey_ciencia <- asesores %>%
  arrange(desc(ciencia)) %>%
  make_long(ciencia, ministerio) %>%
  mutate(
    color_flow = dplyr::case_when(
      x == "ciencia" ~ node,
      .default = dplyr::lag(node)
    )
  )

# Rows per node and their share of all advisers. Only the science nodes
# display these figures in their label.
TotalCount <- nrow(asesores)
dagg <- sankey_ciencia %>%
  dplyr::count(node) %>%
  dplyr::mutate(pct = n / TotalCount)

# Attach the per-node counts to every long-format row.
df2 <- merge(sankey_ciencia, dagg, by = "node", all.x = TRUE)

# Fixed colour per field of science (also reused by the profile tile chart).
color_ciencia <- c(
  "Ciencias formales" = "#ef666c",
  "Ciencias sociales" = "#f79312",
  "Ciencias naturales" = "#43b440",
  "Ingeniería" = "#1dafde"
)

ggplot(
  df2,
  aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    # Science nodes show "name n=<count> (<pct>%)"; ministries show the name.
    label = ifelse(
      x == "ciencia",
      paste0(node, " n=", n, ' (', round(pct * 100, 1), '%)'),
      node
    ),
    fill = color_flow
  )
) +
  geom_sankey(
    flow.alpha = 0.8,
    node.width = 0.8,
    flow.width = 100,
    size = 1000,
    space = 15
  ) +
  geom_sankey_label(
    # Right-align labels on the science side, left-align on the ministry side.
    aes(
      hjust = dplyr::case_when(
        x == "ciencia" ~ 1,
        x == "ministerio" ~ 0,
        .default = .5
      )
    ),
    size = 4.5,
    color = "white",
    space = 15
  ) +
  labs(
    title = "Distribución de asesores científicos ministeriales por tipo de ciencia",
    x = ""
  ) +
  scale_fill_manual(values = color_ciencia) +
  theme_sankey(base_size = 18) +
  theme(
    legend.position = 'none',
    plot.title = element_text(color = "#5a5856")
  )

Distribución de asesores científicos ministeriales por sexo

# Sankey diagram linking each adviser's sex to their ministry.

# Long format for ggsankey. make_long() receives only two columns, so x is
# either "sexo" or "ministerio": sex rows colour by their own node and every
# other row takes the node of the preceding row.
# NOTE(review): dplyr::lag(node) assumes make_long() emits each adviser's
# "ministerio" row right after their "sexo" row -- confirm against the
# installed ggsankey version.
sankey_sexo <- asesores %>%
  arrange(sexo, ciencia) %>%
  make_long(sexo, ministerio) %>%
  mutate(
    color_flow = dplyr::case_when(
      x == "sexo" ~ node,
      .default = dplyr::lag(node)
    )
  )

# Rows per node and their share of all advisers. Only the sex nodes display
# these figures in their label.
TotalCount <- nrow(asesores)
dagg <- sankey_sexo %>%
  dplyr::count(node) %>%
  dplyr::mutate(pct = n / TotalCount)

# Attach the per-node counts to every long-format row.
df2 <- merge(sankey_sexo, dagg, by = "node", all.x = TRUE)

# Fixed colour per sex code (also reused by the profile tile chart).
color_sexo <- c("H" = "#40a6b4", "M" = "#eb8cee")

ggplot(
  df2,
  aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    # Sex nodes show "name n=<count> (<pct>%)"; ministries show the name.
    label = ifelse(
      x == "sexo",
      paste0(node, " n=", n, ' (', round(pct * 100, 1), '%)'),
      node
    ),
    fill = color_flow
  )
) +
  geom_sankey(
    flow.alpha = 0.8,
    node.width = 0.8,
    flow.width = 10,
    size = 5,
    space = 15
  ) +
  geom_sankey_label(
    # Right-align labels on the sex side, left-align on the ministry side.
    aes(
      hjust = dplyr::case_when(
        x == "sexo" ~ 1,
        x == "ministerio" ~ 0,
        .default = .5
      )
    ),
    size = 4,
    color = "white",
    space = 15
  ) +
  labs(
    title = "Distribución de asesores científicos ministeriales por sexo",
    x = ""
  ) +
  scale_fill_manual(values = color_sexo) +
  theme_sankey(base_size = 18) +
  theme(
    legend.position = 'none',
    plot.title = element_text(color = "#5a5856")
  )

Distribución de asesores científicos ministeriales por h-index

# Sankey diagram linking each adviser's h-index band to their ministry.

# Long format for ggsankey. color_flow holds the h-index band: band rows use
# their own node, ministry rows take the node of the preceding row, so the
# fill mapped below follows the band of origin.
# NOTE(review): dplyr::lag(node) assumes make_long() emits each adviser's
# "ministerio" row right after their "rango h-index" row -- confirm against
# the installed ggsankey version.
sankey_hindex <- asesores %>%
  arrange(desc(`rango h-index`)) %>%
  make_long(`rango h-index`, ministerio) %>%
  mutate(
    color_flow = dplyr::case_when(
      x == "rango h-index" ~ node,
      .default = dplyr::lag(node)
    )
  )

# Rows per node and their share of all advisers. Only the h-index nodes
# display these figures in their label.
TotalCount <- nrow(asesores)
dagg <- sankey_hindex %>%
  dplyr::count(node) %>%
  dplyr::mutate(pct = n / TotalCount)

# Attach the per-node counts to every long-format row.
df2 <- merge(sankey_hindex, dagg, by = "node", all.x = TRUE)

# Fixed colour per h-index band (also reused by the profile tile chart).
# Palette names are matched case-sensitively against the data, so the key
# for advisers without an h-index must be spelled exactly like the
# "Desconocido" label assigned when the data is loaded.
color_hindex <- c(
  "Desconocido" = "grey",
  "h-index 00-05" = "#FDAE6B",
  "h-index 06-10" = "#F16913",
  "h-index 11-15" = "#8C2D04",
  "h-index 16-20" = "#A6BDDB",
  "h-index 21-25" = "#74A9CF",
  "h-index 26-30" = "#3690C0",
  "h-index 31-35" = "#0570B0",
  "h-index 35-40" = "#034E7B"
)

ggplot(
  df2,
  aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    # Band nodes show "name n=<count> (<pct>%)"; ministries show the name.
    label = ifelse(
      x == "rango h-index",
      paste0(node, " n=", n, ' (', round(pct * 100, 1), '%)'),
      node
    ),
    fill = color_flow
  )
) +
  geom_sankey(
    flow.alpha = 0.8,
    node.width = 0.8,
    space = 15
  ) +
  geom_sankey_label(
    # Right-align labels on the h-index side, left-align on the ministry side.
    aes(
      hjust = dplyr::case_when(
        x == "rango h-index" ~ 1,
        x == "ministerio" ~ 0,
        .default = .5
      )
    ),
    size = 4.5,
    color = "white",
    space = 15
  ) +
  labs(
    title = "Distribución de asesores científicos ministeriales por rango h-index",
    x = ""
  ) +
  scale_fill_manual(values = color_hindex) +
  theme_sankey(base_size = 18) +
  theme(
    legend.position = 'none',
    plot.title = element_text(color = "#5a5856")
  )

Distribución de asesores científicos ministeriales por cargo

# Sankey diagram linking each adviser's position (cargo) to their ministry.

# Long format for ggsankey. color_flow holds the position: position rows use
# their own node, ministry rows take the node of the preceding row, so the
# fill mapped below follows the position of origin.
# NOTE(review): dplyr::lag(node) assumes make_long() emits each adviser's
# "ministerio" row right after their "cargo" row -- confirm against the
# installed ggsankey version.
sankey_cargo <- asesores %>%
  arrange(desc(cargo)) %>%
  make_long(cargo, ministerio) %>%
  mutate(
    color_flow = dplyr::case_when(
      x == "cargo" ~ node,
      .default = dplyr::lag(node)
    )
  )

# Rows per node and their share of all advisers. Only the position nodes
# display these figures in their label.
TotalCount <- nrow(asesores)
dagg <- sankey_cargo %>%
  dplyr::count(node) %>%
  dplyr::mutate(pct = n / TotalCount)

# Attach the per-node counts to every long-format row.
df2 <- merge(sankey_cargo, dagg, by = "node", all.x = TRUE)

# Show the six Dark2 colours that the palette below is copied from.
print(brewer.pal(6, 'Dark2'))
## [1] "#1B9E77" "#D95F02" "#7570B3" "#E7298A" "#66A61E" "#E6AB02"

# Fixed colour per position (also reused by the profile tile chart).
color_cargo <- c(
  "Catedrática/o" = "#1B9E77",
  "Científica/o titular" = "#D95F02",
  "Funcionaria/o" = "#7570B3",
  "Oficial científica/o" = "#E7298A",
  "Profesor/a" = "#66A61E",
  "Profesor/a titular" = "#E6AB02"
)

ggplot(
  df2,
  aes(
    x = x,
    next_x = next_x,
    node = node,
    next_node = next_node,
    # Position nodes show "name n=<count> (<pct>%)"; ministries show the name.
    label = ifelse(
      x == "cargo",
      paste0(node, " n=", n, ' (', round(pct * 100, 1), '%)'),
      node
    ),
    fill = color_flow
  )
) +
  geom_sankey(
    flow.alpha = 0.8,
    node.width = 0.8,
    flow.width = 100,
    size = 1000,
    space = 15
  ) +
  geom_sankey_label(
    # Right-align labels on the position side, left-align on the ministry side.
    aes(
      hjust = dplyr::case_when(
        x == "cargo" ~ 1,
        x == "ministerio" ~ 0,
        .default = .5
      )
    ),
    size = 4.5,
    color = "white",
    space = 15
  ) +
  labs(
    title = "Distribución de asesores científicos ministeriales por cargo",
    x = ""
  ) +
  scale_fill_manual(values = color_cargo) + # categorical palette
  theme_sankey(base_size = 18) +
  theme(
    legend.position = 'none',
    plot.title = element_text(color = "#5a5856")
  )

Distribución de asesores científicos ministeriales por tipo de ciencia, sexo, h-index y cargo

# Tile chart of adviser profiles: ministries on the y axis, one x position
# per profile trait, each tile filled and labelled with the trait's value.

# Left-to-right order of the traits on the x axis.
order_perfil <- c("ciencia", "sexo", "rango h-index", "cargo")

# One row per (ministry, trait). perfil_ajustado is the value right-padded
# to 25 characters and is what geom_text() prints inside the tile.
perfiles <- asesores %>%
  select(ciencia, sexo, `rango h-index`, cargo, ministerio) %>%
  pivot_longer(
    cols = all_of(order_perfil),
    names_to = "caracteristicas",
    values_to = "perfil"
  ) %>%
  mutate(
    perfil_ajustado = str_pad(perfil, width = 25, side = "right"),
    caracteristicas = factor(caracteristicas, levels = order_perfil)
  )

# Single lookup covering every trait value, built from the section palettes.
color_perfil <- c(color_ciencia, color_sexo, color_hindex, color_cargo)

ggplot(perfiles, aes(x = caracteristicas, y = ministerio, fill = perfil)) +
  geom_tile(aes(width = 2, height = 1)) +
  geom_text(
    aes(label = perfil_ajustado),
    color = "white",
    size = 4,
    hjust = 1,
    vjust = .5
  ) +
  scale_fill_manual(values = color_perfil) +
  scale_x_discrete(expand = c(0, 1)) +
  labs(
    x = "",
    y = "",
    title = "Perfil de los asesores científicos ministeriales",
    fill = "",
    caption = "@congosto\nFuente: onac.gob.es"
  ) +
  theme_light(base_size = 18) +
  theme(
    legend.position = "none",
    plot.title = element_text(color = "#5a5856"),
    axis.title.y = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank()
  )