Introducción

Desarrollo de grafos aplicados en proyectos de Organizational Network Analysis.

pacman::p_load(tidyverse, onadata, visNetwork, igraph, networkD3)

Programa de referidos

El dataset Vamos a trabajar con un dataset armado con los contactos de LinkedIn de Inés Murtagh, Sofía Weintraub y Josefina Soto Acebal.

# Carga de datos
contactos <- read_delim("/Users/inesmurtagh/Downloads/Connections.csv", delim = ";")
glimpse(contactos)
## Rows: 1,088
## Columns: 7
## $ Origen         <chr> "Sofia Weintraub", "Sofia Weintraub", "Sofia Weintraub"…
## $ Contact        <chr> "Katerina Postel", "Tomás Falaq", "Martin Hadid", "Tomá…
## $ URL            <chr> "https://www.linkedin.com/in/katerina-postel-a21aa5199"…
## $ Company        <chr> "Accenture", "Natura &Co", "Google", "Growketing", "Ste…
## $ Position       <chr> "Procurement Operations Senior Analyst", "Becario de fi…
## $ `Connected On` <chr> "16 Apr 2024", "16 Apr 2024", "15 Apr 2024", "15 Apr 20…
## $ ...7           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…

Nos quedamos con los datos de aquellos contactos que trabajan en algun area relacionada con Datos.

# Filtrar donde la posición diga "Data Scientist", "Data Analyst" o "Data Analytics"
keywords <- c('data', 'datos', 'CDO')
datos <- contactos %>% filter(str_detect(Position, regex(paste(keywords, collapse="|"), ignore_case=TRUE, perl=TRUE)))


glimpse(datos)
## Rows: 101
## Columns: 7
## $ Origen         <chr> "Sofia Weintraub", "Sofia Weintraub", "Sofia Weintraub"…
## $ Contact        <chr> "Tomás Zanchetti", "Tomás Said", "Pablo Castro", "Ramón…
## $ URL            <chr> "https://www.linkedin.com/in/tomas-zanchetti", "https:/…
## $ Company        <chr> "Growketing", "KimotionStudio", "Novakorp", "Ripley Chi…
## $ Position       <chr> "Head of Data & Technology", "Consultor Data Science", …
## $ `Connected On` <chr> "15 Apr 2024", "09 Apr 2024", "04 Apr 2024", "28-mar-24…
## $ ...7           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
# Identificamos los datos únicos de origen
origen <- datos %>% distinct(Origen) %>% rename(label=Origen)

# Identificamos los datos únicos de contacto
contacto <- datos %>% distinct(Contact) %>% rename(label=Contact)
# Unificamos los dos dataframes
nodes <- full_join(origen, contacto, by = "label")

# Añadimos una columna de ID
nodes <- nodes %>% rowid_to_column("id")

nodes
## # A tibble: 77 × 2
##       id label               
##    <int> <chr>               
##  1     1 Sofia Weintraub     
##  2     2 Ines Murtagh        
##  3     3 Josefina Soto Acebal
##  4     4 Tomás Zanchetti     
##  5     5 Tomás Said          
##  6     6 Pablo Castro        
##  7     7 Ramón Murias Palomer
##  8     8 Santiago Ruffini    
##  9     9 Sebastian Kleiner   
## 10    10 Pablo Gomez         
## # ℹ 67 more rows
# Calculamos la cantidad de conexiones entre Origen y Contacto
conexion <- datos %>% 
  group_by(Origen, Contact) %>% 
  summarise(peso = n()) %>% 
  ungroup()

# Indico a las aristas cuál es el nodo de origen y cuál el de destino
aristas <- conexion %>% 
  left_join(nodes, by = c("Origen" = "label")) %>% 
  rename(from = id)

aristas <- aristas %>% 
  left_join(nodes, by = c("Contact" = "label")) %>% 
  rename(to = id)

# Ver el dataset
aristas
## # A tibble: 101 × 5
##    Origen       Contact                   peso  from    to
##    <chr>        <chr>                    <int> <int> <int>
##  1 Ines Murtagh AGUSTIN LARA ACOSTA          1     2    40
##  2 Ines Murtagh Abril Noguera                1     2    60
##  3 Ines Murtagh Azul de los Ángeles Makk     1     2    65
##  4 Ines Murtagh Bruno Soifer                 1     2    49
##  5 Ines Murtagh Camila Collado               1     2    42
##  6 Ines Murtagh Camila Pettinato             1     2    41
##  7 Ines Murtagh Delfina Fiorellino           1     2    67
##  8 Ines Murtagh Ezequiel Eliano Sombory      1     2    59
##  9 Ines Murtagh Ezequiel Shinzato            1     2    66
## 10 Ines Murtagh Francisca Sulzberger         1     2    64
## # ℹ 91 more rows
# Seleccionamos algunas columnas
aristas <- select(aristas, from, to, peso)

Creación del grafo

edges <- mutate(aristas, width = peso/5 + 1)

nodes$color <- c(rep("#DD6B06", 3), rep("#2CAFBB", 74))

# Visualizar el grafo
referidos <- visNetwork(nodes, aristas) %>% 
  visIgraphLayout(layout = "layout_with_fr") %>% 
    visNodes(color = list(background = "#5DBAC3",
                        border = "#01636D")) %>% 
  visEdges(color = list(color = "grey", highlight = "#014D54" )) %>% 
  visOptions(highlightNearest = TRUE)

# visSave(referidos, file = "referidos.html")

referidos

Otro paquete:

# Otra alternativa
nodes_d3 <- mutate(nodes, id = id - 1)
edges_d3 <- mutate(aristas, from = from - 1, to = to - 1)

forceNetwork(Links = edges_d3, Nodes = nodes_d3, Source = "from", Target = "to", 
             NodeID = "label", Group = "color", Value = "peso", 
             opacity = 1, fontSize = 16, zoom = TRUE)
LS0tCnRpdGxlOiAiT05BOiBPcmdhbml6YXRpb24gTmV0d29yayBBbmFseXNpcyIKYXV0aG9yOiAiSW5lcywgSm9zZWZpbmEgeSBTb2bDrWEiCmRhdGU6ICIxOS8wNC8yMDI0IgpvdXRwdXQ6IAogIGh0bWxfZG9jdW1lbnQ6CiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDogdHJ1ZQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQogICAgY29kZV9mb2xkaW5nOiBzaG93Ci0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSwgbWVzc2FnZSA9IEZBTFNFLCB3YXJuaW5nID0gRkFMU0UpCmBgYAoKIyBJbnRyb2R1Y2Npw7NuCgpEZXNhcnJvbGxvIGRlIGdyYWZvcyBhcGxpY2Fkb3MgZW4gcHJveWVjdG9zIGRlICoqT3JnYW5pemF0aW9uYWwgTmV0d29yayBBbmFseXNpcyoqLgoKYGBge3IgcGFxdWV0ZXN9CnBhY21hbjo6cF9sb2FkKHRpZHl2ZXJzZSwgb25hZGF0YSwgdmlzTmV0d29yaywgaWdyYXBoLCBuZXR3b3JrRDMpCmBgYAoKIyBQcm9ncmFtYSBkZSByZWZlcmlkb3MKCipFbCBkYXRhc2V0KgpWYW1vcyBhIHRyYWJhamFyIGNvbiB1biBkYXRhc2V0IGFybWFkbyBjb24gbG9zIGNvbnRhY3RvcyBkZSBMaW5rZWRJbiBkZSBJbsOpcyBNdXJ0YWdoLCBTb2bDrWEgV2VpbnRyYXViIHkgSm9zZWZpbmEgU290byBBY2ViYWwuCgpgYGB7ciBkYXRvczF9CiMgQ2FyZ2EgZGUgZGF0b3MKY29udGFjdG9zIDwtIHJlYWRfZGVsaW0oIi9Vc2Vycy9pbmVzbXVydGFnaC9Eb3dubG9hZHMvQ29ubmVjdGlvbnMuY3N2IiwgZGVsaW0gPSAiOyIpCmdsaW1wc2UoY29udGFjdG9zKQpgYGAKCk5vcyBxdWVkYW1vcyBjb24gbG9zIGRhdG9zIGRlIGFxdWVsbG9zIGNvbnRhY3RvcyBxdWUgdHJhYmFqYW4gZW4gYWxndW4gYXJlYSByZWxhY2lvbmFkYSBjb24gRGF0b3MuCgpgYGB7cn0KIyBGaWx0cmFyIGRvbmRlIGxhIHBvc2ljacOzbiBkaWdhICJEYXRhIFNjaWVudGlzdCIsICJEYXRhIEFuYWx5c3QiIG8gIkRhdGEgQW5hbHl0aWNzIgprZXl3b3JkcyA8LSBjKCdkYXRhJywgJ2RhdG9zJywgJ0NETycpCmRhdG9zIDwtIGNvbnRhY3RvcyAlPiUgZmlsdGVyKHN0cl9kZXRlY3QoUG9zaXRpb24sIHJlZ2V4KHBhc3RlKGtleXdvcmRzLCBjb2xsYXBzZT0ifCIpLCBpZ25vcmVfY2FzZT1UUlVFLCBwZXJsPVRSVUUpKSkKCgpnbGltcHNlKGRhdG9zKQpgYGAKCmBgYHtyfQojIElkZW50aWZpY2Ftb3MgbG9zIGRhdG9zIMO6bmljb3MgZGUgb3JpZ2VuCm9yaWdlbiA8LSBkYXRvcyAlPiUgZGlzdGluY3QoT3JpZ2VuKSAlPiUgcmVuYW1lKGxhYmVsPU9yaWdlbikKCiMgSWRlbnRpZmljYW1vcyBsb3MgZGF0b3Mgw7puaWNvcyBkZSBjb250YWN0bwpjb250YWN0byA8LSBkYXRvcyAlPiUgZGlzdGluY3QoQ29udGFjdCkgJT4lIHJlbmFtZShsYWJlbD1Db250YWN0KQoKYGBgCgpgYGB7cn0KIyBVbmlmaWNhbW9zIGxvcyBkb3MgZGF0YWZyYW1lcwpub2RlcyA8LSBmdWxsX2pvaW4ob3JpZ2VuLCBjb250YWN0bywgYnkgPSAibGFiZWwiKQoKIyBBw7FhZGltb3MgdW5hIGNvbHVtbmEgZGUgSUQKbm9kZXMgPC0gbm9kZXMgJT4lIHJvd2lkX3RvX2NvbHVtbigiaWQiKQoKbm9kZXMKYGBgCgpgYGB7cn0KIyBDYWxjdWxhbW9zIGxhIGNhbnRpZGFkIGRlIGNvbmV4aW9uZXMgZW50cmUgT3JpZ2VuIHkgQ29udGFjdG8KY29uZXhpb24gPC0gZGF0b3MgJT4lIAogIGdyb3VwX2J5KE9yaWdlbiwgQ29udGFjdCkgJT4lIAogIHN1bW1hcmlzZShwZXNvID0gbigpKSAlPiUgCiAgdW5ncm91cCgpCgojIEluZGljbyBhIGxhcyBhcmlzdGFzIGN1w6FsIGVzIGVsIG5vZG8gZGUgb3JpZ2VuIHkgY3XDoWwgZWwgZGUgZGVzdGlubwphcmlzdGFzIDwtIGNvbmV4aW9uICU+JSAKICBsZWZ0X2pvaW4obm9kZXMsIGJ5ID0gYygiT3JpZ2VuIiA9ICJsYWJlbCIpKSAlPiUgCiAgcmVuYW1lKGZyb20gPSBpZCkKCmFyaXN0YXMgPC0gYXJpc3RhcyAlPiUgCiAgbGVmdF9qb2luKG5vZGVzLCBieSA9IGMoIkNvbnRhY3QiID0gImxhYmVsIikpICU+JSAKICByZW5hbWUodG8gPSBpZCkKCiMgVmVyIGVsIGRhdGFzZXQKYXJpc3RhcwoKIyBTZWxlY2Npb25hbW9zIGFsZ3VuYXMgY29sdW1uYXMKYXJpc3RhcyA8LSBzZWxlY3QoYXJpc3RhcywgZnJvbSwgdG8sIHBlc28pCgpgYGAKCiMjIENyZWFjacOzbiBkZWwgZ3JhZm8KCmBgYHtyfQplZGdlcyA8LSBtdXRhdGUoYXJpc3Rhcywgd2lkdGggPSBwZXNvLzUgKyAxKQoKbm9kZXMkY29sb3IgPC0gYyhyZXAoIiNERDZCMDYiLCAzKSwgcmVwKCIjMkNBRkJCIiwgNzQpKQoKIyBWaXN1YWxpemFyIGVsIGdyYWZvCnJlZmVyaWRvcyA8LSB2aXNOZXR3b3JrKG5vZGVzLCBhcmlzdGFzKSAlPiUgCiAgdmlzSWdyYXBoTGF5b3V0KGxheW91dCA9ICJsYXlvdXRfd2l0aF9mciIpICU+JSAKICAgIHZpc05vZGVzKGNvbG9yID0gbGlzdChiYWNrZ3JvdW5kID0gIiM1REJBQzMiLAogICAgICAgICAgICAgICAgICAgICAgICBib3JkZXIgPSAiIzAxNjM2RCIpKSAlPiUgCiAgdmlzRWRnZXMoY29sb3IgPSBsaXN0KGNvbG9yID0gImdyZXkiLCBoaWdobGlnaHQgPSAiIzAxNEQ1NCIgKSkgJT4lIAogIHZpc09wdGlvbnMoaGlnaGxpZ2h0TmVhcmVzdCA9IFRSVUUpCgojIHZpc1NhdmUocmVmZXJpZG9zLCBmaWxlID0gInJlZmVyaWRvcy5odG1sIikKCnJlZmVyaWRvcwoKYGBgCgpPdHJvIHBhcXVldGU6CgpgYGB7cn0KIyBPdHJhIGFsdGVybmF0aXZhCm5vZGVzX2QzIDwtIG11dGF0ZShub2RlcywgaWQgPSBpZCAtIDEpCmVkZ2VzX2QzIDwtIG11dGF0ZShhcmlzdGFzLCBmcm9tID0gZnJvbSAtIDEsIHRvID0gdG8gLSAxKQoKZm9yY2VOZXR3b3JrKExpbmtzID0gZWRnZXNfZDMsIE5vZGVzID0gbm9kZXNfZDMsIFNvdXJjZSA9ICJmcm9tIiwgVGFyZ2V0ID0gInRvIiwgCiAgICAgICAgICAgICBOb2RlSUQgPSAibGFiZWwiLCBHcm91cCA9ICJjb2xvciIsIFZhbHVlID0gInBlc28iLCAKICAgICAgICAgICAgIG9wYWNpdHkgPSAxLCBmb250U2l6ZSA9IDE2LCB6b29tID0gVFJVRSkKCmBgYAo=