Introducción

Estas gráficas fueron creadas para una clase en el INAP (Instituto Nacional de Administración Pública).

Se analizó la estructura de perfiles de la Administración General del Estado (AGE) en Twitter a partir de una lista de 631 perfiles elaborada por David Álvarez (@dalvarez37).

Fuente de datos

Lista de AGE 2021

Librerías necesarias

library(readr)
library(dplyr)
library(tidyr)
library(ggrepel)

# Build and print a readr locale with Spanish date names, "," as the decimal
# mark and "." as the grouping mark. The result is not assigned here, so this
# call only displays the object.
locale(
  date_names = "es",
  date_format = "%AD",
  time_format = "%AT",
  decimal_mark = ",",
  grouping_mark = ".",
  tz = "UTC",
  encoding = "UTF-8",
  asciify = FALSE
)
## <locale>
## Numbers:  123.456,78
## Formats:  %AD / %AT
## Timezone: UTC
## Encoding: UTF-8
## <date_names>
## Days:   domingo (dom.), lunes (lun.), martes (mar.), miércoles (mié.), jueves
##         (jue.), viernes (vie.), sábado (sáb.)
## Months: enero (ene.), febrero (feb.), marzo (mar.), abril (abr.), mayo (may.),
##         junio (jun.), julio (jul.), agosto (ago.), septiembre (sept.),
##         octubre (oct.), noviembre (nov.), diciembre (dic.)
## AM/PM:  a. m./p. m.

Plantilla de gráficos

# Shared ggplot2 theme used by every chart in this document.
#
# Starts from theme_bw() and then sets the font family and size of each text
# element relative to `base_size`. The legend is placed above the panel and
# the plot title is centred.
#
# Args:
#   base_size:   Reference font size; the other sizes are offsets from it.
#   base_family: Font family applied to every text element set below.
#
# Returns:
#   A ggplot2 theme object that can be added to a plot with `+`.
theme_a <- function(base_size = 12,
                    base_family = "sans") {
  # Helper so each element only states how it differs from the common
  # size/family pair.
  txt <- function(size_offset = 0, ...) {
    element_text(size = base_size + size_offset, family = base_family, ...)
  }

  theme_bw(base_size = base_size) +
    theme(
      legend.position = "top",
      legend.text = txt(),
      plot.title = txt(2, vjust = 1.25, hjust = 0.5),
      plot.subtitle = txt(-1),
      text = txt(-1),
      axis.text.x = txt(),
      axis.text.y = txt(),
      axis.title.x = txt(vjust = 0),
      axis.title.y = txt(vjust = 1.25),
      plot.caption = txt(-2),
      strip.text = txt(2),
      # The original strip.text.y call had a stray empty argument (",,");
      # both strip variants are now built the same way.
      strip.text.x = txt(1),
      strip.text.y = txt(1)
    )
}

Leemos y unimos datos

# Negated `%in%`: TRUE for each element of `x` that is absent from `table`.
`%notin%` <- function(x, table) !(x %in% table)

# Read the profile list and the node table from the published CSV files
list_members <- read_csv(
  file = "https://raw.githubusercontent.com/congosto/congosto.github.io/master/AGE_perfiles.csv"
)
list_nodos <- read_csv(
  file = "https://raw.githubusercontent.com/congosto/congosto.github.io/master/AGE_nodos.csv"
)

# Group keys, display names and colours. The three vectors are parallel:
# position i of each one describes the same group.
grupos <- c("0", "2", "3", "1")
grupos_nombres <- c("Gobierno", "Exteriores", "Cultura", "Ciencia")
grupos_color <- setNames(
  c("#D433FF", "#078C20", "#04A4FA", "#EF634A"),
  c("0", "2", "3", "1")
)

# Type conversions: `since` becomes a Date, and `modularity_class` becomes a
# factor whose level order follows `grupos`.
list_members$since <- as.Date(list_members$since)
list_nodos$modularity_class <- factor(
  as.character(list_nodos$modularity_class),
  levels = grupos
)

# Join profiles with their node attributes, keep the columns used by the
# charts, and derive the account age in days and the average tweets per day.
perfiles <- list_members %>%
  left_join(list_nodos, by = c("screen_name" = "Label")) %>%
  select(
    screen_name, followers, following, statuses, since, modularity_class,
    indegree, outdegree
  ) %>%
  mutate(
    since_dias = as.numeric(Sys.Date() - since),
    tweets_dia = statuses / since_dias
  )

Clasificamos por seguidores/seguidos

# Profiles that get a text label: more than 500000 followers or more than
# 10000 followed accounts.
top_perfiles <- perfiles %>%
  filter(followers > 500000 | following > 10000)

# Followers against followed accounts, coloured by `modularity_class`.
ggplot() +
  geom_point(
    data = perfiles,
    aes(x = following, y = followers, color = modularity_class)
  ) +
  labs(
    x = "Seguidos ", y = "Seguidores ",
    title = "Seguidores vs. seguidos de perfiles de AGE",
    color = ""
  ) +
  geom_text_repel(
    data = top_perfiles,
    aes(
      x = following, y = followers,
      label = screen_name, color = modularity_class
    ),
    show.legend = FALSE, size = 5
  ) +
  scale_color_manual(values = grupos_color, labels = grupos_nombres) +
  # scales::label_number_si() is deprecated; label_number() with
  # cut_short_scale() is its replacement and keeps the K/M/B/T suffixes.
  scale_x_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  theme_a() +
  # Larger legend keys than the plotted points so the colours are readable.
  guides(color = guide_legend(override.aes = list(size = 4)))

Clasificamos por seguidores/actividad

# Profiles that get a text label: more than 500000 followers or more than
# 30 tweets per day.
top_perfiles <- perfiles %>%
  filter(followers > 500000 | tweets_dia > 30)

# Followers against average tweets per day, coloured by `modularity_class`.
ggplot() +
  geom_point(
    data = perfiles,
    aes(x = tweets_dia, y = followers, color = modularity_class)
  ) +
  labs(
    x = "Tweets/día", y = "Seguidores",
    title = "Seguidores vs. actividad de perfiles de AGE",
    color = ""
  ) +
  geom_text_repel(
    data = top_perfiles,
    aes(
      x = tweets_dia, y = followers,
      label = screen_name, color = modularity_class
    ),
    show.legend = FALSE, size = 5
  ) +
  scale_color_manual(values = grupos_color, labels = grupos_nombres) +
  # scales::label_number_si() is deprecated; label_number() with
  # cut_short_scale() is its replacement and keeps the K/M/B/T suffixes.
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  theme_a() +
  # Larger legend keys than the plotted points so the colours are readable.
  guides(color = guide_legend(override.aes = list(size = 4)))

Clasificamos por seguidores/antigüedad

# Profiles that get a text label: more than 500000 followers.
top_perfiles <- perfiles %>%
  filter(followers > 500000)

# Followers against account creation date, coloured by `modularity_class`.
ggplot() +
  geom_point(
    data = perfiles,
    aes(x = since, y = followers, color = modularity_class)
  ) +
  labs(
    x = "Antigüedad", y = "Seguidores",
    title = "Seguidores vs. antigüedad de perfiles de AGE",
    color = ""
  ) +
  geom_text_repel(
    data = top_perfiles,
    aes(
      x = since, y = followers,
      label = screen_name, color = modularity_class
    ),
    max.overlaps = 18, force = 10,
    show.legend = FALSE, size = 5
  ) +
  scale_color_manual(values = grupos_color, labels = grupos_nombres) +
  # scales::label_number_si() is deprecated; label_number() with
  # cut_short_scale() is its replacement and keeps the K/M/B/T suffixes.
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  theme_a() +
  # Larger legend keys than the plotted points so the colours are readable.
  guides(color = guide_legend(override.aes = list(size = 4)))

Clasificamos por seguidores/seguidos dentro de la AGE

# Profiles that get a text label: `indegree` above 200.
top_perfiles <- perfiles %>%
  filter(indegree > 200)

# `indegree` against `outdegree` of each profile, coloured by
# `modularity_class`.
ggplot() +
  geom_point(
    data = perfiles,
    aes(x = outdegree, y = indegree, color = modularity_class)
  ) +
  labs(
    x = "Seguidos", y = "Seguidores",
    title = "Seguidores vs. seguidos entre perfiles de AGE",
    color = ""
  ) +
  geom_text_repel(
    data = top_perfiles,
    aes(
      x = outdegree, y = indegree,
      label = screen_name, color = modularity_class
    ),
    show.legend = FALSE, size = 4.5
  ) +
  scale_color_manual(values = grupos_color, labels = grupos_nombres) +
  # scales::label_number_si() is deprecated; label_number() with
  # cut_short_scale() is its replacement and keeps the K/M/B/T suffixes.
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  theme_a() +
  # Larger legend keys than the plotted points so the colours are readable.
  guides(color = guide_legend(override.aes = list(size = 4)))