Este informe analiza las características de los seguidores de Pedro Sánchez, utilizando histogramas

La información se ha extraído mediante la API de Twitter

library(readr)
library(stringr)
library(ggplot2)
library(tidyr)
library(scales)
library(dplyr)
# Build a Spanish readr locale (Spanish day/month names, "," as decimal mark,
# "." as grouping mark, UTC, UTF-8). The value is not assigned, so at top
# level it is only auto-printed.
# NOTE(review): the read_csv() call further down does not receive a `locale`
# argument, so these settings do not appear to affect how the CSV is parsed.
# Confirm whether the locale was meant to be passed to read_csv().
locale(
  date_names = "es",
  date_format = "%AD",
  time_format = "%AT",
  decimal_mark = ",",
  grouping_mark = ".",
  tz = "UTC",
  encoding = "UTF-8",
  asciify = FALSE
)
## <locale>
## Numbers:  123.456,78
## Formats:  %AD / %AT
## Timezone: UTC
## Encoding: UTF-8
## <date_names>
## Days:   domingo (dom.), lunes (lun.), martes (mar.), miércoles (mié.), jueves
##         (jue.), viernes (vie.), sábado (sáb.)
## Months: enero (ene.), febrero (feb.), marzo (mar.), abril (abr.), mayo (may.),
##         junio (jun.), julio (jul.), agosto (ago.), septiembre (sept.),
##         octubre (oct.), noviembre (nov.), diciembre (dic.)
## AM/PM:  a. m./p. m.
# Read the follower profiles export (path is relative to the working
# directory) using readr's default parsing settings.
followers_csv <- read_csv(file = "sanchezcastejon_follower_profiles.csv")

# Plain data.frame copy of the parsed table; every plot below reads from `df`.
df <- data.frame(followers_csv)

¿Cuál es la distribución de la antigüedad en Twitter de sus seguidores?

# Histogram of `since` for all profiles, in 15 bins. Bar heights are each
# bin's share of the total (count / sum(count)), shown on a percent axis.
ggplot(data = df, mapping = aes(x = since)) +
  geom_histogram(
    mapping = aes(y = (..count..) / sum(..count..)),
    bins = 15,
    color = "white",
    fill = "cornflowerblue"
  ) +
  # Second stat_bin layer with the same binning draws each bar's share as a
  # text label (rounded to 2 decimals) just above the bar.
  stat_bin(
    mapping = aes(
      y = (..count..) / sum(..count..),
      label = paste0(round(100 * (..count..) / sum(..count..), 2), "%")
    ),
    bins = 15,
    geom = "text",
    color = "black",
    size = 2,
    position = position_stack(vjust = 1),
    vjust = -0.5
  ) +
  # The axis title is set once in labs(); it was previously also passed as the
  # scale name with the identical string.
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    x = "Año de creación del perfil",
    y = "Porcentaje de perfiles",
    # Title spelling corrected ("Ditribución" -> "Distribución").
    title = "Distribución de los seguidores por antigüedad en Twitter"
  )

¿Cuántos seguidores tienen?

# Histogram of the order of magnitude of each profile's follower count:
# x is trunc(log10(followers)), with 0 followers mapped to 1 so that
# log10() stays finite and those profiles fall in the 10^0 bin.
ggplot(
  data = df,
  mapping = aes(x = trunc(log10(ifelse(followers == 0, 1, followers))))
) +
  geom_histogram(
    mapping = aes(y = (..count..) / sum(..count..)),
    bins = 8,
    color = "white",
    fill = "cornflowerblue"
  ) +
  # Same binning, drawn as text: each bar's share rounded to 2 decimals.
  stat_bin(
    mapping = aes(
      y = (..count..) / sum(..count..),
      label = paste0(round(100 * (..count..) / sum(..count..), 2), "%")
    ),
    bins = 8,
    geom = "text",
    color = "black",
    size = 3,
    position = position_stack(vjust = 1),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    x = "Log (Followers)",
    y = "Porcentaje de perfiles",
    # Title spelling corrected ("Ditribución" -> "Distribución").
    title = "Distribución por número de seguidores"
  )

¿A cuántos siguen?

# Histogram of the order of magnitude of how many accounts each profile
# follows: x is trunc(log10(following)).
# log10(0) is -Inf, which stat_bin discards as non-finite, so profiles that
# follow nobody used to vanish from the plot and inflate every other share.
# Zero is now mapped to 1 (the 10^0 bin), matching the followers histogram.
ggplot(
  data = df,
  mapping = aes(x = trunc(log10(ifelse(following == 0, 1, following))))
) +
  geom_histogram(
    mapping = aes(y = (..count..) / sum(..count..)),
    bins = 7,
    color = "white",
    fill = "cornflowerblue"
  ) +
  # Same binning, drawn as text: each bar's share rounded to 2 decimals.
  stat_bin(
    mapping = aes(
      y = (..count..) / sum(..count..),
      label = paste0(round(100 * (..count..) / sum(..count..), 2), "%")
    ),
    bins = 7,
    geom = "text",
    color = "black",
    size = 3,
    position = position_stack(vjust = 1),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    x = "Log (Following)",
    y = "Porcentaje de perfiles",
    # Title spelling corrected ("Ditribución" -> "Distribución").
    title = "Distribución por número de following"
  )

¿Cuánto publican sus seguidores?

# Histogram of the order of magnitude of each profile's tweet count:
# x is trunc(log10(statuses)).
# log10(0) is -Inf, which stat_bin discards as non-finite, so profiles that
# never tweeted used to vanish from the plot and inflate every other share.
# Zero is now mapped to 1 (the 10^0 bin), as the subset plots of `statuses`
# later in this report already do.
ggplot(
  data = df,
  mapping = aes(x = trunc(log10(ifelse(statuses == 0, 1, statuses))))
) +
  geom_histogram(
    mapping = aes(y = (..count..) / sum(..count..)),
    bins = 7,
    color = "white",
    fill = "cornflowerblue"
  ) +
  # Same binning, drawn as text: each bar's share rounded to 2 decimals.
  stat_bin(
    mapping = aes(
      y = (..count..) / sum(..count..),
      label = paste0(round(100 * (..count..) / sum(..count..), 2), "%")
    ),
    bins = 7,
    geom = "text",
    color = "black",
    size = 3,
    position = position_stack(vjust = 1),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    x = "Log (Tweets)",
    y = "Porcentaje de perfiles",
    # Title spelling corrected ("Ditribución" -> "Distribución").
    title = "Distribución por número de tweets publicados"
  )

Los que no tienen seguidores, ¿qué antigüedad tienen en Twitter?

# Columns carried into the low-follower subsets. A logical mask over names(df)
# keeps df's own column order and tolerates a listed column being absent.
subset_cols <- names(df) %in% c("screen_name", "followers", "statuses", "since")

# df2: profiles with exactly zero followers; df3: profiles with fewer than 10.
# which() drops rows where the comparison is NA instead of yielding NA rows.
df2 <- df[which(df$followers == 0), subset_cols]
df3 <- df[which(df$followers < 10), subset_cols]

# Share of all profiles that have no followers. This used to be
# dim(df2)/dim(df), which also divided the column counts and printed a
# meaningless second number; only the row ratio is wanted.
nrow(df2) / nrow(df)
## [1] 0.1283456
# Histogram of `since` restricted to df2 (profiles with zero followers), in
# 15 bins. Bar heights are each bin's share of the df2 total.
ggplot(data = df2, mapping = aes(x = since)) +
  geom_histogram(
    mapping = aes(y = (..count..) / sum(..count..)),
    bins = 15,
    color = "white",
    fill = "cornflowerblue"
  ) +
  # Same binning, drawn as text: each bar's share rounded to 2 decimals.
  stat_bin(
    mapping = aes(
      y = (..count..) / sum(..count..),
      label = paste0(round(100 * (..count..) / sum(..count..), 2), "%")
    ),
    bins = 15,
    geom = "text",
    color = "black",
    size = 2,
    position = position_stack(vjust = 1),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    x = "Año de creación del perfil",
    y = "Porcentaje de perfiles",
    # Title spelling corrected ("Ditribución" -> "Distribución").
    title = "Distribución por antigüedad en Twitter sin seguidores"
  )

Los que no tienen seguidores, ¿cuánto publican?

# Histogram of the order of magnitude of tweet counts for df2 (profiles with
# zero followers): x is trunc(log10(statuses)), with 0 tweets mapped to 1 so
# that log10() stays finite and those profiles fall in the 10^0 bin.
ggplot(
  data = df2,
  mapping = aes(x = trunc(log10(ifelse(statuses == 0, 1, statuses))))
) +
  geom_histogram(
    mapping = aes(y = (..count..) / sum(..count..)),
    bins = 7,
    color = "white",
    fill = "cornflowerblue"
  ) +
  # Same binning, drawn as text: each bar's share rounded to 2 decimals.
  stat_bin(
    mapping = aes(
      y = (..count..) / sum(..count..),
      label = paste0(round(100 * (..count..) / sum(..count..), 2), "%")
    ),
    bins = 7,
    geom = "text",
    color = "black",
    size = 3,
    position = position_stack(vjust = 1),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    x = "Log (Tweets)",
    y = "Porcentaje de perfiles",
    # Title spelling corrected ("Ditribución" -> "Distribución").
    title = "Distribución por tweets publicados sin seguidores"
  )

Los que tienen menos de 10 seguidores, ¿qué antigüedad tienen en Twitter?

# Histogram of `since` restricted to df3 (profiles with fewer than 10
# followers), in 15 bins. Bar heights are each bin's share of the df3 total.
ggplot(data = df3, mapping = aes(x = since)) +
  geom_histogram(
    mapping = aes(y = (..count..) / sum(..count..)),
    bins = 15,
    color = "white",
    fill = "cornflowerblue"
  ) +
  # Same binning, drawn as text: each bar's share rounded to 2 decimals.
  stat_bin(
    mapping = aes(
      y = (..count..) / sum(..count..),
      label = paste0(round(100 * (..count..) / sum(..count..), 2), "%")
    ),
    bins = 15,
    geom = "text",
    color = "black",
    size = 2,
    position = position_stack(vjust = 1),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    x = "Año de creación del perfil",
    y = "Porcentaje de perfiles",
    # Title spelling corrected ("Ditribución" -> "Distribución").
    title = "Distribución por antigüedad en Twitter < 10 seguidores"
  )

# Histogram of the order of magnitude of tweet counts for df3 (profiles with
# fewer than 10 followers): x is trunc(log10(statuses)), with 0 tweets mapped
# to 1 so that log10() stays finite.
# NOTE(review): the same plot is produced again by the next ggplot() call in
# this report, and this copy sits under the "antigüedad" question rather than
# the "publican" one. It looks like an accidental duplicate; confirm before
# removing.
ggplot(
  data = df3,
  mapping = aes(x = trunc(log10(ifelse(statuses == 0, 1, statuses))))
) +
  geom_histogram(
    mapping = aes(y = (..count..) / sum(..count..)),
    bins = 7,
    color = "white",
    fill = "cornflowerblue"
  ) +
  # Same binning, drawn as text: each bar's share rounded to 2 decimals.
  stat_bin(
    mapping = aes(
      y = (..count..) / sum(..count..),
      label = paste0(round(100 * (..count..) / sum(..count..), 2), "%")
    ),
    bins = 7,
    geom = "text",
    color = "black",
    size = 3,
    position = position_stack(vjust = 1),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    x = "Log (Tweets)",
    y = "Porcentaje de perfiles",
    # Title spelling corrected ("Ditribución" -> "Distribución").
    title = "Distribución por tweets publicados con < 10 seguidores"
  )

Los que tienen menos de 10 seguidores, ¿cuánto publican?

# Histogram of the order of magnitude of tweet counts for df3 (profiles with
# fewer than 10 followers): x is trunc(log10(statuses)), with 0 tweets mapped
# to 1 so that log10() stays finite and those profiles fall in the 10^0 bin.
ggplot(
  data = df3,
  mapping = aes(x = trunc(log10(ifelse(statuses == 0, 1, statuses))))
) +
  geom_histogram(
    mapping = aes(y = (..count..) / sum(..count..)),
    bins = 7,
    color = "white",
    fill = "cornflowerblue"
  ) +
  # Same binning, drawn as text: each bar's share rounded to 2 decimals.
  stat_bin(
    mapping = aes(
      y = (..count..) / sum(..count..),
      label = paste0(round(100 * (..count..) / sum(..count..), 2), "%")
    ),
    bins = 7,
    geom = "text",
    color = "black",
    size = 3,
    position = position_stack(vjust = 1),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    x = "Log (Tweets)",
    y = "Porcentaje de perfiles",
    # Title spelling corrected ("Ditribución" -> "Distribución").
    title = "Distribución por tweets publicados con < 10 seguidores"
  )