El objetivo de este post es realizar un bar chart race de los sitios web con más visitas entre 1994 y 2020.
library(tidyverse)
library(gganimate)
library(RColorBrewer)
Cargamos este dataset de Kaggle
# Load the Kaggle CSV into a data frame. The later steps select columns whose
# names start with "X", so this presumably relies on read.csv's default name
# checking to prefix the numeric year headers -- confirm against the file.
site <- read.csv(
  file = "C:\\Users\\jonathan.rzezak\\Desktop\\R\\Most Popular websites (1).csv"
)
Convertimos los datos de formato ancho (una columna por año) a formato tidy (una fila por sitio y año)
# Reshape from wide to long: every column whose name starts with "X" is
# stacked, with the former column name stored in `Year` and the cell value
# stored in `Visitas`.
site <- pivot_longer(
  site,
  cols = starts_with("X"),
  names_to = "Year",
  values_to = "Visitas"
)
Eliminamos las “X” de los años y filtramos las filas cuyo año termine en “.1” (son datos repetidos)
# Clean the year labels and drop rows that come from duplicated columns.
#
# * str_remove() strips the first "X" from each label ("X1994" -> "1994").
# * Labels that still end in a ".<digits>" suffix (e.g. "2020.1", "1993.1")
#   are treated as repeated columns and removed. Matching the suffix pattern
#   instead of listing the two known labels also catches any other repeated
#   column, which would otherwise produce duplicate rows per year.
site <- site %>%
  mutate(Year = str_remove(Year, "X")) %>%
  filter(!str_detect(Year, "\\.[0-9]+$"))
Para cada año asignamos un ranking según las visitas y nos quedamos con los 10 sitios más visitados (descartando 1993)
# Rank the websites within each year and keep the top 10.
#
# * Rows for "1993" are discarded up front; groups are independent, so this
#   yields the same rows as filtering after the ranking.
# * Within each year, rows are ordered by descending `Visitas` and numbered
#   with row_number(), so ties still receive distinct ranks.
# * ungroup() returns a plain tibble so the grouping by `Year` does not leak
#   into later operations on `site`.
site <- site %>%
  filter(Year != "1993") %>%
  group_by(Year) %>%
  arrange(Year, desc(Visitas)) %>%
  mutate(ranking = row_number()) %>%
  filter(ranking <= 10) %>%
  ungroup()
Creo una paleta de colores a partir de una paleta existente de RColorBrewer
# Number of colours to generate. NOTE(review): presumably chosen to cover the
# number of distinct websites that get a fill colour -- confirm against data.
nb.cols <- 38

# Interpolate the 12-colour "Paired" Brewer palette up to `nb.cols` colours.
mycolors <- colorRampPalette(
  colors = brewer.pal(n = 12, name = "Paired")
)(nb.cols)
# Animated bar chart race specification.
# Every layer shares x = ranking; coord_flip() turns the columns into
# horizontal bars and scale_x_reverse() reverses the ranking axis.
a <- ggplot(data = site, mapping = aes(x = ranking)) +
  # One bar per website, filled from the custom palette.
  geom_col(mapping = aes(y = Visitas, fill = Website)) +
  scale_fill_manual(values = mycolors) +
  # Visit count drawn just past the end of each bar.
  geom_text(
    mapping = aes(y = Visitas, label = as.factor(Visitas)),
    hjust = -0.1,
    size = 5
  ) +
  # Website name drawn just before the start of each bar (y = 0).
  geom_text(
    mapping = aes(y = 0, label = Website),
    hjust = 1.1,
    size = 5
  ) +
  # `{next_state}` is filled in by transition_states() for each frame.
  labs(
    title = "Most Popular websites 1994-2020",
    subtitle = "Año: {next_state}",
    x = NULL,
    y = NULL
  ) +
  # clip = "off" lets the text labels extend outside the panel area.
  coord_flip(clip = "off") +
  scale_x_reverse() +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(hjust = 0, size = 20, face = "bold"),
    plot.subtitle = element_text(hjust = 0, size = 16, face = "italic"),
    # Wide right/left margins leave room for the labels drawn outside the panel.
    plot.margin = margin(t = 1, r = 4, b = 1, l = 3, unit = "cm")
  ) +
  # One animation state per year, with no hold time on each state.
  transition_states(
    states = Year,
    transition_length = 1,
    state_length = 0,
    wrap = FALSE
  ) +
  enter_fade() +
  exit_fade() +
  # Let the view follow the data while keeping the x (ranking) range fixed.
  view_follow(fixed_x = TRUE)
# Render the animation: 800 frames at 30 fps, with the first and last frames
# each held for 50 extra frames. `width` and `height` set the output size.
animate(
  plot = a,
  width = 1000,
  height = 600,
  fps = 30,
  nframes = 800,
  start_pause = 50,
  end_pause = 50
)