Con base en el ejercicio Beyond Bar and Box Plots se procede a replicar las gráficas que se mostraron, de manera no exhaustiva, con el objetivo de aplicar el código a una base de datos económica para ejemplificar para qué tipo de datos se puede utilizar. Para el caso propuesto se utiliza el índice de Gini para el año 2019 publicado por el Banco Mundial. La información se extrae con el uso de APIs. Para utilizar el código para otras variables es importante tener en cuenta que se necesitan una variable categórica con más de dos niveles y una variable continua.
El primer paso es cargar las librerías:
library(tidyverse) ## data wrangling + ggplot2
library(colorspace) ## adjust colors
library(rcartocolor) ## Carto palettes
library(ggforce) ## sina plots
library(ggdist) ## halfeye plots
library(ggridges) ## ridgeline plots
library(ggbeeswarm) ## beeswarm plots
library(gghalves) ## off-set jitter
library(systemfonts) ## custom fonts
library(readxl) ## read data
library(extrafont) ## fonts
Para importar los datos se utiliza la API del Banco Mundial; luego se limpia y depura la información para conservar únicamente las variables necesarias para hacer las gráficas.
library(WDI)
## Download the Gini index (SI.POV.GINI) from the World Bank. `extra = TRUE`
## is requested because the `income` classification column is used below.
dat <- WDI(indicator = "SI.POV.GINI", extra = TRUE)

## Build the plotting table: one row per country with a 2019 Gini value,
## excluding the "Low income" group, with columns country / Ingresos / Indice.
GINI <- dat %>%
  ## Use the piped `income` column (not `dat$income`) so the label always
  ## belongs to its own row, and trim the space left after dropping "income".
  mutate(Ingresos = str_trim(str_replace(income, "income", ""))) %>%
  filter(
    year == 2019,
    !is.na(SI.POV.GINI),
    income != "Low income"
  ) %>%
  ## select() renames while selecting, so no separate rename() step is needed.
  select(country, Ingresos, Indice = SI.POV.GINI)
## general theme
## Start from an empty theme in Garamond and add back only the axis text, the
## x axis line and the horizontal grid lines.
theme_set(theme_void(base_family = "Garamond"))
theme_update(
  axis.text.x = element_text(
    color = "black", face = "bold", size = 16,
    margin = margin(t = 6)
  ),
  axis.text.y = element_text(
    color = "black", size = 16, hjust = 1,
    margin = margin(r = 6), family = "Garamond"
  ),
  axis.line.x = element_line(color = "black", size = 1),
  panel.grid.major.y = element_line(color = "grey90", size = .6),
  plot.background = element_rect(fill = "white", color = "white"),
  ## margin() takes one scalar per side (t, r, b, l). The previous
  ## margin(rep(20, 4)) handed all four values to `t` alone, so each side is
  ## now spelled out explicitly.
  plot.margin = margin(t = 20, r = 20, b = 20, l = 20)
)
## Theme overrides for the horizontal charts: grid lines run along x instead
## of y, the axis text emphasis is swapped, and the legend sits on top.
theme_flip <- theme(
  legend.position = "top",
  legend.title = element_text(
    size = 18, face = "bold", margin = margin(b = 25)
  ),
  legend.text = element_text(size = 18, family = "Garamond"),
  panel.grid.major.y = element_blank(),
  panel.grid.major.x = element_line(size = 0.6, color = "grey90"),
  axis.text.y = element_text(size = 16, family = "Garamond", face = "bold"),
  axis.text.x = element_text(size = 22, family = "Garamond", face = "plain")
)
## Group palette: four entries (positions 1, 3, 7 and 2) taken from the
## 8-colour Carto "Bold" palette.
my_pal <- rcartocolor::carto_pal(name = "Bold", n = 8)[c(1L, 3L, 7L, 2L)]
## Bar chart of the mean index per income group.
## `fun = mean` is stated explicitly: without it stat_summary() falls back to
## mean_se() and prints a "No summary function supplied" message.
ggplot(GINI, aes(x = Ingresos, y = Indice, fill = Ingresos)) +
  geom_bar(stat = "summary", fun = mean, width = .8) +
  scale_y_continuous(expand = c(0, 0), breaks = seq(10, 45, 5)) +
  scale_fill_manual(values = my_pal, guide = "none")

## Same bar chart with mean +/- one standard deviation error bars.
## The error bar layer is added first, so the bars are drawn on top of it.
ggplot(
  GINI,
  aes(x = Ingresos, y = Indice, color = Ingresos, fill = Ingresos)
) +
  stat_summary(
    geom = "errorbar",
    fun.max = function(x) mean(x) + sd(x),
    fun.min = function(x) mean(x) - sd(x),
    width = .3, size = 1.2
  ) +
  geom_bar(stat = "summary", fun = mean, width = .8, size = .8) +
  scale_y_continuous(expand = c(0, 0), breaks = seq(10, 45, 5)) +
  scale_fill_manual(values = my_pal, guide = "none") +
  scale_color_manual(values = my_pal, guide = "none")
## Shared base plot: income group on x, index on y, with colour and fill both
## mapped to the income group and drawn with the custom palette (no legends).
g <- ggplot(
  GINI,
  aes(x = Ingresos, y = Indice, color = Ingresos, fill = Ingresos)
) +
  scale_y_continuous(breaks = seq(10, 55, 5)) +
  scale_color_manual(values = my_pal, guide = "none") +
  scale_fill_manual(values = my_pal, guide = "none")

## Box plot
g +
  geom_boxplot(alpha = .5, size = 1.5, outlier.size = 5)

## Violin plot with a lightened fill.
## stage() maps fill to the group first and lightens the scaled colour
## afterwards; it replaces the former `aes(fill = ..., fill = after_scale())`
## pair, which repeated the `fill` aesthetic and raised a
## "Duplicated aesthetics" warning.
g +
  geom_violin(
    aes(fill = stage(Ingresos, after_scale = colorspace::lighten(fill, .5))),
    size = 1.2
  )

## Violin plot with a fixed kernel bandwidth.
## NOTE(review): `bw` is expressed in the units of Indice; confirm that .2 is
## not too narrow for this variable.
g +
  geom_violin(
    aes(fill = stage(Ingresos, after_scale = colorspace::lighten(fill, .5))),
    size = 1.2, bw = .2
  )

## Half-eye plot (density slab plus point and interval)
g +
  ggdist::stat_halfeye(
    aes(fill = stage(Ingresos, after_scale = colorspace::lighten(fill, .7)))
  )

## Half-eye plot with a smaller bandwidth multiplier, nudged to the left
g +
  ggdist::stat_halfeye(
    aes(fill = stage(Ingresos, after_scale = colorspace::lighten(fill, .7))),
    adjust = .2, position = position_nudge(x = -.3)
  )
## Base plot for the ridgeline charts: index on x, income group (in reversed
## order) on y, with the horizontal theme overrides applied.
g_ridges <-
  ggplot(
    GINI,
    aes(Indice, fct_rev(Ingresos), color = Ingresos, fill = Ingresos)
  ) +
  coord_cartesian(clip = "off") +
  scale_y_discrete(expand = c(.07, .07)) +
  scale_color_manual(values = my_pal, guide = "none") +
  scale_fill_manual(values = my_pal, guide = "none") +
  theme_flip

## Plain ridgelines
g_ridges +
  ggridges::geom_density_ridges(
    alpha = .8, size = 1.5,
    rel_min_height = 0.01
  )

## Ridgelines with a line at the median (quantiles = 2)
g_ridges +
  ggridges::stat_density_ridges(
    quantile_lines = TRUE, quantiles = 2,
    color = "black", alpha = .8, size = 1.5
  )

## Palette for the quantile-filled ridgelines
q_pal <- colorspace::lighten(
  rcartocolor::carto_pal(n = 4, name = "Geyser")[c(2, 1, 4, 3)],
  .6
)
#q_pal <- colorspace::adjust_transparency(rcartocolor::carto_pal(n = 4, name = "Tropic")[c(2, 1, 4, 3)], alpha = .7)

## Ridgelines filled by quartile.
## after_stat() replaces the deprecated stat() helper for computed variables.
g_ridges +
  ggridges::stat_density_ridges(
    aes(fill = factor(after_stat(quantile))),
    geom = "density_ridges_gradient", calc_ecdf = TRUE, quantiles = 4,
    color = "black", size = 1
  ) +
  scale_fill_manual(values = q_pal, name = "Quartile:") +
  guides(fill = guide_legend(override.aes = list(color = "transparent")))

## Same as above with a fixed bandwidth.
## NOTE(review): `bandwidth` is expressed in the units of Indice; confirm
## that .1 is not too narrow for this variable.
g_ridges +
  ggridges::stat_density_ridges(
    aes(fill = factor(after_stat(quantile))),
    geom = "density_ridges_gradient", calc_ecdf = TRUE,
    quantiles = 4,
    color = "black", size = 1,
    bandwidth = .1
  ) +
  scale_fill_manual(values = q_pal, name = "Quartile:") +
  guides(fill = guide_legend(override.aes = list(color = "transparent")))

## Ridgelines with the two 2.5% tails highlighted
g_ridges +
  ggridges::stat_density_ridges(
    aes(fill = factor(after_stat(quantile))),
    geom = "density_ridges_gradient", calc_ecdf = TRUE,
    quantiles = c(0.025, 0.975),
    color = "black", size = 1.5
  ) +
  scale_fill_manual(
    name = "Probability:", values = c("#994c00", "grey70", "#003366"),
    labels = c("(0, 0.025]", "(0.025, 0.975]", "(0.975, 1]")
  ) +
  guides(fill = guide_legend(override.aes = list(color = "transparent")))
## Base plot for the interval charts: a discrete "mako" colour scale for the
## interval levels and a small horizontal legend placed inside the panel.
g_interval <-
  ggplot(GINI, aes(Ingresos, Indice)) +
  scale_color_viridis_d(
    option = "mako", name = "Level:", direction = -1,
    begin = .15, end = .9
  ) +
  guides(
    color = guide_legend(reverse = TRUE, title.position = "top")
  ) +
  theme(
    legend.position = c(.25, .80), legend.direction = "horizontal",
    legend.text = element_text(family = "Garamond", size = 10),
    legend.title = element_text(face = "bold", size = 10, hjust = .5)
  )

## Interval stripes with the default widths
g_interval +
  ggdist::stat_interval(size = 12)

## Interval stripes with custom widths plus a grey half-eye slab whose
## thickness is weighted by group size.
## after_stat() replaces the deprecated stat() helper for computed variables.
g_interval +
  ggdist::stat_interval(
    .width = c(.25, .5, .95, 1),
    size = 7
  ) +
  ggdist::stat_halfeye(
    adjust = .33, ## bandwidth
    width = .7, fill = "grey85",
    interval_colour = NA, point_colour = "black",
    shape = 23, stroke = 1.5, point_size = 5, point_fill = "white",
    position = position_nudge(x = .03),
    aes(thickness = after_stat(f * n))
  ) +
  ## Re-declares the colour scale so the level labels are shown as percentages.
  scale_color_viridis_d(
    option = "mako", name = "Level:", direction = -1,
    begin = .15, end = .9,
    labels = function(x) paste0(as.numeric(x) * 100, "%")
  )
## Gradient interval
g +
  ggdist::stat_gradientinterval(color = "black", width = 0.3)

## Raw observations as large translucent points
g + geom_point(alpha = 0.33, size = 10)

## Raw observations drawn with plotting symbol 95
g + geom_point(alpha = 0.33, size = 50, shape = 95)

## Jittered points (no seed is set here)
g + geom_jitter(alpha = 0.5, size = 7)

## Jittered points with a fixed seed and a narrower horizontal spread
g +
  geom_point(
    alpha = 0.5, size = 7,
    position = position_jitter(seed = 0, width = 0.2)
  )

## Seeded jitter plus black outlines; both layers share the same seed and
## width so the outlines land on the filled points.
g +
  geom_point(
    alpha = 0.5, size = 7,
    position = position_jitter(seed = 0, width = 0.2)
  ) +
  geom_point(
    color = "black", shape = 1, stroke = 0.9, size = 7,
    position = position_jitter(seed = 0, width = 0.2)
  )

## Sina plot: filled points plus outlines, again sharing one seed
g +
  ggforce::geom_sina(
    alpha = 0.5, size = 7,
    seed = 1, scale = "count", maxwidth = 0.6
  ) +
  ggforce::geom_sina(
    stroke = 0.8, color = "black", shape = 1, size = 7,
    seed = 1, scale = "count", maxwidth = 0.6
  )

## Dot plot, nudged to the left
g +
  ggdist::stat_dots(position = position_nudge(x = -0.25))

## Dot plot with the swarm layout on both sides
g +
  ggdist::stat_dots(side = "both", layout = "swarm")
## Box plot with jittered raw data on top.
## stage() maps fill to the group first and lightens the scaled colour
## afterwards; it replaces the former `aes(fill = ..., fill = after_scale())`
## pair, which repeated the `fill` aesthetic and raised a
## "Duplicated aesthetics" warning.
g +
  geom_boxplot(
    aes(fill = stage(Ingresos, after_scale = colorspace::lighten(fill, .7))),
    size = 1.5, outlier.shape = NA
  ) +
  geom_point(
    position = position_jitter(width = .1, seed = 0),
    size = 7, alpha = .5
  ) +
  ## Same seed and width as the layer above, so the outlines match the points.
  geom_point(
    position = position_jitter(width = .1, seed = 0),
    size = 7, stroke = .9, shape = 1, color = "white"
  )

## Violin plot with a narrow white box plot inside.
## NOTE(review): `bw` is expressed in the units of Indice; confirm that .2 is
## not too narrow for this variable.
g +
  geom_violin(
    aes(fill = stage(Ingresos, after_scale = colorspace::lighten(fill, .5))),
    size = 1.2, bw = .2
  ) +
  geom_boxplot(
    fill = "white", size = 1.2, width = .2, outlier.size = 5
  )

## Violin plot, box without whiskers or outliers (coef = 0,
## outlier.shape = NA) and jittered raw data with outlines.
g +
  geom_violin(
    aes(fill = stage(Ingresos, after_scale = colorspace::lighten(fill, .5))),
    size = 1.2, bw = .2
  ) +
  geom_boxplot(
    fill = "white", size = 1.2, width = .2,
    outlier.shape = NA, coef = 0
  ) +
  geom_point(
    position = position_jitter(width = .03, seed = 0),
    size = 5, alpha = .5
  ) +
  geom_point(
    position = position_jitter(width = .03, seed = 0),
    size = 5, stroke = .7, shape = 1, color = "black"
  )
## Combined chart: a narrow white box plot, a density slab nudged to the
## right, and the raw points drawn on the left half.
g +
  geom_boxplot(
    outlier.shape = NA, size = 1.5,
    fill = "white", width = 0.2
  ) +
  ggdist::stat_halfeye(
    position = position_nudge(x = 0.15),
    color = NA, ## remove slab interval
    width = 0.67,
    adjust = 0.33 ## bandwidth
  ) +
  gghalves::geom_half_point(
    size = 3, alpha = 0.5,
    range_scale = 0.3,
    side = "l"
  )