R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Per-column summary statistics of the built-in `cars` dataset.
summary(object = cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Introducción

Visualizaciones didácticas con datos simulados para practicar lectura de gráficos y storytelling analítico.

## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Adjuntando el paquete: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Fix the RNG seed so the simulated data is reproducible between runs.
set.seed(123)

# Dataset 1: student performance (used for the ridgeline plot)
n <- 1500
df_scores <- tibble(
  Carrera = sample(
    c("Data Science", "Economía", "Psicología", "Ingeniería", "Educación", "Medicina"),
    size = n,
    replace = TRUE
  ),
  Semestre = sample(paste0("S", seq_len(6)), size = n, replace = TRUE),
  # Scores are drawn from N(70, 12), rounded, then clamped to [0, 100].
  Puntaje = pmax(pmin(round(rnorm(n, mean = 70, sd = 12)), 100), 0)
)

# Dataset 2: sales by category and subcategory (used for the treemap)
cats <- tibble(
  # Five categories, each repeated once per subcategory (5 x 5 = 25 rows).
  Categoria = rep(
    c("Tecnología", "Hogar", "Moda", "Deportes", "Libros"),
    each = 5
  ),
  Subcategoria = c(
    "Móviles", "Laptops", "Audio", "Accesorios", "Gaming",
    "Cocina", "Dormitorio", "Baño", "Decoración", "Jardín",
    "Hombre", "Mujer", "Calzado", "Accesorios", "Niños",
    "Fitness", "Outdoor", "Ciclismo", "Natación", "Running",
    "Novela", "Académico", "Infantil", "Arte", "Ciencia"
  ),
  # One uniformly distributed sales figure per row, rounded to whole units.
  Ventas = round(runif(n = 25, min = 5000, max = 50000))
)

# Dataset 3: conversion funnel (used for the Sankey diagram)
etapas <- c("Visitas", "Registros", "Pruebas", "Compras", "Recurrentes")
vals <- c(5000, 2200, 1200, 520, 210)

# One node per funnel stage.
nodes <- data.frame(name = etapas)

# One link per consecutive pair of stages, using zero-based node indices.
# Each link carries the volume of the stage it flows INTO, so the first
# element of `vals` is not used as a link value.
links <- data.frame(
  source = as.numeric(seq_along(vals[-1])) - 1,
  target = as.numeric(seq_along(vals[-1])),
  value  = vals[-1]
)

# Dataset 4: marketing campaigns (used for the interactive scatter plot)
# 150 simulated campaigns: 30 for each of five channels.
df_marketing <- tibble(
  Canal     = rep(c("Ads", "SEO", "Email", "Influencers", "Social"), each = 30),
  Inversion = round(runif(150, 1000, 20000), 0),
  CPC       = runif(150, 0.1, 2.5),
  # Conversions grow linearly with spend, plus Gaussian noise and a fixed
  # bonus for SEO (+25) and Email (+15). The noise (sd = 8) is larger than the
  # baseline for low-spend campaigns, so the value is clamped at 0: a
  # conversion count cannot be negative.
  Conversiones = pmax(
    round(
      0.0006 * Inversion +
        rnorm(150, 0, 8) +
        ifelse(Canal == "SEO", 25, ifelse(Canal == "Email", 15, 0)),
      0
    ),
    0
  )
) %>%
  # ROI values each conversion at 15 monetary units, relative to the spend.
  mutate(ROI = (Conversiones * 15 - Inversion) / Inversion)

Otros gráficos en R Markdown

# Ridgeline plot: distribution of Puntaje for each Carrera.
df_scores %>%
  ggplot(aes(
    x = Puntaje,
    # Ordering by the factor's integer codes keeps the careers in the
    # factor's default (sorted) level order along the y axis.
    y = reorder(Carrera, as.numeric(factor(Carrera))),
    fill = Carrera
  )) +
  # The outline width is set with `linewidth`; `size` is not a parameter of
  # this geom and was ignored with an "unknown parameters" warning.
  ggridges::geom_density_ridges(
    scale = 1.9, alpha = 0.85, color = "white", linewidth = 0.2
  ) +
  scale_fill_brewer(palette = "Set2") +
  labs(x = "Puntaje", y = NULL, title = "Distribución de Puntajes por Carrera") +
  theme_minimal(base_size = 14) +
  # The fill legend would only repeat the y-axis labels, so it is hidden.
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold")
  )
## Warning in ggridges::geom_density_ridges(scale = 1.9, alpha = 0.85, color =
## "white", : Ignoring unknown parameters: `size`
## Picking joint bandwidth of 3.33

# Render the funnel as an interactive Sankey diagram. The column names passed
# as strings refer to columns of `links` (source/target/value) and `nodes`
# (name).
networkD3::sankeyNetwork(
  Links      = links,
  Nodes      = nodes,
  Source     = "source",
  Target     = "target",
  Value      = "value",
  NodeID     = "name",
  fontSize   = 14,
  nodeWidth  = 30,
  sinksRight = TRUE
)
# Scatter plot of spend vs. conversions, one colour per channel and point size
# proportional to CPC, with one LOESS trend line per channel. The `text`
# aesthetic holds the HTML tooltip that ggplotly() displays on hover.
p <- ggplot(
  df_marketing,
  aes(
    x = Inversion, y = Conversiones, size = CPC, color = Canal,
    text = paste0(
      "<b>Canal:</b> ", Canal,
      "<br><b>Inversión:</b> $", comma(Inversion),
      "<br><b>Conversiones:</b> ", Conversiones,
      "<br><b>CPC:</b> $", round(CPC, 2),
      "<br><b>ROI:</b> ", percent(ROI, accuracy = 0.1)
    )
  )
) +
  geom_point(alpha = .85) +
  # The smooth layer gets its own mapping instead of inheriting the global one:
  # - the per-row `text` string is a discrete variable, so inheriting it would
  #   put (almost) every point in its own group and no curve could be fitted;
  # - inheriting `size` on a line geom triggers the ggplot2 >= 3.4.0
  #   size -> linewidth deprecation.
  # `linewidth` replaces the deprecated `size` for the line width, and the
  # explicit formula avoids the "using formula = 'y ~ x'" message.
  geom_smooth(
    aes(x = Inversion, y = Conversiones, color = Canal),
    inherit.aes = FALSE,
    method = "loess", formula = y ~ x, se = FALSE, linewidth = 1
  ) +
  scale_y_continuous(expand = expansion(mult = c(.02, .08))) +
  scale_x_continuous(labels = label_number(scale_cut = cut_short_scale())) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "top") +
  labs(
    x = "Inversión", y = "Conversiones",
    title = "Inversión vs Conversiones (Tamaño = CPC)"
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Convert the static ggplot into an interactive plotly widget; only the custom
# `text` aesthetic is shown in the hover tooltip.
plotly::ggplotly(p = p, tooltip = "text")
## `geom_smooth()` using formula = 'y ~ x'