library(readr)
## Warning: package 'readr' was built under R version 3.6.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
library(resumeRdesc)
# Simulate a reproducible sample of n people with normally distributed
# age (edad), weight (peso) and height (estatura).
set.seed(4444)
n <- 100

# Mean and standard deviation used for each simulated variable.
media.edad <- 45
ds.edad <- 10
media.peso <- 75
ds.peso <- 15
media.estatura <- 1.70
ds.estatura <- 0.10

# The draw order (edad, peso, estatura) is significant: for the fixed seed
# it determines which random numbers each variable receives.
# edad is rounded to whole numbers, peso and estatura to two decimals.
edad <- round(rnorm(n, media.edad, ds.edad), digits = 0)
peso <- round(rnorm(n, media.peso, ds.peso), digits = 2)
estatura <- round(rnorm(n, media.estatura, ds.estatura), digits = 2)

personas <- data.frame(edad = edad, peso = peso, estatura = estatura)

# First six rows of the simulated data.
head(personas)
## edad peso estatura
## 1 67 93.70 1.75
## 2 25 94.33 1.74
## 3 29 78.75 1.70
## 4 52 67.72 1.73
## 5 55 86.63 1.76
## 6 50 89.28 1.73
# Last six rows of the simulated data.
tail(personas)
## edad peso estatura
## 95 31 89.59 1.79
## 96 54 50.29 1.67
## 97 35 64.17 1.77
## 98 48 89.92 1.59
## 99 46 79.48 1.70
## 100 47 82.32 1.71
# Quartiles (25%, 50%, 75%) of each variable, computed with quantile()
# using algorithm type 6. Each result is a named vector, so the individual
# quartiles are printed by selecting them by name.

# Quartiles of edad.
cuartiled <- quantile(personas[["edad"]], probs = (1:3) / 4, type = 6)
cuartiled["25%"]
## 25%
## 40
cuartiled["50%"]
## 50%
## 46
cuartiled["75%"]
## 75%
## 52

# Quartiles of peso.
cuartilep <- quantile(personas[["peso"]], probs = (1:3) / 4, type = 6)
cuartilep["25%"]
## 25%
## 66.525
cuartilep["50%"]
## 50%
## 78.74
cuartilep["75%"]
## 75%
## 88.2125

# Quartiles of estatura.
cuartilest <- quantile(personas[["estatura"]], probs = (1:3) / 4, type = 6)
cuartilest["25%"]
## 25%
## 1.64
cuartilest["50%"]
## 50%
## 1.7
cuartilest["75%"]
## 75%
## 1.7475
# 10th, 30th, 50th, 70th and 90th percentiles of each variable, computed
# with quantile() using algorithm type 7. Each result is a named vector, so
# the individual percentiles are printed by selecting them by name.

# Percentiles of edad.
percentiled <- quantile(
  personas[["edad"]],
  probs = c(10, 30, 50, 70, 90) / 100,
  type = 7
)
percentiled["10%"]
## 10%
## 33.7
percentiled["30%"]
## 30%
## 41
percentiled["50%"]
## 50%
## 46
percentiled["70%"]
## 70%
## 51
percentiled["90%"]
## 90%
## 58.1

# Percentiles of peso.
percentilpe <- quantile(
  personas[["peso"]],
  probs = c(10, 30, 50, 70, 90) / 100,
  type = 7
)
percentilpe["10%"]
## 10%
## 57.509
percentilpe["30%"]
## 30%
## 67.776
percentilpe["50%"]
## 50%
## 78.74
percentilpe["70%"]
## 70%
## 84.062
percentilpe["90%"]
## 90%
## 94.114

# Percentiles of estatura.
percentilest <- quantile(
  personas[["estatura"]],
  probs = c(10, 30, 50, 70, 90) / 100,
  type = 7
)
percentilest["10%"]
## 10%
## 1.58
percentilest["30%"]
## 30%
## 1.66
percentilest["50%"]
## 50%
## 1.7
percentilest["70%"]
## 70%
## 1.74
percentilest["90%"]
## 90%
## 1.8
# Plain 30-bin stacked histograms, one per variable. The colour aesthetic is
# mapped to a constant label carrying the variable's name.
ggplot(personas, aes(x = edad, colour = "edad")) +
  geom_histogram(bins = 30, position = "stack")

ggplot(personas, aes(x = peso, colour = "peso")) +
  geom_histogram(bins = 30, position = "stack")

ggplot(personas, aes(x = estatura, colour = "estatura")) +
  geom_histogram(bins = 30, position = "stack")
# Kernel density curves, one per variable. The colour aesthetic is mapped to
# a constant label carrying the variable's name.
ggplot(personas, aes(x = edad, colour = "edad")) +
  geom_density()

ggplot(personas, aes(x = peso, colour = "peso")) +
  geom_density()

ggplot(personas, aes(x = estatura, colour = "estatura")) +
  geom_density()
# Histogram of edad with a dashed vertical line at each of its three
# quartiles (cuartiled). The subtitle reports the quartile values rounded
# to two decimals.
ggplot(personas, aes(x = edad, colour = "edad")) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = cuartiled[1], colour = "Q1"),
    linetype = "dashed", size = 1
  ) +
  geom_vline(
    mapping = aes(xintercept = cuartiled[2], colour = "Q2"),
    linetype = "dashed", size = 1
  ) +
  geom_vline(
    mapping = aes(xintercept = cuartiled[3], colour = "Q3"),
    linetype = "dashed", size = 1
  ) +
  labs(
    title = "Histograma de Edad",
    subtitle = paste(
      "Cuartil 1 al 25% = ", round(cuartiled[1], 2),
      ", Cuartil 2 al 50% = ", round(cuartiled[2], 2),
      ", Cuartil 3 al 75% = ", round(cuartiled[3], 2)
    )
  )
# Histogram of peso with a dashed vertical line at each of its three
# quartiles (cuartilep). The subtitle reports the quartile values rounded
# to two decimals.
ggplot(personas, aes(x = peso, colour = "peso")) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = cuartilep[1], colour = "Q1"),
    linetype = "dashed", size = 1
  ) +
  geom_vline(
    mapping = aes(xintercept = cuartilep[2], colour = "Q2"),
    linetype = "dashed", size = 1
  ) +
  geom_vline(
    mapping = aes(xintercept = cuartilep[3], colour = "Q3"),
    linetype = "dashed", size = 1
  ) +
  labs(
    title = "Histograma de Peso",
    subtitle = paste(
      "Cuartil 1 al 25% = ", round(cuartilep[1], 2),
      ", Cuartil 2 al 50% = ", round(cuartilep[2], 2),
      ", Cuartil 3 al 75% = ", round(cuartilep[3], 2)
    )
  )
# Histogram of estatura with a dashed vertical line at each of its three
# quartiles (cuartilest). The subtitle reports the quartile values rounded
# to two decimals.
ggplot(personas, aes(x = estatura, colour = "estatura")) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = cuartilest[1], colour = "Q1"),
    linetype = "dashed", size = 1
  ) +
  geom_vline(
    mapping = aes(xintercept = cuartilest[2], colour = "Q2"),
    linetype = "dashed", size = 1
  ) +
  geom_vline(
    mapping = aes(xintercept = cuartilest[3], colour = "Q3"),
    linetype = "dashed", size = 1
  ) +
  labs(
    title = "Histograma de Estatura",
    subtitle = paste(
      "Cuartil 1 al 25% = ", round(cuartilest[1], 2),
      ", Cuartil 2 al 50% = ", round(cuartilest[2], 2),
      ", Cuartil 3 al 75% = ", round(cuartilest[3], 2)
    )
  )
# Histogram of edad with a thick solid vertical line at each of the five
# percentiles stored in percentiled (10%, 30%, 50%, 70%, 90%). The subtitle
# reports the percentile values rounded to two decimals.
ggplot(personas, aes(x = edad, colour = "edad")) +
  geom_histogram(bins = 30) +
  geom_vline(
    mapping = aes(xintercept = percentiled[1], colour = "Perc1"),
    linetype = "solid", size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = percentiled[2], colour = "Perc2"),
    linetype = "solid", size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = percentiled[3], colour = "Perc3"),
    linetype = "solid", size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = percentiled[4], colour = "Perc4"),
    linetype = "solid", size = 2
  ) +
  geom_vline(
    mapping = aes(xintercept = percentiled[5], colour = "Perc5"),
    linetype = "solid", size = 2
  ) +
  labs(
    title = "Histograma de Edad. Perc = Percentiles",
    subtitle = paste(
      "Perc al 10% = ", round(percentiled[1], 2),
      "Perc al 30% = ", round(percentiled[2], 2),
      "Perc al 50% = ", round(percentiled[3], 2),
      "Perc al 70% = ", round(percentiled[4], 2),
      "Perc al 90% = ", round(percentiled[5], 2)
    )
  )
# Histogram of peso with a thick solid vertical line at each of the five
# percentiles stored in percentilpe (10%, 30%, 50%, 70%, 90%). The subtitle
# reports the percentile values rounded to two decimals.
# The title names the plotted variable (peso).
ggplot(data = personas, aes(peso, colour = "peso")) +
  geom_histogram(bins = 30) +
  geom_vline(
    aes(xintercept = percentilpe[1], color = "Perc1"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    aes(xintercept = percentilpe[2], color = "Perc2"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    aes(xintercept = percentilpe[3], color = "Perc3"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    aes(xintercept = percentilpe[4], color = "Perc4"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    aes(xintercept = percentilpe[5], color = "Perc5"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Peso. Perc = Percentiles",
    subtitle = paste(
      "Perc al 10% = ", round(percentilpe[1], 2),
      "Perc al 30% = ", round(percentilpe[2], 2),
      "Perc al 50% = ", round(percentilpe[3], 2),
      "Perc al 70% = ", round(percentilpe[4], 2),
      "Perc al 90% = ", round(percentilpe[5], 2)
    )
  )
# Histogram of estatura with a thick solid vertical line at each of the five
# percentiles stored in percentilest (10%, 30%, 50%, 70%, 90%). The subtitle
# reports the percentile values rounded to two decimals.
# The title names the plotted variable (estatura).
ggplot(data = personas, aes(estatura, colour = "estatura")) +
  geom_histogram(bins = 30) +
  geom_vline(
    aes(xintercept = percentilest[1], color = "Perc1"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    aes(xintercept = percentilest[2], color = "Perc2"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    aes(xintercept = percentilest[3], color = "Perc3"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    aes(xintercept = percentilest[4], color = "Perc4"),
    linetype = "solid",
    size = 2
  ) +
  geom_vline(
    aes(xintercept = percentilest[5], color = "Perc5"),
    linetype = "solid",
    size = 2
  ) +
  labs(
    title = "Histograma de Estatura. Perc = Percentiles",
    subtitle = paste(
      "Perc al 10% = ", round(percentilest[1], 2),
      "Perc al 30% = ", round(percentilest[2], 2),
      "Perc al 50% = ", round(percentilest[3], 2),
      "Perc al 70% = ", round(percentilest[4], 2),
      "Perc al 90% = ", round(percentilest[5], 2)
    )
  )
En este caso pudimos ver en acción y aprender sobre los percentiles y los cuartiles. Los cuartiles son valores que dividen una muestra de datos en cuatro partes iguales. Utilizando cuartiles se puede evaluar rápidamente la dispersión y la tendencia central de un conjunto de datos, que son pasos iniciales importantes para comprender los datos. Por ejemplo, el 25% de los datos es menor o igual que el primer cuartil (Q1). Como pudimos ver en el caso, aunque los cuartiles y percentiles no son muy difíciles de calcular en teoría, cuando se manejan muchos datos los programas como R nos ahorran mucho tiempo y esfuerzo, por lo que nos puede ayudar mucho saber usar estas herramientas, así como también nos apoya saber cómo se realizan los cálculos en casos como el que ahora se presentó.