| Name | Boston |
| Number of rows | 506 |
| Number of columns | 14 |
| _______________________ | |
| Column type frequency: | |
| numeric | 14 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| crim | 0 | 1 | 3.61 | 8.60 | 0.01 | 0.08 | 0.26 | 3.68 | 88.98 | ▇▁▁▁▁ |
| zn | 0 | 1 | 11.36 | 23.32 | 0.00 | 0.00 | 0.00 | 12.50 | 100.00 | ▇▁▁▁▁ |
| indus | 0 | 1 | 11.14 | 6.86 | 0.46 | 5.19 | 9.69 | 18.10 | 27.74 | ▇▆▁▇▁ |
| chas | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| nox | 0 | 1 | 0.55 | 0.12 | 0.38 | 0.45 | 0.54 | 0.62 | 0.87 | ▇▇▆▅▁ |
| rm | 0 | 1 | 6.28 | 0.70 | 3.56 | 5.89 | 6.21 | 6.62 | 8.78 | ▁▂▇▂▁ |
| age | 0 | 1 | 68.57 | 28.15 | 2.90 | 45.02 | 77.50 | 94.07 | 100.00 | ▂▂▂▃▇ |
| dis | 0 | 1 | 3.80 | 2.11 | 1.13 | 2.10 | 3.21 | 5.19 | 12.13 | ▇▅▂▁▁ |
| rad | 0 | 1 | 9.55 | 8.71 | 1.00 | 4.00 | 5.00 | 24.00 | 24.00 | ▇▂▁▁▃ |
| tax | 0 | 1 | 408.24 | 168.54 | 187.00 | 279.00 | 330.00 | 666.00 | 711.00 | ▇▇▃▁▇ |
| ptratio | 0 | 1 | 18.46 | 2.16 | 12.60 | 17.40 | 19.05 | 20.20 | 22.00 | ▁▃▅▅▇ |
| black | 0 | 1 | 356.67 | 91.29 | 0.32 | 375.38 | 391.44 | 396.22 | 396.90 | ▁▁▁▁▇ |
| lstat | 0 | 1 | 12.65 | 7.14 | 1.73 | 6.95 | 11.36 | 16.96 | 37.97 | ▇▇▅▂▁ |
| medv | 0 | 1 | 22.53 | 9.20 | 5.00 | 17.02 | 21.20 | 25.00 | 50.00 | ▂▇▅▁▁ |
# Dataset dimensions: report the row and column counts of Boston.
cat(
  "Dimensiones del dataset:", nrow(Boston), "filas y",
  ncol(Boston), "columnas\n"
)
## Dimensiones del dataset: 506 filas y 14 columnas
## 'data.frame': 506 obs. of 14 variables:
## $ crim : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
## $ zn : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
## $ indus : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
## $ chas : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nox : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
## $ rm : num 6.58 6.42 7.18 7 7.15 ...
## $ age : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
## $ dis : num 4.09 4.97 4.97 6.06 6.06 ...
## $ rad : int 1 2 2 3 3 3 5 5 5 5 ...
## $ tax : num 296 242 242 222 222 222 311 311 311 311 ...
## $ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
## $ black : num 397 397 393 395 397 ...
## $ lstat : num 4.98 9.14 4.03 2.94 5.33 ...
## $ medv : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
## crim zn indus chas nox rm age dis rad tax ptratio black lstat
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33
## 6 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18.7 394.12 5.21
## medv
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7
##
## Variables disponibles:
## [1] "crim" "zn" "indus" "chas" "nox" "rm" "age"
## [8] "dis" "rad" "tax" "ptratio" "black" "lstat" "medv"
## crim zn indus chas
## Min. : 0.00632 Min. : 0.00 Min. : 0.46 Min. :0.00000
## 1st Qu.: 0.08205 1st Qu.: 0.00 1st Qu.: 5.19 1st Qu.:0.00000
## Median : 0.25651 Median : 0.00 Median : 9.69 Median :0.00000
## Mean : 3.61352 Mean : 11.36 Mean :11.14 Mean :0.06917
## 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10 3rd Qu.:0.00000
## Max. :88.97620 Max. :100.00 Max. :27.74 Max. :1.00000
## nox rm age dis
## Min. :0.3850 Min. :3.561 Min. : 2.90 Min. : 1.130
## 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02 1st Qu.: 2.100
## Median :0.5380 Median :6.208 Median : 77.50 Median : 3.207
## Mean :0.5547 Mean :6.285 Mean : 68.57 Mean : 3.795
## 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08 3rd Qu.: 5.188
## Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
## rad tax ptratio black
## Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 0.32
## 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.:375.38
## Median : 5.000 Median :330.0 Median :19.05 Median :391.44
## Mean : 9.549 Mean :408.2 Mean :18.46 Mean :356.67
## 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:396.23
## Max. :24.000 Max. :711.0 Max. :22.00 Max. :396.90
## lstat medv
## Min. : 1.73 Min. : 5.00
## 1st Qu.: 6.95 1st Qu.:17.02
## Median :11.36 Median :21.20
## Mean :12.65 Mean :22.53
## 3rd Qu.:16.95 3rd Qu.:25.00
## Max. :37.97 Max. :50.00
# Key variables for the analysis: medv plus six other columns of Boston.
variables_g1 <- c(
  "medv", "rm", "lstat", "crim", "tax", "nox", "age"
)
# Subset Boston to those columns. all_of() makes the lookup strict: a name
# missing from the data is an error. select() is namespace-qualified so that a
# select() from another attached package cannot shadow it.
Boston_g1 <- dplyr::select(Boston, all_of(variables_g1))

# Means
cat("\n--- MEDIAS ---\n")
##
## --- MEDIAS ---
## medv rm lstat crim tax nox
## 22.5328063 6.2846344 12.6530632 3.6135236 408.2371542 0.5546951
## age
## 68.5749012
##
## --- DESVIACIONES ESTÁNDAR ---
## medv rm lstat crim tax nox
## 9.1971041 0.7026171 7.1410615 8.6015451 168.5371161 0.1158777
## age
## 28.1488614
# Multiple histograms: one facet per variable for medv, rm, lstat and crim.
Boston %>%
  dplyr::select(all_of(c("medv", "rm", "lstat", "crim"))) %>%
  # Long format (variable, valor) so a single ggplot call can facet by variable
  pivot_longer(everything(), names_to = "variable", values_to = "valor") %>%
  ggplot(aes(valor, fill = variable)) +
  geom_histogram(bins = 20, colour = "white", alpha = 0.7) +
  # Free scales: each panel gets its own x and y ranges
  facet_wrap(vars(variable), ncol = 2, scales = "free") +
  scale_fill_manual(
    values = c(
      medv = "#2E86AB",
      rm = "#A23B72",
      lstat = "#F18F01",
      crim = "#C73E1D"
    )
  ) +
  labs(
    title = "Histogramas de variables clave",
    x = "Valor",
    y = "Frecuencia"
  ) +
  theme_minimal() +
  # Must follow theme_minimal(), which would otherwise reset the legend setting
  theme(legend.position = "none")

# Individual histograms
# Lay the four base-graphics histograms out on a 2 x 2 grid. par() returns the
# previous settings, which are kept so the layout can be restored afterwards
# instead of leaking into any later base plot.
old_par <- par(mfrow = c(2, 2))
hist(
  Boston$medv,
  breaks = 20, col = "skyblue",
  main = "Histograma de MEDV",
  xlab = "Valor mediano de vivienda"
)
hist(
  Boston$rm,
  breaks = 20, col = "salmon",
  main = "Histograma de RM",
  xlab = "Número promedio de habitaciones"
)
hist(
  Boston$lstat,
  breaks = 20, col = "lightgreen",
  main = "Histograma de LSTAT",
  xlab = "% población de estatus bajo"
)
hist(
  Boston$crim,
  breaks = 20, col = "purple",
  main = "Histograma de CRIM",
  xlab = "Tasa de crimen"
)
# Restore the graphics parameters that were active before this section
par(old_par)

# Comparative boxplots
# One boxplot per variable, all drawn against a single shared y axis.
# NOTE(review): the summary statistics show tax in the hundreds while rm stays
# below 10, so the shared axis compresses the smaller variables -- confirm this
# is the intended presentation.
Boston %>%
  dplyr::select(all_of(c("medv", "rm", "lstat", "crim", "tax", "age"))) %>%
  pivot_longer(everything(), names_to = "variable", values_to = "valor") %>%
  ggplot(aes(variable, valor, fill = variable)) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Distribución de variables clave",
    subtitle = "Identificación de outliers",
    x = "Variable",
    y = "Valor"
  ) +
  theme_minimal() +
  # Must follow theme_minimal(), which would otherwise reset the legend setting
  theme(legend.position = "none")

# Correlation matrix
# Pairs plot of the key-variable subset (Boston_g1).
ggpairs(
  Boston_g1,
  # Lower triangle: scatterplots with small, translucent points
  lower = list(continuous = wrap("points", size = 0.5, alpha = 0.3)),
  # Diagonal: density curve of each variable
  diag = list(continuous = wrap("densityDiag", alpha = 0.5)),
  # Upper triangle: correlation coefficients printed as text
  upper = list(continuous = wrap("cor", size = 3)),
  title = "Matriz de Correlación - Variables Clave"
)
## medv rm lstat crim tax nox age
## medv 1.000 0.695 -0.738 -0.388 -0.469 -0.427 -0.377
## rm 0.695 1.000 -0.614 -0.219 -0.292 -0.302 -0.240
## lstat -0.738 -0.614 1.000 0.456 0.544 0.591 0.602
## crim -0.388 -0.219 0.456 1.000 0.583 0.421 0.353
## tax -0.469 -0.292 0.544 0.583 1.000 0.668 0.506
## nox -0.427 -0.302 0.591 0.421 0.668 1.000 0.731
## age -0.377 -0.240 0.602 0.353 0.506 0.731 1.000
# Correlations of every key variable with MEDV (the target variable), taken
# from the "medv" row of cor_matrix and sorted from largest to smallest signed
# value -- so the strongest negative correlation ends up last, not first.
# NOTE(review): cor_matrix is not created in the code shown here; presumably it
# comes from a chunk whose source is not echoed -- confirm.
cor_with_medv <- sort(cor_matrix["medv", ], decreasing = TRUE)
cat("\nCorrelaciones con MEDV (ordenadas):\n")
##
## Correlaciones con MEDV (ordenadas):
## medv rm age crim nox tax lstat
## 1.000 0.695 -0.377 -0.388 -0.427 -0.469 -0.738
# MEDV against LSTAT with a linear fit and its confidence band; the subtitle
# shows the correlation computed by cor() with its default method.
ggplot(Boston, aes(x = lstat, y = medv)) +
  geom_point(color = "#2E86AB", alpha = 0.6, size = 2) +
  # formula is spelled out (it is the default) so no fitting message is emitted
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE, color = "#C73E1D") +
  theme_minimal() +
  labs(
    title = "Relación entre MEDV y LSTAT",
    subtitle = paste("Correlación:", round(cor(Boston$medv, Boston$lstat), 3)),
    x = "% Población de estatus bajo (LSTAT)",
    y = "Valor mediano de vivienda (MEDV)"
  )

# MEDV against RM, same layout as the plot above.
ggplot(Boston, aes(x = rm, y = medv)) +
  geom_point(color = "#A23B72", alpha = 0.6, size = 2) +
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE, color = "#F18F01") +
  theme_minimal() +
  labs(
    title = "Relación entre MEDV y RM",
    subtitle = paste("Correlación:", round(cor(Boston$medv, Boston$rm), 3)),
    x = "Número promedio de habitaciones (RM)",
    y = "Valor mediano de vivienda (MEDV)"
  )

# MEDV against CRIM with a loess smoother. The x axis is log10-scaled, so the
# smoother is fitted on the transformed crime rate.
ggplot(Boston, aes(x = crim, y = medv)) +
  geom_point(color = "#C73E1D", alpha = 0.6, size = 2) +
  geom_smooth(method = "loess", formula = y ~ x, se = TRUE, color = "#2E86AB") +
  scale_x_log10() +
  theme_minimal() +
  labs(
    title = "Relación de MEDV vs CRIM",
    subtitle = "Escala logarítmica en eje X",
    x = "Tasa de Crimen (log10)",
    y = "Valor Mediano de Vivienda (MEDV)"
  )

# Create categories based on home value
# Add categoria_medv, a three-level label derived from medv:
#   below 20 -> "Bajo", from 20 up to (not including) 30 -> "Medio",
#   30 or more -> "Alto".
# case_when() takes the first condition that is TRUE, so the second branch is
# only reached by rows with medv >= 20. A missing medv matches no branch and
# is left as NA.
Boston <- mutate(
  Boston,
  categoria_medv = case_when(
    medv < 20 ~ "Bajo",
    medv < 30 ~ "Medio",
    medv >= 30 ~ "Alto"
  )
)
# Per-category statistics: group size plus the mean of rm, lstat, crim and tax.
Boston %>%
  group_by(categoria_medv) %>%
  summarise(
    n = n(),
    # One mean per listed column, named media_<column> in the listed order
    across(c(rm, lstat, crim, tax), mean, .names = "media_{.col}")
  )
## # A tibble: 3 × 6
## categoria_medv n media_rm media_lstat media_crim media_tax
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Alto 84 7.29 5.14 0.686 305.
## 2 Bajo 210 5.92 18.6 7.37 507.
## 3 Medio 212 6.25 9.78 1.05 351.
# Visualization by category: distribution of rm within each value category.
ggplot(Boston, aes(x = categoria_medv, y = rm, fill = categoria_medv)) +
  geom_boxplot(alpha = 0.7) +
  # categoria_medv is a character column, so the default axis order would be
  # alphabetical (Alto, Bajo, Medio); the limits put it in ordinal order.
  scale_x_discrete(limits = c("Bajo", "Medio", "Alto")) +
  scale_fill_manual(
    values = c(
      "Bajo" = "#F18F01",
      "Medio" = "#2E86AB",
      "Alto" = "#A23B72"
    )
  ) +
  theme_minimal() +
  labs(
    title = "Número de habitaciones por categoría de valor",
    x = "Categoría de valor",
    y = "Número promedio de habitaciones"
  ) +
  # Fill repeats the x axis, so the legend is dropped
  theme(legend.position = "none")

El análisis revela que:
Este análisis fue generado el 2026-03-09 utilizando R y R Markdown.