Análisis de la base de datos
# Preview the first six rows of the housing data
head(vivienda)
# A tibble: 6 × 13
id zona piso estrato preciom areaconst parqueaderos banios habitaciones
<dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1147 Zona O… <NA> 3 250 70 1 3 6
2 1169 Zona O… <NA> 3 320 120 1 2 3
3 1350 Zona O… <NA> 3 350 220 2 2 4
4 5992 Zona S… 02 4 400 280 3 5 3
5 1212 Zona N… 01 5 260 90 1 2 3
6 1724 Zona N… 01 5 240 87 1 3 3
# ℹ 4 more variables: tipo <chr>, barrio <chr>, longitud <dbl>, latitud <dbl>
Análisis descriptivo
[1] 8322 13
spc_tbl_ [8,322 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ id : num [1:8322] 1147 1169 1350 5992 1212 ...
$ zona : chr [1:8322] "Zona Oriente" "Zona Oriente" "Zona Oriente" "Zona Sur" ...
$ piso : chr [1:8322] NA NA NA "02" ...
$ estrato : num [1:8322] 3 3 3 4 5 5 4 5 5 5 ...
$ preciom : num [1:8322] 250 320 350 400 260 240 220 310 320 780 ...
$ areaconst : num [1:8322] 70 120 220 280 90 87 52 137 150 380 ...
$ parqueaderos: num [1:8322] 1 1 2 3 1 1 2 2 2 2 ...
$ banios : num [1:8322] 3 2 2 5 2 3 2 3 4 3 ...
$ habitaciones: num [1:8322] 6 3 4 3 3 3 3 4 6 3 ...
$ tipo : chr [1:8322] "Casa" "Casa" "Casa" "Casa" ...
$ barrio : chr [1:8322] "20 de julio" "20 de julio" "20 de julio" "3 de julio" ...
$ longitud : num [1:8322] -76.5 -76.5 -76.5 -76.5 -76.5 ...
$ latitud : num [1:8322] 3.43 3.43 3.44 3.44 3.46 ...
- attr(*, "spec")=List of 3
..$ cols :List of 13
.. ..$ id : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
.. ..$ zona : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ piso : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ estrato : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
.. ..$ preciom : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
.. ..$ areaconst : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
.. ..$ parqueaderos: list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
.. ..$ banios : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
.. ..$ habitaciones: list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
.. ..$ tipo : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ barrio : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_character" "collector"
.. ..$ longitud : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
.. ..$ latitud : list()
.. .. ..- attr(*, "class")= chr [1:2] "collector_double" "collector"
..$ default: list()
.. ..- attr(*, "class")= chr [1:2] "collector_guess" "collector"
..$ delim : chr ";"
..- attr(*, "class")= chr "col_spec"
- attr(*, "problems")=<externalptr>
id zona piso estrato
Min. : 1 Length:8322 Length:8322 Min. :3.000
1st Qu.:2080 Class :character Class :character 1st Qu.:4.000
Median :4160 Mode :character Mode :character Median :5.000
Mean :4160 Mean :4.634
3rd Qu.:6240 3rd Qu.:5.000
Max. :8319 Max. :6.000
NA's :3 NA's :3
preciom areaconst parqueaderos banios
Min. : 58.0 Min. : 30.0 Min. : 1.000 Min. : 0.000
1st Qu.: 220.0 1st Qu.: 80.0 1st Qu.: 1.000 1st Qu.: 2.000
Median : 330.0 Median : 123.0 Median : 2.000 Median : 3.000
Mean : 433.9 Mean : 174.9 Mean : 1.835 Mean : 3.111
3rd Qu.: 540.0 3rd Qu.: 229.0 3rd Qu.: 2.000 3rd Qu.: 4.000
Max. :1999.0 Max. :1745.0 Max. :10.000 Max. :10.000
NA's :2 NA's :3 NA's :1605 NA's :3
habitaciones tipo barrio longitud
Min. : 0.000 Length:8322 Length:8322 Min. :-76.59
1st Qu.: 3.000 Class :character Class :character 1st Qu.:-76.54
Median : 3.000 Mode :character Mode :character Median :-76.53
Mean : 3.605 Mean :-76.53
3rd Qu.: 4.000 3rd Qu.:-76.52
Max. :10.000 Max. :-76.46
NA's :3 NA's :3
latitud
Min. :3.333
1st Qu.:3.381
Median :3.416
Mean :3.418
3rd Qu.:3.452
Max. :3.498
NA's :3
id zona piso estrato preciom areaconst
3 3 2638 3 2 3
parqueaderos banios habitaciones tipo barrio longitud
1605 3 3 3 3 3
latitud
3
[1] 3514
Base sin datos faltantes
# Drop every record that has a missing value in any of the 13 columns.
# NOTE(review): the NA counts above show that `piso` (2638) and
# `parqueaderos` (1605) account for almost all incomplete rows; `piso` is not
# among the columns used by the PCA (5:9), so filtering on it discards rows
# that analysis could keep -- confirm this is intended.
viviendaSNA <- na.omit(vivienda)
# Quick check: dimensions of the complete-case data
dim(viviendaSNA)
[1] 4808 13
# Count the rows that still contain a missing value (0 expected after na.omit)
sum(!complete.cases(viviendaSNA))
[1] 0
Visualización en Viewer
# View() opens the interactive data viewer (e.g. in RStudio) and is not
# available while the document is being knitted, so the calls are only made
# from an interactive session; a non-interactive render simply skips them.
if (interactive()) {
  View(vivienda)     # raw data, including incomplete records
  View(viviendaSNA)  # complete cases only
}
Análisis de componentes principales
# Standardise (centre and scale) the five numeric housing variables; these are
# columns 5 to 9 of the data, selected here by name.
viviendaZ <- scale(
  viviendaSNA[, c(
    "preciom", "areaconst", "parqueaderos", "banios", "habitaciones"
  )]
)
# First rows of the standardised matrix
head(viviendaZ)
preciom areaconst parqueaderos banios habitaciones
[1,] -0.1756310 0.7609789 1.0779092 1.3178809 -0.4241459
[2,] -0.6055839 -0.6129041 -0.7415001 -0.9022913 -0.4241459
[3,] -0.6670057 -0.6345970 -0.7415001 -0.1622339 -0.4241459
[4,] -0.7284276 -0.8876807 0.1682046 -0.9022913 -0.4241459
[5,] -0.4520293 -0.2730489 0.1682046 -0.1622339 0.3272519
[6,] -0.4213184 -0.1790463 0.1682046 0.5778235 1.8300475
# Per-column summary statistics of the standardised variables
summary(viviendaZ)
preciom areaconst parqueaderos banios
Min. :-1.2259 Min. :-0.9745 Min. :-0.7415 Min. :-2.3824
1st Qu.:-0.6532 1st Qu.:-0.6491 1st Qu.:-0.7415 1st Qu.:-0.9023
Median :-0.3292 Median :-0.3743 Median : 0.1682 Median :-0.1622
Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
3rd Qu.: 0.3157 3rd Qu.: 0.3633 3rd Qu.: 0.1682 3rd Qu.: 0.5778
Max. : 4.7350 Max. : 9.5828 Max. : 7.4458 Max. : 5.0182
habitaciones
Min. :-2.6783
1st Qu.:-0.4241
Median :-0.4241
Mean : 0.0000
3rd Qu.: 0.3273
Max. : 4.8356
library(mice)
# Display the missing-data pattern of the standardised matrix
md.pattern(viviendaZ)
/\ /\
{ `---' }
{ O O }
==> V <== No need for mice. This data set is completely observed.
\ \|/ /
`-----'

preciom areaconst parqueaderos banios habitaciones
4808 1 1 1 1 1 0
0 0 0 0 0 0
library(factoextra)
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Principal component analysis of the standardised variables. The input was
# already scaled with scale(), so prcomp() only centres it (its defaults,
# written out explicitly here).
res.pca <- prcomp(viviendaZ, center = TRUE, scale. = FALSE)
# Scree plot with the value of each bar printed as a label
fviz_eig(res.pca, addlabels = TRUE)
Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
Ignoring empty aesthetic: `width`.

# Plot of the variables on the principal components of `res.pca`
fviz_pca_var(
  res.pca,
  col.var = "contrib",                      # colour by contribution to the PCs
  gradient.cols = c("#FF7F00", "#034D94"),  # two-colour gradient for that scale
  repel = TRUE                              # keep text labels from overlapping
)
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
ℹ The deprecated feature was likely used in the ggpubr package.
Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
This warning is displayed once per session.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
ℹ The deprecated feature was likely used in the factoextra package.
Please report the issue at <https://github.com/kassambara/factoextra/issues>.
This warning is displayed once per session.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.

Análisis de conglomerados
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.2.0 ✔ readr 2.1.5
✔ forcats 1.0.1 ✔ stringr 1.6.0
✔ lubridate 1.9.5 ✔ tibble 3.3.1
✔ purrr 1.2.1 ✔ tidyr 1.3.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::combine() masks gridExtra::combine()
✖ dplyr::filter() masks mice::filter(), stats::filter()
✖ dplyr::lag() masks stats::lag()
✖ tibble::view() masks summarytools::view()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Hierarchical clustering of the standardised variables.
# Convert to a data frame so that dplyr verbs can be applied to it.
viviendaZ <- as.data.frame(viviendaZ)

# Pairwise Euclidean distances between observations
dist_viv <- dist(viviendaZ, method = "euclidean")

# Hierarchical clustering with the "complete" method
hc_viv <- hclust(dist_viv, method = "complete")

# Cut the tree into four groups: one cluster label per observation
cluster_assigments <- cutree(hc_viv, k = 4)

# Attach the labels to the data as a factor column
assigned_cluster <- mutate(
  viviendaZ,
  cluster = as.factor(cluster_assigments)
)
# Dendrogram drawn on a random subset of rows so that it stays readable.
# NOTE(review): the tree below is refitted on the sample only, so its four
# groups are not the `cluster_assigments` obtained from the full data.
set.seed(123)  # fixed seed so the same rows are sampled on every run

# At most 200 rows (all of them if the data has fewer)
viv_muestra <- dplyr::slice_sample(
  viviendaZ,
  n = min(200, nrow(viviendaZ))
)

# Same distance and method as for the full data, applied to the sample
dist_muestra <- dist(viv_muestra, method = "euclidean")
hc_muestra <- hclust(dist_muestra, method = "complete")

plot(
  hc_muestra,
  main = "Dendrograma (muestra viviendaZ)",
  xlab = "Observaciones",
  ylab = "Distancia euclidiana",
  cex = 0.6,
  las = 1
)
# Outline the four clusters on the dendrogram
rect.hclust(hc_muestra, k = 4, border = 2:5)

# Two-dimensional view of the sample clusters: a PCA refitted on the sampled
# rows supplies the axes, and the sample dendrogram cut at k = 4 supplies the
# colours.
res_pca_m <- prcomp(viv_muestra)
cluster_m <- cutree(hc_muestra, k = 4)

# Scores on the first two components plus the cluster label of each row
pca_df <- data.frame(
  PC1 = res_pca_m$x[, "PC1"],
  PC2 = res_pca_m$x[, "PC2"],
  cluster = factor(cluster_m)
)

ggplot(pca_df, aes(x = PC1, y = PC2, color = cluster)) +
  geom_point(size = 2, alpha = 0.8) +
  theme_minimal() +
  labs(
    title = "Clusters de viviendaZ (muestra)",
    x = "PC1",
    y = "PC2",
    color = "Cluster"
  )

Análisis de correspondencia