## id zona piso estrato
## Min. : 1 Length:8330 Min. : 1.000 Min. :3.000
## 1st Qu.:2082 Class :character 1st Qu.: 2.000 1st Qu.:4.000
## Median :4164 Mode :character Median : 3.000 Median :5.000
## Mean :4164 Mean : 3.772 Mean :4.634
## 3rd Qu.:6246 3rd Qu.: 5.000 3rd Qu.:5.000
## Max. :8319 Max. :12.000 Max. :6.000
## NA's :3 NA's :2641 NA's :3
## preciom areaconst parquea banios
## Min. : 58.0 Min. : 30 Min. : 1.000 Min. : 0.000
## 1st Qu.: 220.0 1st Qu.: 80 1st Qu.: 1.000 1st Qu.: 2.000
## Median : 330.0 Median : 123 Median : 2.000 Median : 3.000
## Mean : 434.2 Mean : 175 Mean : 1.836 Mean : 3.112
## 3rd Qu.: 540.0 3rd Qu.: 229 3rd Qu.: 2.000 3rd Qu.: 4.000
## Max. :1999.0 Max. :1745 Max. :10.000 Max. :10.000
## NA's :2 NA's :3 NA's :1606 NA's :3
## habitac tipo barrio longitud
## Min. : 0.000 Length:8330 Length:8330 Min. :-76576.00
## 1st Qu.: 3.000 Class :character Class :character 1st Qu.:-76506.00
## Median : 3.000 Mode :character Mode :character Median : -76.54
## Mean : 3.605 Mean :-21845.13
## 3rd Qu.: 4.000 3rd Qu.: -76.52
## Max. :10.000 Max. : -76.46
## NA's :3 NA's :3
## latitud
## Min. : 3.333
## 1st Qu.: 3.390
## Median : 3.450
## Mean : 970.370
## 3rd Qu.:3367.000
## Max. :3497.000
## NA's :3
## Descriptive Statistics
## vf_sin_na
## N: 4808
##
## areaconst banios estrato habitac id latitud longitud
## ----------------- ----------- --------- --------- --------- --------- --------- -----------
## Mean 174.76 3.22 4.84 3.56 4426.92 725.01 -16216.63
## Std.Dev 138.29 1.35 0.93 1.33 2306.71 1393.47 31203.96
## Min 40.00 0.00 3.00 0.00 1.00 3.33 -76576.00
## Q1 85.00 2.00 4.00 3.00 2478.50 3.38 -76.55
## Median 123.00 3.00 5.00 3.00 4473.50 3.43 -76.54
## Q3 225.00 4.00 6.00 4.00 6413.50 3.49 -76.52
## Max 1500.00 10.00 6.00 10.00 8316.00 3493.00 -76.46
## MAD 74.13 1.48 1.48 0.00 2926.65 0.08 0.02
## IQR 140.00 2.00 2.00 1.00 3934.50 0.10 0.03
## CV 0.79 0.42 0.19 0.37 0.52 1.92 -1.92
## Skewness 2.57 0.90 -0.36 1.84 -0.10 1.41 -1.42
## SE.Skewness 0.04 0.04 0.04 0.04 0.04 0.04 0.04
## Kurtosis 10.97 0.82 -0.76 5.27 -1.13 0.00 0.00
## N.Valid 4808.00 4808.00 4808.00 4808.00 4808.00 4808.00 4808.00
## Pct.Valid 100.00 100.00 100.00 100.00 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## parquea piso preciom
## ----------------- --------- --------- ---------
## Mean 1.82 3.89 457.19
## Std.Dev 1.10 2.67 325.62
## Min 1.00 1.00 58.00
## Q1 1.00 2.00 244.00
## Median 2.00 3.00 350.00
## Q3 2.00 5.00 560.00
## Max 10.00 12.00 1999.00
## MAD 1.48 1.48 192.74
## IQR 1.00 3.00 315.50
## CV 0.61 0.69 0.71
## Skewness 2.28 1.23 1.84
## SE.Skewness 0.04 0.04 0.04
## Kurtosis 7.83 0.84 3.53
## N.Valid 4808.00 4808.00 4808.00
## Pct.Valid 100.00 100.00 100.00
## 4. Resultados
##### ANÁLISIS VIVIENDA FALTANTES ####
###### 1. Librerías ####
library(paqueteMETODOS)
library(dplyr)
library(ggplot2)
library(naniar)
library(tidyverse)
library(psych)
###### 2. Load data #####
# Bring the packaged `vivienda_faltantes` data set into the workspace.
data("vivienda_faltantes")

# Print several overviews of the raw data: base summary, psych and
# summarytools descriptive tables, and the column types.
summary(vivienda_faltantes)
psych::describe(vivienda_faltantes)
summarytools::descr(vivienda_faltantes)
vivienda_faltantes %>% glimpse()

###### 3. Remove repeated rows ######
# Keep only the first occurrence of each row (8321 observations remain).
vfmod_No_rep <- vivienda_faltantes[!duplicated(vivienda_faltantes), , drop = FALSE]

###### 4. Missing values as a percentage ######
vfmod_No_rep %>% vis_miss()
### Latitude and longitude correction
# Coordinates whose magnitude exceeds 1000 are divided by 1000; every other
# value is left untouched. (Presumably those rows lost their decimal
# separator on import -- confirm against the raw data.)
vfmod_No_rep <- vfmod_No_rep %>%
  mutate(
    latitud = ifelse(latitud > 1000, latitud / 1000, latitud),
    longitud = ifelse(abs(longitud) > 1000, longitud / 1000, longitud)
  )
### Latitude / longitude scatter plot
# `colores` is not used by the plot below; the assignment is kept in case
# other parts of the script reference it. It must not be passed to
# geom_point() as a constant colour: a constant aesthetic has to be length 1
# (or one value per row), so a 3-element vector makes ggplot2 fail when the
# plot is rendered.
colores <- c("red", "blue", "green")

# Single scatter plot of the corrected coordinates, drawn in one colour.
ggplot(vfmod_No_rep, aes(x = latitud, y = longitud)) +
  geom_point(color = "green")
### Keep only the rows where every variable is present
vf_sin_na <- vfmod_No_rep[complete.cases(vfmod_No_rep), ]
# Rows with at least one missing value, kept for reference:
# df_si_na <- vfmod_No_rep[!complete.cases(vfmod_No_rep), ]

# Lower-case the text columns so that spellings differing only in case
# collapse into a single category.
columnas_texto <- c("zona", "tipo", "barrio")
vf_sin_na[columnas_texto] <- lapply(vf_sin_na[columnas_texto], tolower)

# Spelling variants of `barrio` (name = variant found, value = spelling kept).
# Both "alfonso" variants now map to the same target; previously the second
# rule mapped back to the unaccented spelling that the first rule had just
# replaced, so the two were never merged.
# NOTE(review): some targets carry accents and others do not ("melendez",
# "alferez real"); the original targets are preserved here -- confirm which
# convention the rest of the data follows.
barrio_equivalencias <- c(
  "alfonso lopez"     = "alfonso lópez",
  "alfonso lópez i"   = "alfonso lópez",
  "meléndez"          = "melendez",
  "alférez real"      = "alferez real",
  "ciudad jardin"     = "ciudad jardín",
  "ciudad los alamos" = "ciudad los álamos",
  "ciudad cordoba"    = "ciudad córdoba",
  "ciudad meléndez"   = "ciudad melendez"
)

vf_sin_na <- vf_sin_na %>%
  mutate(
    tipo = ifelse(tipo == "apto", "apartamento", tipo),
    # Strip surrounding whitespace first so that "ciudad cordoba" matches
    # with or without a trailing space (the old rule only matched the
    # variant that had the trailing space).
    barrio = trimws(barrio),
    # Look each value up in the table; values without an entry yield NA and
    # fall back to the current spelling.
    barrio = coalesce(unname(barrio_equivalencias[barrio]), barrio)
  )
# Price distribution on its own, then the pairwise overview of all columns.
boxplot(vf_sin_na$preciom)
pairs.panels(vf_sin_na, gap = 0)

# Base-graphics panels, three plots per row. Each group below fills exactly
# one row, so the layout is set once. The previous settings are saved and
# restored afterwards so later plots get the whole device again.
par_previo <- par(mfrow = c(1, 3))

# Histograms: stratum, price and built area.
hist(vf_sin_na$estrato, col = "green", breaks = 15, main = "Estrato")
hist(vf_sin_na$preciom, col = "#87CEFA", breaks = 15, main = "Precio")
hist(vf_sin_na$areaconst, col = "#90EE90", breaks = 15, main = "Area")

# Box plots of the same three variables.
boxplot(vf_sin_na$estrato, col = "green", main = "Estrato")
boxplot(vf_sin_na$preciom, col = "blue", main = "Precio")
boxplot(vf_sin_na$areaconst, col = "orange", main = "Area")

# Histograms: parking spaces, bathrooms and bedrooms.
hist(vf_sin_na$parquea, col = "green", breaks = 15, main = "Parqueaderos")
hist(vf_sin_na$banios, col = "#87CEFA", breaks = 15, main = "Baños")
hist(vf_sin_na$habitac, col = "#90EE90", breaks = 15, main = "Habitaciones")

# Box plots of the same three variables.
boxplot(vf_sin_na$parquea, col = "green", main = "Parquea")
boxplot(vf_sin_na$banios, col = "blue", main = "Bañios")
boxplot(vf_sin_na$habitac, col = "orange", main = "habitaciones")

par(par_previo)

# Mean price per zone. The point plot reads from this summary table:
# `media_zonas` is a data frame, so it cannot be used as the `y` aesthetic;
# its `media_precio` column is what gets plotted.
media_zonas <- vf_sin_na %>%
  group_by(zona) %>%
  summarize(media_precio = mean(preciom))

ggplot(media_zonas, aes(x = zona, y = media_precio)) +
  geom_point(color = "red")

# The second, identical copy of the panels above was removed; its last line
# also carried an unbalanced ")" that stopped the whole script from parsing.
# Bar chart of the mean price per zone, one bar per row of `media_zonas`.
barplot(
  height = media_zonas$media_precio,
  names.arg = media_zonas$zona,
  col = topo.colors(5),
  main = "Graf",
  xlab = "Zona",
  ylab = "media_precio"
)

# Overlaid, semi-transparent price densities filled by zone, with the legend
# placed below the plot.
ggplot(vf_sin_na, aes(x = preciom, fill = zona)) +
  geom_density(alpha = 0.5) +
  theme(legend.position = "bottom")