# Knitr chunk defaults: echo the code and surface warnings in the rendered report.
knitr::opts_chunk$set(echo = TRUE, warning = TRUE)

# Show the current working directory, then point it at the project folder.
getwd()
## [1] "C:/Users/ruber/OneDrive/Documentos/Taller 2"
setwd(dir = "C:/Users/ruber/OneDrive/Documentos/Taller 2")

# Load the for-sale property listing into a data frame and check its size.
datos <- read.csv(
  file = "C:/Users/ruber/OneDrive/Documentos/Taller 2/Inmuebles_Disponibles_Para_La_Venta_20250520.csv"
)
dim(datos)
## [1] 448 12

# Strongly prefer fixed over scientific notation so large prices print in full.
options(scipen = 999)
str(datos)
## 'data.frame': 448 obs. of 12 variables:
## $ Codigo : int 17745 17778 17797 17798 12115 12116 12117 17800 2330 2363 ...
## $ Ciudad : chr "BOGOTA" "BOGOTA" "BOGOTA" "PEREIRA" ...
## $ Departamento : chr "CUNDINAMARCA" "CUNDINAMARCA" "CUNDINAMARCA" "RISARALDA" ...
## $ Barrio : chr "" "" "" "" ...
## $ Direccion : chr "CALLE 22D NO. 120-19 -FONTIBÓN" "KR 77 H No. 65 C - 33 SUR" "CR 70 No. 49-77" "CALLES 18 Y 19 CARRERAS 5 Y 6 SALIDA CARRERA 5 No. 18 - 43 / CARRERA 5 CALLES 18 Y 19 No. 18 - 49 / CALLE "| __truncated__ ...
## $ Area.Terreno : int 0 0 0 0 6400000 13162700 13400000 0 559804 302079 ...
## $ Area.Construida : int 0 0 0 0 70000 0 76306 0 0 0 ...
## $ Detalle.Disponibilidad: chr "COMERCIALIZABLE CON RESTRICCION" "COMERCIALIZABLE CON RESTRICCION" "COMERCIALIZABLE CON RESTRICCION" "COMERCIALIZABLE CON RESTRICCION" ...
## $ Estrato : chr "INDUSTRIAL" "DOS" "CUATRO" "CINCO" ...
## $ Precio : num 274462556400000 27076410000000 14482416000000 43343535000000 10428866940000 ...
## $ Tipo.de.Inmueble : chr "BODEGA" "LOTE CON CONSTRUCCION" "CASA" "CLINICA" ...
## $ Datos.Adicionales : chr "" "" "" "" ...
# Recode empty strings and zeros as missing values, in every column at once.
datos[datos == "" | datos == "0"] <- NA

# Number of missing cells, and their share of all cells as a percentage.
Total_NA <- sum(is.na(datos))
print(Total_NA)
## [1] 1623
Total_NA / prod(dim(datos)) * 100
## [1] 30.18973

# TRUE when at least one row is an exact copy of an earlier row.
anyDuplicated(datos) > 0
## [1] FALSE
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.2
## Warning: package 'stringr' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(sandwich)
## Warning: package 'sandwich' was built under R version 4.4.3
library(survival)
# Tukey fences for Precio: anything outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] is left out of the boxplot below.
Q1 <- quantile(datos$Precio, 0.25, na.rm = TRUE)
Q3 <- quantile(datos$Precio, 0.75, na.rm = TRUE)
IQR_Valor <- Q3 - Q1
LimF <- Q1 - 1.5 * IQR_Valor
LimS <- Q3 + 1.5 * IQR_Valor

# Keep rows with a known price and department whose price lies inside the
# fences; only the two columns needed for the plot are retained.
datos_filtrados <- datos %>%
  filter(!is.na(Precio), !is.na(Departamento), Precio >= LimF, Precio <= LimS) %>%
  select(Precio, Departamento)

# One box per department, ordered left to right by median price. The fill
# legend is hidden because it would only repeat the x-axis labels.
ggplot(
  datos_filtrados,
  aes(
    x = fct_reorder(Departamento, Precio, .fun = median),
    y = Precio,
    fill = Departamento
  )
) +
  geom_boxplot(outlier.colour = "blue", outlier.shape = 16) +
  labs(
    title = "Distribución de Precios de Inmuebles Por Departamento",
    x = "Departamento",
    y = "Precio"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
# Restrict the histogram to positive prices no larger than 1.5 times the
# overall median price.
mediana <- median(datos$Precio, na.rm = TRUE)
Datos_Dstr <- datos %>%
  filter(!is.na(Precio), Precio > 0, Precio <= 1.5 * mediana)

# Skewness of the retained prices; printed in the top-right corner of the plot.
Distribucion1 <- skewness(Datos_Dstr$Precio, na.rm = TRUE)

# The aesthetic refers to the column by name (not Datos_Dstr$Precio), which is
# what ggplot2 expects and avoids its "use of `$` is discouraged" warning.
ggplot(Datos_Dstr, aes(x = Precio)) +
  geom_histogram(bins = 30, fill = "blue", color = "black") +
  labs(
    title = "Distribución de Precios de Inmuebles (COP)",
    x = "Precio",
    y = "Frecuencia"
  ) +
  annotate(
    "text",
    x = Inf, y = Inf,
    label = paste0("Skewness: ", round(Distribucion1, 2)),
    hjust = 1.1, vjust = 2, size = 5, color = "#301"
  )

# Ten most frequent property types, with their listing count and mean price.
datos_grafico <- datos %>%
  filter(!is.na(Tipo.de.Inmueble), !is.na(Precio)) %>%
  group_by(Tipo.de.Inmueble) %>%
  summarise(
    conteo = n(),
    precio_medio = mean(Precio, na.rm = TRUE)
  ) %>%
  arrange(desc(conteo)) %>%
  slice_head(n = 10)

# Price units per count unit. Dividing a price by this factor places it on the
# count axis (the largest mean price lines up with the tallest bar); the
# secondary axis multiplies by the same factor to label it back in prices.
escala_precio <- max(datos_grafico$precio_medio) / max(datos_grafico$conteo)

# Bars show counts (left axis); the line and points show mean price (right axis).
ggplot(datos_grafico, aes(x = reorder(Tipo.de.Inmueble, -conteo))) +
  geom_col(aes(y = conteo), fill = "purple", alpha = 0.7) +
  geom_line(
    aes(y = precio_medio / escala_precio),
    group = 1, color = "#300", linewidth = 1
  ) +
  geom_point(
    aes(y = precio_medio / escala_precio),
    color = "#300", size = 3
  ) +
  scale_y_continuous(
    name = "Cantidad de Inmuebles",
    sec.axis = sec_axis(~ . * escala_precio, name = "Precio Promedio (COP)")
  ) +
  labs(
    title = "Tipos de Inmueble Más Comunes y su Precio Promedio",
    x = "Tipo de Inmueble"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Headline descriptive statistics for Precio over all listings, ignoring NAs.
# Mediana is the median and Media is the arithmetic mean; Q2 is the 50th
# percentile and therefore equals Mediana.
Estadisticas_Clave <- datos %>%
  summarise(
    Mediana = median(Precio, na.rm = TRUE),
    Media = mean(Precio, na.rm = TRUE),
    SD = sd(Precio, na.rm = TRUE),
    Q1 = quantile(Precio, 0.25, na.rm = TRUE),
    Q2 = quantile(Precio, 0.50, na.rm = TRUE),
    Q3 = quantile(Precio, 0.75, na.rm = TRUE)
  )
print(Estadisticas_Clave)
## Mediana Media SD Q1 Q2
## 1 165205000000 9393874307823 38219834168449 125725000000 165205000000
## Q3
## 1 1861440000000
# Frequency table of property types: absolute count and share of all rows,
# most common type first.
Tabla_F <- datos %>%
  group_by(Tipo.de.Inmueble) %>%
  summarise(Frecuencia = n()) %>%
  mutate(Frecuencia_Relativa = Frecuencia / nrow(datos)) %>%
  arrange(-Frecuencia)
print(Tabla_F)
## # A tibble: 19 × 3
## Tipo.de.Inmueble Frecuencia Frecuencia_Relativa
## <chr> <int> <dbl>
## 1 LOCAL 304 0.679
## 2 LOTE VIVIENDA 38 0.0848
## 3 OFICINA 28 0.0625
## 4 CASA 14 0.0312
## 5 BODEGA 8 0.0179
## 6 FINCA 8 0.0179
## 7 LOTE CON CONSTRUCCION 8 0.0179
## 8 LOTE MIXTO 6 0.0134
## 9 APARTAMENTO 5 0.0112
## 10 LOTE AGRICOLA 5 0.0112
## 11 EDIFICIO 4 0.00893
## 12 LOTE 4 0.00893
## 13 LOTE INDUSTRIAL 4 0.00893
## 14 CLINICA 3 0.00670
## 15 LOTE COMERCIAL 3 0.00670
## 16 EDIFICIO VIVIENDA 2 0.00446
## 17 GARAJE 2 0.00446
## 18 HOTEL 1 0.00223
## 19 LOTE NO URBANIZABLE 1 0.00223
options(scipen = 999)

# Summarise Precio by one grouping column.
#
# Rows where either the grouping column or Precio is NA are dropped first.
#
# @param datos Data frame with a Precio column and the grouping column.
# @param grupo Unquoted name of the grouping column (e.g. Estrato).
# @return A tibble with one row per group: the group value, Inmuebles (row
#   count), precio_medio (mean) and precio_mediana (median), sorted by the
#   group value in descending order.
resumen_precio_por <- function(datos, grupo) {
  datos %>%
    filter(!is.na({{ grupo }}), !is.na(Precio)) %>%
    group_by({{ grupo }}) %>%
    summarise(
      Inmuebles = n(),
      precio_medio = mean(Precio, na.rm = TRUE),
      precio_mediana = median(Precio, na.rm = TRUE)
    ) %>%
    arrange(desc({{ grupo }}))
}

# Price summary by Estrato.
Promedio_P_Estrato <- resumen_precio_por(datos, Estrato)
print(Promedio_P_Estrato)
## # A tibble: 9 × 4
## Estrato Inmuebles precio_medio precio_mediana
## <chr> <int> <dbl> <dbl>
## 1 UNO 1 9.82e11 9.82e11
## 2 TRES 16 3.87e13 7.01e12
## 3 SEIS 18 7.06e12 2.13e12
## 4 RURAL 36 3.08e13 1.54e13
## 5 INDUSTRIAL 3 1.22e14 7.09e13
## 6 DOS 32 1.30e13 3.96e11
## 7 CUATRO 13 7.84e12 1.48e12
## 8 COMERCIAL 320 4.16e12 1.27e11
## 9 CINCO 8 1.60e13 9.59e12

# Price summary by Ciudad.
Promedio_P_Ciudad <- resumen_precio_por(datos, Ciudad)
print(Promedio_P_Ciudad)
## # A tibble: 48 × 4
## Ciudad Inmuebles precio_medio precio_mediana
## <chr> <int> <dbl> <dbl>
## 1 YUMBO 1 6.88e12 6.88e12
## 2 VILLAVICENCIO 285 1.76e11 1.26e11
## 3 VILLA RICA 1 7.09e13 7.09e13
## 4 TURBO 1 1.70e13 1.70e13
## 5 TIBU 1 5.81e12 5.81e12
## 6 TENJO 1 6.37e12 6.37e12
## 7 TARAZA 1 8.08e11 8.08e11
## 8 SOGAMOSO 1 1.47e14 1.47e14
## 9 SOATA 1 6.83e12 6.83e12
## 10 SANTANDER DE QUILICHAO 2 7.25e12 7.25e12
## # ℹ 38 more rows
library(dplyr)
library(sf)
## Warning: package 'sf' was built under R version 4.4.3
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.4.3
library(stringr)
library(ggmap)
## Warning: package 'ggmap' was built under R version 4.4.3
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service>
## OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(tidygeocoder)
## Warning: package 'tidygeocoder' was built under R version 4.4.3
##
## Adjuntando el paquete: 'tidygeocoder'
## The following object is masked from 'package:ggmap':
##
## geocode
library(viridis)
## Warning: package 'viridis' was built under R version 4.4.3
## Cargando paquete requerido: viridisLite
# Read the shapefile layer from the "Colombia Datos" folder and keep only the
# department name (exposed as Departamento) together with the geometry.
base_mapa <- st_read("C:/Users/ruber/OneDrive/Documentos/Taller 2/Colombia Datos") %>%
  rename(Departamento = DPTO_CNMBR) %>%
  select(Departamento, geometry)
## Reading layer `COLOMBIA' from data source
## `C:\Users\ruber\OneDrive\Documentos\Taller 2\Colombia Datos'
## using driver `ESRI Shapefile'
## Simple feature collection with 33 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -81.73562 ymin: -4.229406 xmax: -66.84722 ymax: 13.39473
## Geodetic CRS: MAGNA-SIRGAS
# Total listed price per department, expressed in units of 1e12 and sorted
# from the largest total to the smallest.
inmuebles_P <- datos %>%
  group_by(Departamento) %>%
  summarise(
    Precio_Bill = sum(Precio, na.rm = TRUE) / 1e12
  ) %>%
  arrange(desc(Precio_Bill))

# Distinct department names on each side of the join, kept for comparison.
# base_mapa stores the name as Departamento: DPTO_CNMBR was renamed when the
# layer was loaded, so reading base_mapa$DPTO_CNMBR would yield NULL.
deptos_shapefile <- unique(base_mapa$Departamento)
deptos_csv <- unique(inmuebles_P$Departamento)

# Print a summary of the map geometries.
st_geometry(base_mapa)
## Geometry set for 33 features
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -81.73562 ymin: -4.229406 xmax: -66.84722 ymax: 13.39473
## Geodetic CRS: MAGNA-SIRGAS
## First 5 geometries:
## MULTIPOLYGON (((-76.41355 8.873829, -76.40465 8...
## MULTIPOLYGON (((-74.84946 11.09778, -74.84938 1...
## MULTIPOLYGON (((-74.07059 4.828562, -74.07036 4...
## MULTIPOLYGON (((-76.17318 9.387846, -76.17287 9...
## MULTIPOLYGON (((-72.17368 7.053077, -72.17277 7...
# Column names available on the map layer.
colnames(base_mapa)
## [1] "Departamento" "geometry"

# Plain reference map: every polygon filled red with white borders, no axes.
ggplot(data = base_mapa) +
  geom_sf(color = "white", fill = "red") +
  labs(title = "Mapa de Colombia") +
  theme_void()

# inmuebles_P (total listed price per department, in units of 1e12) is already
# available from the aggregation earlier in the script, so it is not
# recomputed here.
#
# Attach the totals to every department polygon. The join key is stated
# explicitly so a change in either table's columns cannot silently alter it.
# Polygons without a match in inmuebles_P get NA, which is recoded to 0.
Mapa_Final <- base_mapa %>%
  left_join(inmuebles_P, by = "Departamento") %>%
  mutate(Precio_Bill = coalesce(Precio_Bill, 0))
# Bucket each department's total into an ordered price band, then reproject
# the layer to EPSG:4326.
# The conditions are checked in order, so each upper bound implicitly carries
# the previous band's lower bound; negative or missing totals become NA.
Mapa_Final <- Mapa_Final %>%
  mutate(
    Rango_Precio = factor(
      case_when(
        Precio_Bill == 0 ~ "Sin datos (0 B)",
        Precio_Bill < 0 ~ NA_character_,
        Precio_Bill <= 100 ~ "Bajo (0-100 B)",
        Precio_Bill <= 500 ~ "Medio (100-500 B)",
        Precio_Bill > 500 ~ "Alto (>500 B)"
      ),
      levels = c("Sin datos (0 B)", "Bajo (0-100 B)", "Medio (100-500 B)", "Alto (>500 B)")
    )
  ) %>%
  st_transform(crs = 4326)
# Continuous colour scale over Precio_Bill. Note that, despite the object's
# name, the ramp runs from light green to very dark green.
paleta_roja <- colorNumeric(
  domain = Mapa_Final$Precio_Bill,
  palette = c("#C6F4D6", "#8BC34A", "#3E8E41", "#2E865F", "#228B22", "#1A6D1A", "#145214", "#032B03")
)

# One HTML hover label per polygon: department name in bold plus its total
# rounded to one decimal.
etiquetas <- lapply(
  sprintf(
    "<strong>%s</strong><br/>Precio: <b>%s billones</b>",
    Mapa_Final$Departamento,
    round(Mapa_Final$Precio_Bill, digits = 1)
  ),
  htmltools::HTML
)

# Interactive choropleth: polygons coloured by Precio_Bill over a light
# basemap, highlighted on hover, with a matching colour legend.
leaflet(Mapa_Final) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(
    label = etiquetas,
    labelOptions = labelOptions(
      direction = "auto",
      textsize = "15px",
      style = list("font-weight" = "normal", padding = "3px 8px")
    ),
    highlightOptions = highlightOptions(
      color = "#00FF00",
      weight = 3,
      fillOpacity = 1,
      bringToFront = TRUE,
      sendToBack = TRUE
    ),
    color = "white",
    weight = 1,
    smoothFactor = 0.5,
    fillColor = ~paleta_roja(Precio_Bill),
    fillOpacity = 0.8
  ) %>%
  addLegend(
    title = "Precio (Billones COP)",
    pal = paleta_roja,
    values = ~Precio_Bill,
    labFormat = labelFormat(suffix = "B"),
    opacity = 0.9
  )