CARGAR LA BASE DE DATOS

library(readr)
# Read the property listings from the compressed CSV export with base R's
# read.csv(); the resulting data frame is used by every later step.
inmuebles <- utils::read.csv(
  file = "C:/Users/carab/Downloads/Inmuebles_Disponibles_Para_La_Venta_20250518.csv.bz2"
)

PREGUNTAS DE ANALISIS 1

# Number of rows in the data set.
dim(inmuebles)[1L]

## [1] 448

# Number of columns (variables) in the data set.
dim(inmuebles)[2L]

## [1] 12

# Report the class of every column. vapply() always returns a named character
# vector with exactly one entry per column, whereas sapply()'s return type
# depends on its input (e.g. it degrades to a list for multi-class columns).
vapply(inmuebles, function(columna) class(columna)[1L], character(1))

##                 Codigo                 Ciudad           Departamento 
##              "integer"            "character"            "character" 
##                 Barrio              Direccion           Area.Terreno 
##            "character"            "character"              "integer" 
##        Area.Construida Detalle.Disponibilidad                Estrato 
##              "integer"            "character"            "character" 
##                 Precio       Tipo.de.Inmueble      Datos.Adicionales 
##              "numeric"            "character"            "character"

PUNTO 2: VALORES NULOS Y DUPLICADOS

# Count missing (NA) values per column.
# NOTE(review): is.na() only detects NA, not empty strings. The Barrio
# frequency table further down appears to contain many blank entries, so a
# count of 0 may understate missingness in the text columns -- confirm.
inmuebles |>
  is.na() |>
  colSums()

##                 Codigo                 Ciudad           Departamento 
##                      0                      0                      0 
##                 Barrio              Direccion           Area.Terreno 
##                      0                      0                      0 
##        Area.Construida Detalle.Disponibilidad                Estrato 
##                      0                      0                      0 
##                 Precio       Tipo.de.Inmueble      Datos.Adicionales 
##                      0                      0                      0

# Percentage of missing (NA) values per column.
100 * colMeans(is.na(inmuebles))

##                 Codigo                 Ciudad           Departamento 
##                      0                      0                      0 
##                 Barrio              Direccion           Area.Terreno 
##                      0                      0                      0 
##        Area.Construida Detalle.Disponibilidad                Estrato 
##                      0                      0                      0 
##                 Precio       Tipo.de.Inmueble      Datos.Adicionales 
##                      0                      0                      0

AVERIGUACION DE REGISTROS DUPLICADOS

# Number of rows that exactly repeat an earlier row.
length(which(duplicated(inmuebles)))

## [1] 0

PUNTO 3: DEPURACION DE DATOS

# Print large numbers in fixed notation rather than scientific notation.
options(scipen = 999)

# Box plot of listing prices, with outlying points drawn.
boxplot(
  inmuebles[["Precio"]],
  outline = TRUE,
  col = "blue",
  ylab = "Precio en pesos",
  main = "distribucion del Precio"
)

# Box plot of built area, with outlying points drawn.
boxplot(
  inmuebles[["Area.Construida"]],
  outline = TRUE,
  col = "purple",
  ylab = "area en metros",
  main = "distribucion del area construida"
)

# Box plot of land area, with outlying points drawn.
boxplot(
  inmuebles[["Area.Terreno"]],
  outline = TRUE,
  col = "pink",
  ylab = "area en metros",
  main = "distribucion del area de terreno"
)

ANALISIS DESCRIPTIVO 4.

library(ggplot2)

# Histogram of listing prices using 30 bins.
ggplot(data = inmuebles) +
  geom_histogram(
    mapping = aes(x = Precio),
    bins = 30,
    fill = "skyblue",
    color = "black"
  ) +
  ggtitle("Distribución del Precio de los Inmuebles") +
  xlab("Precio (en pesos)") +
  ylab("Frecuencia") +
  theme_minimal()

AVERIGUAMOS LA DISTRIBUCION DE PRECIOS

library(e1071)

# Skewness of the price column (e1071 implementation), ignoring NA values.
e1071::skewness(inmuebles[["Precio"]], na.rm = TRUE)

## [1] 7.256093

AVERIGUAMOS EL INMUEBLE MAS COMUN

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ stringr   1.5.1
## ✔ forcats   1.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggplot2)

# Bar chart with the number of listings per property type; the x-axis labels
# are rotated 90 degrees.
inmuebles %>%
  ggplot(mapping = aes(x = Tipo.de.Inmueble)) +
  geom_bar(fill = "steelblue") +
  labs(y = "Frecuencia", title = "Tipo de Inmueble") +
  theme_minimal() +
  theme(axis.text.x = element_text(hjust = 1, vjust = 0.5, angle = 90))

ESTADISTICAS CLAVES.

library(dplyr)

# Mean of each numeric column, ignoring NA values. na.rm is supplied inside an
# anonymous function because forwarding extra arguments through across(...)
# is deprecated as of dplyr 1.1.0; the result and column names
# (<column>_media) are unchanged and the deprecation warning no longer fires.
# NOTE(review): Codigo looks like an identifier, so its mean is probably not
# meaningful -- kept so the reported table stays the same.
inmuebles %>%
  summarise(
    across(
      c(Precio, Codigo, Area.Construida, Area.Terreno),
      list(media = \(x) mean(x, na.rm = TRUE))
    )
  )

##    Precio_media Codigo_media Area.Construida_media Area.Terreno_media
## 1 9372905838386      17680.2              11551.53            1619546

MEDIANA

# Median of each numeric column, ignoring NA values. na.rm is supplied inside
# an anonymous function because forwarding extra arguments through across(...)
# is deprecated as of dplyr 1.1.0; output names stay <column>_mediana.
inmuebles %>%
  summarise(
    across(
      c(Precio, Codigo, Area.Construida, Area.Terreno),
      list(mediana = \(x) median(x, na.rm = TRUE))
    )
  )

##   Precio_mediana Codigo_mediana Area.Construida_mediana Area.Terreno_mediana
## 1   165205000000        18291.5                       0                    0

DESVIACION ESTANDAR.

# Standard deviation of each numeric column, ignoring NA values. na.rm is
# supplied inside an anonymous function because forwarding extra arguments
# through across(...) is deprecated as of dplyr 1.1.0. A single unnamed
# function keeps the original column names in the output.
inmuebles %>%
  summarise(
    across(
      c(Precio, Codigo, Area.Construida, Area.Terreno),
      \(x) sd(x, na.rm = TRUE)
    )
  )

##           Precio   Codigo Area.Construida Area.Terreno
## 1 38179638410195 2542.956        128517.7     18588576

CUARTILES

# Minimum, quartiles, mean and maximum of the main numeric columns.
inmuebles %>%
  select(Precio, Codigo, Area.Terreno, Area.Construida) %>%
  summary()

##      Precio                    Codigo       Area.Terreno      
##  Min.   :              0   Min.   : 2330   Min.   :        0  
##  1st Qu.:   125725000000   1st Qu.:18129   1st Qu.:        0  
##  Median :   165205000000   Median :18292   Median :        0  
##  Mean   :  9372905838390   Mean   :17680   Mean   :  1619546  
##  3rd Qu.:  1861440000000   3rd Qu.:18472   3rd Qu.:        0  
##  Max.   :452337898200000   Max.   :19353   Max.   :321719700  
##  Area.Construida  
##  Min.   :      0  
##  1st Qu.:      0  
##  Median :      0  
##  Mean   :  11552  
##  3rd Qu.:      0  
##  Max.   :2272400

TABLAS DE FRECUENCIAS.

# One frequency table per location column (Ciudad, Departamento, Barrio).
inmuebles %>%
  select(Ciudad, Departamento, Barrio) %>%
  lapply(table)

## $Ciudad
## 
##                    AGUAZUL                  ANGOSTURA 
##                          3                          1 
##                     ARMERO               BARRANQUILLA 
##                          2                         14 
##                    BITUIMA                     BOGOTA 
##                          1                         34 
##            BOLIVAR - CAUCA                       BUGA 
##                          1                          5 
##                       CALI           CALIMA EL DARIEN 
##                          7                         10 
##          CARMEN DE BOLIVAR                  CARTAGENA 
##                          1                          3 
##                   CAUCASIA                       CHIA 
##                          1                          2 
##                     CUCUTA                     CURITI 
##                          9                          1 
##                      DAGUA                  EL AGUILA 
##                          1                          1 
##                  EL PLAYON                   EL ROSAL 
##                          1                          1 
##                   ENVIGADO                      FUNZA 
##                          1                          1 
##                   GIRARDOT                     IBAGUE 
##                          4                          1 
##                  LA CALERA                  LA DORADA 
##                          1                          1 
##                LA VIRGINIA                 LOS PATIOS 
##                          1                          1 
##                     MADRID                  MANIZALES 
##                          1                         13 
##                   MARSELLA                   MEDELLIN 
##                          1                          5 
##                   MONTERIA                    PEREIRA 
##                          6                         12 
##                    PIEDRAS               PUERTO LOPEZ 
##                          1                          1 
##                   RICAURTE SAN ANTONIO DEL TEQUENDAMA 
##                          1                          2 
##     SANTANDER DE QUILICHAO                      SOATA 
##                          2                          1 
##                   SOGAMOSO                     TARAZA 
##                          1                          1 
##                      TENJO                       TIBU 
##                          1                          1 
##                      TURBO                 VILLA RICA 
##                          1                          1 
##              VILLAVICENCIO                      YUMBO 
##                        285                          1 
## 
## $Departamento
## 
##          ANTIOQUIA          ATLÁNTICO            BOLÍVAR             BOYACÁ 
##                 10                 14                  4                  2 
##             CALDAS           CASANARE              CAUCA            CÓRDOBA 
##                 14                  3                  4                  6 
##       CUNDINAMARCA               META NORTE DE SANTANDER          RISARALDA 
##                 49                286                 11                 14 
##          SANTANDER             TOLIMA    VALLE DEL CAUCA 
##                  2                  4                 25 
## 
## $Barrio
## 
##                                         AV 30 DE AGOSTO 
##                         389                          10 
##                  BELLAVISTA                      CENTRO 
##                           1                           3 
##        CIUDADELA CHIPICHAPE CONDOMINIO CAMPESTRE MONACO 
##                           1                           1 
##     CORREGIMIENTO EL CARMEN                   EL CENTRO 
##                           1                           1 
##                     EL HOYO                    EL PRADO 
##                           1                           1 
##                    GUACANDÁ            JOSE MARIA CABAL 
##                           1                           4 
##                   LA CALERA        PARQUE IND CAUCADESA 
##                           1                           1 
## PARQUE INDUSTRIAL CAUCADESA            PRADOS DEL NORTE 
##                           1                           4 
##               SANTA MATILDE                       SINAI 
##                           1                          10 
##                        SUBA                     TINTALA 
##                           1                           1 
##           VEREDA CHAMBIMBAL          VEREDA DE CAMBULAR 
##                           1                           1 
##             VEREDA FONQUETA              VEREDA PALERMO 
##                           1                          10 
##               VILLA DEL SUR 
##                           1

AGRUPACIONES.

library(dplyr)
# Prefer fixed over scientific notation when printing numbers.
options(scipen = 999)

# Average listing price for each Estrato category, ignoring NA prices.
inmuebles %>%
  group_by(Estrato) %>%
  summarise(Promedio_Precio = mean(Precio, na.rm = TRUE))

## # A tibble: 9 × 2
##   Estrato    Promedio_Precio
##   <chr>                <dbl>
## 1 CINCO              1.60e13
## 2 COMERCIAL          4.15e12
## 3 CUATRO             7.84e12
## 4 DOS                1.30e13
## 5 INDUSTRIAL         1.22e14
## 6 RURAL              3.08e13
## 7 SEIS               7.06e12
## 8 TRES               3.87e13
## 9 UNO                9.82e11

HALLAMOS EL PROMEDIO

# Average listing price for each city, ignoring NA prices.
inmuebles %>%
  group_by(Ciudad) %>%
  summarise(Promedio_Precio = mean(Precio, na.rm = TRUE))

## # A tibble: 48 × 2
##    Ciudad           Promedio_Precio
##    <chr>                      <dbl>
##  1 AGUAZUL                  1.39e13
##  2 ANGOSTURA                2.46e13
##  3 ARMERO                   4.73e12
##  4 BARRANQUILLA             4.88e12
##  5 BITUIMA                  4.81e12
##  6 BOGOTA                   5.27e13
##  7 BOLIVAR - CAUCA          7.38e12
##  8 BUGA                     9.65e13
##  9 CALI                     3.51e12
## 10 CALIMA EL DARIEN         2.19e13
## # ℹ 38 more rows

ANALISIS ESPACIAL GEOGRAFICO (SI NO HAY COORDENADAS)

library(dplyr)
library(ggplot2)
library(sf)

## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE

library(tmap)

# Read the Colombia polygon layer from the shapefile as an sf object.
Colombia <- sf::st_read(
  dsn = "C:/Users/carab/Downloads/SHP_MGN2021_COLOMBIA/MGN_2021_COLOMBIA/COLOMBIA/COLOMBIA.shp"
)

## Reading layer `COLOMBIA' from data source 
##   `C:\Users\carab\Downloads\SHP_MGN2021_COLOMBIA\MGN_2021_COLOMBIA\COLOMBIA\COLOMBIA.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 33 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -81.73562 ymin: -4.229406 xmax: -66.84722 ymax: 13.39473
## Geodetic CRS:  MAGNA-SIRGAS

MAPAS Y GEOLOCALIZACION.

# Average listing price per department, ignoring NA prices.
precio_por_dpto <- summarise(
  group_by(inmuebles, Departamento),
  PrecioPromedio = mean(Precio, na.rm = TRUE)
)

# Attach the averages to the department polygons. The join matches on the
# exact department name, so polygons whose name has no match in the listings
# end up with NA in PrecioPromedio.
mapa <- Colombia %>%
  left_join(precio_por_dpto, by = c("DPTO_CNMBR" = "Departamento"))

# Choropleth of the average price per department, written against the tmap v4
# API. The v3 arguments `style`, `palette`, `title` and `main.title` are
# deprecated and produced migration messages; they map to
# tm_scale_intervals(), tm_legend() and tm_title() respectively, and the
# "YlOrRd" palette is now named "brewer.yl_or_rd".
tm_shape(mapa) +
  tm_polygons(
    fill = "PrecioPromedio",
    fill.scale = tm_scale_intervals(
      style = "quantile",
      values = "brewer.yl_or_rd"
    ),
    fill.legend = tm_legend(title = "Precio promedio")
  ) +
  tm_title("Mapa de precios por departamento en Colombia")

## 
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

PATRONES GRAFICOS.

# Box plot of listing price for each city; outliers are drawn in purple and
# the x-axis labels are rotated 90 degrees.
inmuebles %>%
  ggplot(mapping = aes(x = Ciudad, y = Precio)) +
  geom_boxplot(outlier.color = "purple", fill = "skyblue") +
  theme_minimal() +
  ggtitle("Distribucion de precios por ciudad") +
  xlab("Ciudad") +
  ylab("Precio del inmueble") +
  theme(axis.text.x = element_text(hjust = 1, angle = 90))

HALLAMOS LA VARIACION

# Keep only the listings whose built area is strictly positive.
# NOTE(review): this subset is not used again in the visible script -- confirm
# whether a later step was meant to analyse it.
sin_areaconstruida_en_cero <- filter(inmuebles, Area.Construida > 0)

RELACIONES ENTRE VARIABLES.

# Data frame with only the numeric columns. select(where(...)) replaces the
# superseded select_if() and selects the same columns.
numericas <- select(inmuebles, where(is.numeric))

# Pairwise correlations (cor()'s default method), using only the rows that are
# complete in every numeric column.
correlaciones <- cor(numericas, use = "complete.obs")

# Correlation of every numeric column with Precio, largest value first.
correlaciones_con_precio <- sort(correlaciones["Precio", ], decreasing = TRUE)
correlaciones_con_precio

##          Precio Area.Construida    Area.Terreno          Codigo 
##       1.0000000       0.5621413       0.4701345      -0.3018968

ANALISIS MULTIVARIABLE.

library(ggplot2)

# Jittered scatter of price against Estrato, with one panel per property type;
# the x-axis labels are rotated 90 degrees.
inmuebles %>%
  ggplot(mapping = aes(x = Estrato, y = Precio)) +
  geom_jitter(color = "darkblue", alpha = 0.5, width = 0.2) +
  facet_wrap(vars(Tipo.de.Inmueble)) +
  ggtitle("Relacion entre Precio y Estrato por Tipo de Inmueble") +
  xlab("Estrato") +
  ylab("Precio en pesos") +
  theme(axis.text.x = element_text(hjust = 1, angle = 90))

Informe de análisis inmobiliario

Carol Martinez

2025-05-09

CARGAR LA BASE DE DATOS

PREGUNTAS DE ANALISIS 1

PUNTO 2: VALORES NULOS Y DUPLICADOS

AVERIGUACION DE REGISTROS DUPLICADOS

PUNTO 3: DEPURACION DE DATOS

ANALISIS DESCRIPTIVO 4.

AVERIGUAMOS LA DISTRIBUCION DE PRECIOS

AVERIGUAMOS EL INMUEBLE MAS COMUN

ESTADISTICAS CLAVES.

MEDIANA

DESVIACION ESTANDAR.

CUARTILES

TABLAS DE FRECUENCIAS.

AGRUPACIONES.

HALLAMOS EL PROMEDIO

ANALISIS ESPACIAL GEOGRAFICO (SI NO HAY COORDENADAS)

MAPAS Y GEOLOCALIZACION.

PATRONES GRAFICOS.

HALLAMOS LA VARIACION

RELACIONES ENTRE VARIABLES.

ANALISIS MULTIVARIABLE.