Análisis de datos de vivienda
library(readxl)
library(ggplot2)
library(CGP)
Cargar mis datos
# Switch the session to the exercise folder, read the housing spreadsheet
# located there, and preview its first rows.
setwd("C:/Users/MyPC/Desktop/Posgrado - Analisis estrategico de datos/Ejercicio Nivelacion Python")
viviendas <- read_excel(path = "datos_vivienda.xlsx")
head(viviendas)
## # A tibble: 6 × 2
## Area_contruida precio_millon
## <dbl> <dbl>
## 1 86 250
## 2 118 385
## 3 130 395
## 4 181 419
## 5 86 240
## 6 98 320
# Put the columns of `viviendas` on the search path so that later statements
# can refer to them by bare name (e.g. Area_contruida, precio_millon).
attach(viviendas)
Estadísticos descriptivos
# Descriptive statistics for the built-area column.
# The column is read explicitly from `viviendas`, so this block gives the same
# result whether or not the data frame has been attached.
min_area <- min(viviendas$Area_contruida)
prom_area <- mean(viviendas$Area_contruida)
# Standard deviation. The previous code assigned the raw column here, which
# made the summary table below repeat every statistic once per observation.
desv_area <- sd(viviendas$Area_contruida)
# `probs` is spelled out (no partial argument matching) and names = FALSE
# keeps the "25%"-style labels from turning into row names of the summary.
p25_area <- quantile(viviendas$Area_contruida, probs = 0.25, names = FALSE)
p50_area <- quantile(viviendas$Area_contruida, probs = 0.50, names = FALSE)
p75_area <- quantile(viviendas$Area_contruida, probs = 0.75, names = FALSE)
max_area <- max(viviendas$Area_contruida)
# One-row summary table; column order is unchanged from the original.
data.frame(
  prom_area, desv_area, p25_area, p50_area, p75_area, min_area, max_area
)
## Warning in data.frame(prom_area, desv_area, p25_area, p50_area, p75_area, : row
## names were found from a short variable and have been discarded
## prom_area desv_area p25_area p50_area p75_area min_area max_area
## 1 115.7469 86.00 86 97 130 80 195
## 2 115.7469 118.00 86 97 130 80 195
## 3 115.7469 130.00 86 97 130 80 195
## 4 115.7469 181.00 86 97 130 80 195
## 5 115.7469 86.00 86 97 130 80 195
## 6 115.7469 98.00 86 97 130 80 195
## 7 115.7469 170.00 86 97 130 80 195
## 8 115.7469 96.00 86 97 130 80 195
## 9 115.7469 85.00 86 97 130 80 195
## 10 115.7469 170.00 86 97 130 80 195
## 11 115.7469 87.00 86 97 130 80 195
## 12 115.7469 118.42 86 97 130 80 195
## 13 115.7469 86.00 86 97 130 80 195
## 14 115.7469 85.00 86 97 130 80 195
## 15 115.7469 96.00 86 97 130 80 195
## 16 115.7469 86.00 86 97 130 80 195
## 17 115.7469 86.00 86 97 130 80 195
## 18 115.7469 130.00 86 97 130 80 195
## 19 115.7469 134.00 86 97 130 80 195
## 20 115.7469 80.00 86 97 130 80 195
## 21 115.7469 130.00 86 97 130 80 195
## 22 115.7469 87.00 86 97 130 80 195
## 23 115.7469 130.00 86 97 130 80 195
## 24 115.7469 89.00 86 97 130 80 195
## 25 115.7469 195.00 86 97 130 80 195
## 26 115.7469 170.00 86 97 130 80 195
# Descriptive statistics for the price column (in millions).
# The column is read explicitly from `viviendas`, so this block gives the same
# result whether or not the data frame has been attached.
min_precio <- min(viviendas$precio_millon)
prom_precio <- mean(viviendas$precio_millon)
# Standard deviation. The previous code assigned the raw column here, which
# made the summary table below repeat every statistic once per observation.
desv_precio <- sd(viviendas$precio_millon)
# `probs` is spelled out (no partial argument matching) and names = FALSE
# keeps the "25%"-style labels from turning into row names of the summary.
p25_precio <- quantile(viviendas$precio_millon, probs = 0.25, names = FALSE)
p50_precio <- quantile(viviendas$precio_millon, probs = 0.50, names = FALSE)
p75_precio <- quantile(viviendas$precio_millon, probs = 0.75, names = FALSE)
max_precio <- max(viviendas$precio_millon)
# One-row summary table; column order is unchanged from the original.
data.frame(
  min_precio, prom_precio, desv_precio,
  p25_precio, p50_precio, p75_precio, max_precio
)
## Warning in data.frame(min_precio, prom_precio, desv_precio, p25_precio, : row
## names were found from a short variable and have been discarded
## min_precio prom_precio desv_precio p25_precio p50_precio p75_precio
## 1 240 332.0769 250 251.25 305 395
## 2 240 332.0769 385 251.25 305 395
## 3 240 332.0769 395 251.25 305 395
## 4 240 332.0769 419 251.25 305 395
## 5 240 332.0769 240 251.25 305 395
## 6 240 332.0769 320 251.25 305 395
## 7 240 332.0769 480 251.25 305 395
## 8 240 332.0769 268 251.25 305 395
## 9 240 332.0769 240 251.25 305 395
## 10 240 332.0769 450 251.25 305 395
## 11 240 332.0769 240 251.25 305 395
## 12 240 332.0769 385 251.25 305 395
## 13 240 332.0769 290 251.25 305 395
## 14 240 332.0769 240 251.25 305 395
## 15 240 332.0769 272 251.25 305 395
## 16 240 332.0769 250 251.25 305 395
## 17 240 332.0769 250 251.25 305 395
## 18 240 332.0769 395 251.25 305 395
## 19 240 332.0769 385 251.25 305 395
## 20 240 332.0769 255 251.25 305 395
## 21 240 332.0769 430 251.25 305 395
## 22 240 332.0769 260 251.25 305 395
## 23 240 332.0769 385 251.25 305 395
## 24 240 332.0769 290 251.25 305 395
## 25 240 332.0769 450 251.25 305 395
## 26 240 332.0769 410 251.25 305 395
## max_precio
## 1 480
## 2 480
## 3 480
## 4 480
## 5 480
## 6 480
## 7 480
## 8 480
## 9 480
## 10 480
## 11 480
## 12 480
## 13 480
## 14 480
## 15 480
## 16 480
## 17 480
## 18 480
## 19 480
## 20 480
## 21 480
## 22 480
## 23 480
## 24 480
## 25 480
## 26 480
# Histogram of the built-area column drawn with ggplot2, using the default
# binning and the black-and-white theme.
ggplot(data = viviendas, mapping = aes(x = Area_contruida)) +
  geom_histogram() +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
# Base-graphics histogram of the built-area column with an explicit title,
# axis labels, fill colour and bar border colour.
hist(
  viviendas$Area_contruida,
  main = "Distribución de Área Construida",
  xlab = "Área (m2)",
  ylab = "Frecuencia",
  col = "steelblue",
  border = "White"
)