Iniciamos realizando el cargue de los datos y la validación.
library(readxl)
# Read the housing listings spreadsheet into `datos` and preview its
# first rows.
datos <- read_excel(
  path = "C:/Users/icm2363a/Documents/R/Datos_Vivienda.xlsx"
)
head(datos)
## # A tibble: 6 x 12
## Zona piso Estrato precio_millon Area_contruida parqueaderos Banos
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 Zona Sur 2 6 880 237 2 5
## 2 Zona Oeste 2 4 1200 800 3 6
## 3 Zona Sur 3 5 250 86 NA 2
## 4 Zona Sur NA 6 1280 346 4 6
## 5 Zona Sur 2 6 1300 600 4 7
## 6 Zona Sur 3 6 513 160 2 4
## # ... with 5 more variables: Habitaciones <dbl>, Tipo <chr>, Barrio <chr>,
## # cordenada_longitud <dbl>, Cordenada_latitud <dbl>
require(RecordLinkage)
## Loading required package: RecordLinkage
## Loading required package: DBI
## Loading required package: RSQLite
## Loading required package: ff
## Loading required package: bit
##
## Attaching package: 'bit'
## The following object is masked from 'package:base':
##
## xor
## Attaching package ff
## - getOption("fftempdir")=="C:/Users/icm2363a/AppData/Local/Temp/RtmpsjYnz3/ff"
## - getOption("ffextension")=="ff"
## - getOption("ffdrop")==TRUE
## - getOption("fffinonexit")==TRUE
## - getOption("ffpagesize")==65536
## - getOption("ffcaching")=="mmnoflush" -- consider "ffeachflush" if your system stalls on large writes
## - getOption("ffbatchbytes")==16777216 -- consider a different value for tuning your system
## - getOption("ffmaxbytes")==536870912 -- consider a different value for tuning your system
##
## Attaching package: 'ff'
## The following objects are masked from 'package:utils':
##
## write.csv, write.csv2
## The following objects are masked from 'package:base':
##
## is.factor, is.ordered
## RecordLinkage library
## [c] IMBEI Mainz
##
## Attaching package: 'RecordLinkage'
## The following object is masked from 'package:bit':
##
## clone
## The following object is masked from 'package:base':
##
## isFALSE
require(leaflet)
## Loading required package: leaflet
require(ggplot2)
## Loading required package: ggplot2
require(plotly)
## Loading required package: plotly
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Prepend a sequential row identifier to the listings table.
# seq_len(nrow(.)) is used rather than 1:dim(.)[1] because the latter
# produces c(1, 0) when the table has no rows, which would make the
# data.frame() call below fail on mismatched row counts.
ID <- seq_len(nrow(datos))
datos <- data.frame(ID, datos)

# Row positions whose Barrio is exactly "multicentro". The working subset
# below is built from `pos2`, not from these positions.
pos <- which(datos$Barrio == "multicentro")

# Row positions of listings of Tipo "Apartamento" whose Barrio scores above
# 0.8 against "ingenio" with jarowinkler(), so that spelling variants of the
# neighbourhood name are matched too. which() drops rows where the
# condition evaluates to NA.
pos2 <- which(
  jarowinkler("ingenio", datos$Barrio) > 0.8 & datos$Tipo == "Apartamento"
)
datos_sub <- datos[pos2, ]
head(datos_sub)
## ID Zona piso Estrato precio_millon Area_contruida parqueaderos Banos
## 123 123 Zona Sur 3 5 290 100 1 3
## 221 221 Zona Sur 4 5 360 99 1 3
## 582 582 Zona Sur NA 4 550 197 2 4
## 2397 2397 Zona Sur 2 5 410 136 2 4
## 2446 2446 Zona Sur 4 5 390 198 1 4
## 2792 2792 Zona Sur NA 5 300 147 2 3
## Habitaciones Tipo Barrio cordenada_longitud Cordenada_latitud
## 123 3 Apartamento el ingenio -76.48498 3.41789
## 221 2 Apartamento el ingenio -76.48953 3.49684
## 582 3 Apartamento el ingenio -76.49900 3.47100
## 2397 4 Apartamento el ingenio -76.52000 3.37900
## 2446 3 Apartamento el ingenio -76.52030 3.38197
## 2792 3 Apartamento el ingenio -76.52200 3.38100
# Summary indicators for the selected listings: mean and median of
# precio_millon, mean of Area_contruida (missing values ignored in all
# three), and the number of listings in the subset.
promedio_precio <- mean(datos_sub[["precio_millon"]], na.rm = TRUE)
mediana_precio <- median(datos_sub[["precio_millon"]], na.rm = TRUE)
promedio_area <- mean(datos_sub[["Area_contruida"]], na.rm = TRUE)
cantidad_ofertas <- length(datos_sub[["Zona"]])

# Collect the indicators in a one-row table and display it.
resultado <- data.frame(
  promedio_precio = promedio_precio,
  mediana_precio = mediana_precio,
  promedio_area = promedio_area,
  cantidad_ofertas = cantidad_ofertas
)
resultado
## promedio_precio mediana_precio promedio_area cantidad_ofertas
## 1 316.6846 300 117.7467 130
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Keep only the listings whose Area_contruida is below 400; rows with a
# missing area are dropped by which().
pos3 <- which(datos_sub[["Area_contruida"]] < 400)
datos_sub2 <- datos_sub[pos3, ]
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.