library(readxl)
# Attach the string-distance package up front; library() stops with an error
# when the package is missing, whereas require() only returns FALSE.
library(RecordLinkage)

# Load the housing listings workbook.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists. Consider a project-relative path.
datos <- read_excel("D:/GUIANCARLO_2022/MAESTRIA CIENCIA DE DATOS/Metodos y Simulacion Estadistica/unidad 1_Analisis exploratorio de datos/olx_viviendas_cali.xlsx")

# Prepend a sequential row identifier. seq_len() is used instead of
# 1:dim(datos)[1] so an empty sheet yields zero IDs rather than c(1, 0).
# The recorded output shows the workbook already carries an ID column, which
# data.frame() renames to ID.1 to keep column names unique.
ID <- seq_len(nrow(datos))
datos <- data.frame(ID, datos)

# Row positions whose neighbourhood is exactly "la buitrera".
# Not used for the subset below; kept in case later code refers to it.
pos <- which(datos$Barrio == "la buitrera")

# Fuzzy selection: apartments whose neighbourhood name has a Jaro-Winkler
# similarity above 0.8 to "la buitrera". which() drops rows where the
# condition is NA.
# NOTE(review): in the recorded output the matched neighbourhoods are
# "la base" and "la rivera"; confirm that 0.8 is strict enough.
pos2 <- which(
  jarowinkler("la buitrera", datos$Barrio) > 0.8 &
    datos$Tipo == "Apartamento"
)
datos_sub <- datos[pos2, ]
head(datos_sub)
## ID ID.1
## 278 278 1564442091-6558
## 380 380 1564442091-13511
## 628 628 1564442091-8299
## 727 727 1564442091-9881
## 765 765 1564442091-6689
## 1175 1175 1564442091-6972
## URL
## 278 https://www.fincaraiz.com.co/apartamento-en-venta/cali/base-det-4697992.aspx
## 380 https://www.fincaraiz.com.co/apartamento-en-venta/cali/rivera-det-4377195.aspx
## 628 https://www.fincaraiz.com.co/apartamento-en-venta/cali/base-det-4775240.aspx
## 727 https://www.fincaraiz.com.co/apartamento-en-venta/cali/base-det-4681132.aspx
## 765 https://www.fincaraiz.com.co/apartamento-en-venta/cali/rivera-det-4714162.aspx
## 1175 https://www.fincaraiz.com.co/apartamento-en-venta/cali/base-det-4355691.aspx
## ciudad Zona Barrio Cordenada_latitud cordenada_longitud
## 278 Cali Zona Oriente la base 3.454231 -76.48642
## 380 Cali Zona Norte la rivera 3.476087 -76.48969
## 628 Cali Zona Oriente la base 3.449750 -76.49559
## 727 Cali Zona Oriente la base 3.451287 -76.49780
## 765 Cali Zona Norte la rivera 3.474790 -76.49847
## 1175 Cali Zona Oriente la base 3.453025 -76.50461
## Tipo piso Estrato Area_contruida parqueaderos Baños Habitaciones
## 278 Apartamento 5 3 62 1 2 3
## 380 Apartamento NA 3 48 1 1 3
## 628 Apartamento 1 3 56 NA 1 3
## 727 Apartamento 2 3 60 NA 2 3
## 765 Apartamento 3 3 60 NA 1 3
## 1175 Apartamento 2 3 80 NA 2 3
## Precio
## 278 1.13e+08
## 380 8.00e+07
## 628 1.36e+08
## 727 1.15e+08
## 765 8.50e+07
## 1175 9.90e+07
# Table of key indicators for the selected listings: mean and median price,
# mean built area, and number of listings. Missing values are ignored.
promedio_precio <- mean(datos_sub[["Precio"]], na.rm = TRUE)
mediana_precio <- median(datos_sub[["Precio"]], na.rm = TRUE)
promedio_area <- mean(datos_sub[["Area_contruida"]], na.rm = TRUE)
cantidad_ofertas <- length(datos_sub[["Zona"]])

# One-row summary; column names match the variable names above.
resultado <- data.frame(
  promedio_precio = promedio_precio,
  mediana_precio = mediana_precio,
  promedio_area = promedio_area,
  cantidad_ofertas = cantidad_ofertas
)
resultado
## promedio_precio mediana_precio promedio_area cantidad_ofertas
## 1 104666667 1.06e+08 61 6
# Summary of built area for the selected listings: mean, median, and number
# of listings. Missing areas are ignored.
promedio_area_construida <- mean(datos_sub$Area_contruida, na.rm = TRUE)
mediana_area_construida <- median(datos_sub$Area_contruida, na.rm = TRUE)

# Count rows directly so the count does not depend on a particular column.
cantidad_ofertas <- nrow(datos_sub)

# The mean is included once; passing it twice made data.frame() emit a
# redundant "promedio_area_construida.1" column.
resultado <- data.frame(
  promedio_area_construida,
  mediana_area_construida,
  cantidad_ofertas
)
resultado
## promedio_area_construida mediana_area_construida cantidad_ofertas
## 1 61 60 6
require(leaflet)
## Loading required package: leaflet
# Interactive map of the selected listings: one small black circle marker per
# row, labelled with its row ID, drawn together with the default tile layer.
leaflet() %>%
  addCircleMarkers(
    lng = datos_sub$cordenada_longitud,
    lat = datos_sub$Cordenada_latitud,
    radius = 0.3,
    color = "black",
    label = datos_sub$ID
  ) %>%
  addTiles()
require(ggplot2)
## Loading required package: ggplot2
require(plotly)
## Loading required package: plotly
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Scatter plot of price against built area with the default smoother,
# rendered as an interactive plotly widget.
# NOTE(review): the recorded output shows loess singularity warnings for this
# subset; a simpler smoother may be more appropriate here.
g1 <- ggplot(datos_sub, aes(x = Area_contruida, y = Precio)) +
  geom_point() +
  geom_smooth()
ggplotly(g1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 47.84
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 12.16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.0285e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 406.43
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 47.84
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 12.16
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 5.0285e-017
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 406.43
# Keep only listings with a built area below 800 (rows with a missing area
# are dropped by which()), then redraw the price-vs-area plot on that subset.
pos3 <- which(datos_sub$Area_contruida < 800)
datos_sub2 <- datos_sub[pos3, ]

g2 <- ggplot(datos_sub2, aes(x = Area_contruida, y = Precio)) +
  geom_point() +
  geom_smooth()
ggplotly(g2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 47.84
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 12.16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.0285e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 406.43
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 47.84
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 12.16
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 5.0285e-017
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 406.43