library(readxl)
# Load the housing listings workbook. The absolute path is machine-specific,
# so the script only runs where the file exists at exactly this location.
datos <- read_excel("D:/GUIANCARLO_2022/MAESTRIA CIENCIA DE DATOS/Metodos y Simulacion Estadistica/unidad 1_Analisis exploratorio de datos/olx_viviendas_cali.xlsx")

# Sequential row identifier. seq_len() yields an empty vector for an empty
# sheet, whereas 1:0 would yield c(1, 0) and make data.frame() fail.
ID <- seq_len(nrow(datos))

# Prepend the row identifier. The workbook already has a column named ID
# (the printed subset further down shows it renamed to ID.1), so after this
# step `ID` is the row number and `ID.1` is the workbook's own identifier.
datos <- data.frame(ID, datos)

# Row positions whose neighbourhood is spelled exactly "la buitrera";
# which() also discards rows where the comparison evaluates to NA.
pos <- which(datos[["Barrio"]] == "la buitrera")
datos_sub <- datos[pos, ]

# RecordLinkage supplies jarowinkler(). library() stops with an error when
# the package is missing; require() would only warn and return FALSE, and
# the script would then fail later at the jarowinkler() call.
library(RecordLinkage)

# Keep apartments whose neighbourhood name has a Jaro-Winkler similarity to
# "la buitrera" above 0.8. This replaces any previous value of datos_sub.
# NOTE(review): the recorded head() output below lists only "la base" and
# "la rivera", so 0.8 may be too permissive for this purpose -- confirm.
pos2 <- which(
  jarowinkler("la buitrera", datos$Barrio) > 0.8 &
    datos$Tipo == "Apartamento"
)
datos_sub <- datos[pos2, ]
head(datos_sub)
##        ID             ID.1
## 278   278  1564442091-6558
## 380   380 1564442091-13511
## 628   628  1564442091-8299
## 727   727  1564442091-9881
## 765   765  1564442091-6689
## 1175 1175  1564442091-6972
##                                                                                 URL
## 278    https://www.fincaraiz.com.co/apartamento-en-venta/cali/base-det-4697992.aspx
## 380  https://www.fincaraiz.com.co/apartamento-en-venta/cali/rivera-det-4377195.aspx
## 628    https://www.fincaraiz.com.co/apartamento-en-venta/cali/base-det-4775240.aspx
## 727    https://www.fincaraiz.com.co/apartamento-en-venta/cali/base-det-4681132.aspx
## 765  https://www.fincaraiz.com.co/apartamento-en-venta/cali/rivera-det-4714162.aspx
## 1175   https://www.fincaraiz.com.co/apartamento-en-venta/cali/base-det-4355691.aspx
##      ciudad         Zona    Barrio Cordenada_latitud cordenada_longitud
## 278    Cali Zona Oriente   la base          3.454231          -76.48642
## 380    Cali   Zona Norte la rivera          3.476087          -76.48969
## 628    Cali Zona Oriente   la base          3.449750          -76.49559
## 727    Cali Zona Oriente   la base          3.451287          -76.49780
## 765    Cali   Zona Norte la rivera          3.474790          -76.49847
## 1175   Cali Zona Oriente   la base          3.453025          -76.50461
##             Tipo piso Estrato Area_contruida parqueaderos Baños Habitaciones
## 278  Apartamento    5       3             62            1     2            3
## 380  Apartamento   NA       3             48            1     1            3
## 628  Apartamento    1       3             56           NA     1            3
## 727  Apartamento    2       3             60           NA     2            3
## 765  Apartamento    3       3             60           NA     1            3
## 1175 Apartamento    2       3             80           NA     2            3
##        Precio
## 278  1.13e+08
## 380  8.00e+07
## 628  1.36e+08
## 727  1.15e+08
## 765  8.50e+07
## 1175 9.90e+07
# Table of key indicators: price and built-area summaries for the subset.

promedio_precio <- mean(datos_sub[["Precio"]], na.rm = TRUE)
mediana_precio <- median(datos_sub[["Precio"]], na.rm = TRUE)
promedio_area <- mean(datos_sub[["Area_contruida"]], na.rm = TRUE)
# Number of listings: one row per listing in the subset.
cantidad_ofertas <- nrow(datos_sub)

# One-row summary table; column names match the variable names above.
resultado <- data.frame(
  promedio_precio = promedio_precio,
  mediana_precio = mediana_precio,
  promedio_area = promedio_area,
  cantidad_ofertas = cantidad_ofertas
)
resultado
##   promedio_precio mediana_precio promedio_area cantidad_ofertas
## 1       104666667       1.06e+08            61                6
# Built-area indicators for the subset: mean, median and number of listings.
promedio_area_construida <- mean(datos_sub$Area_contruida, na.rm = TRUE)
mediana_area_construida <- median(datos_sub$Area_contruida, na.rm = TRUE)
cantidad_ofertas <- nrow(datos_sub)

# Each statistic is listed once: passing the same vector twice to
# data.frame() would add a redundant column suffixed with ".1".
# Note that this overwrites whatever `resultado` held before.
resultado <- data.frame(
  promedio_area_construida,
  mediana_area_construida,
  cantidad_ofertas
)
resultado
##   promedio_area_construida mediana_area_construida cantidad_ofertas
## 1                       61                      60                6
# library() fails immediately if leaflet is not installed, instead of
# returning FALSE and letting the map call fail with a less clear error.
library(leaflet)

# Interactive map of the subset: one small black circle marker per listing,
# labelled with its ID value, together with the default tile layer.
leaflet() %>%
  addCircleMarkers(
    lng = datos_sub$cordenada_longitud,
    lat = datos_sub$Cordenada_latitud,
    radius = 0.3,
    color = "black",
    label = datos_sub$ID
  ) %>%
  addTiles()
# Plotting dependencies. library() raises an error when a package is missing,
# whereas require() only warns and returns FALSE.
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Scatter plot of price against built area with geom_smooth()'s default
# smoother, rendered as an interactive plotly widget.
# NOTE(review): the loess warnings recorded below probably stem from fitting
# on very few rows (the indicator table reports 6 listings) -- confirm.
g1 <- ggplot(datos_sub, aes(x = Area_contruida, y = Precio)) +
  geom_point() +
  geom_smooth()
ggplotly(g1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 47.84
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 12.16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.0285e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 406.43
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 47.84
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 12.16
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 5.0285e-017
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 406.43
# Restrict the subset to listings with built area below 800 (which() also
# discards rows where the area is NA) and redraw the same scatter plot.
pos3 <- which(datos_sub[["Area_contruida"]] < 800)
datos_sub2 <- datos_sub[pos3, ]

g2 <- ggplot(datos_sub2, aes(x = Area_contruida, y = Precio)) +
  geom_point() +
  geom_smooth()
ggplotly(g2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 47.84
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 12.16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 5.0285e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 406.43
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 47.84
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 12.16
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 5.0285e-017
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 406.43