# Libraries
library("rlang")
## Warning: package 'rlang' was built under R version 4.2.2
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::%@%()         masks rlang::%@%()
## ✖ purrr::as_function() masks rlang::as_function()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ purrr::flatten()     masks rlang::flatten()
## ✖ purrr::flatten_chr() masks rlang::flatten_chr()
## ✖ purrr::flatten_dbl() masks rlang::flatten_dbl()
## ✖ purrr::flatten_int() masks rlang::flatten_int()
## ✖ purrr::flatten_lgl() masks rlang::flatten_lgl()
## ✖ purrr::flatten_raw() masks rlang::flatten_raw()
## ✖ purrr::invoke()      masks rlang::invoke()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ purrr::splice()      masks rlang::splice()
library("ggplot2")
library("sf")
## Linking to GEOS 3.9.1, GDAL 3.3.2, PROJ 7.2.1; sf_use_s2() is TRUE
library("osmdata")
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library("rmarkdown")
library("lubridate")
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library("ggmap") 
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library("mapview")
library("hrbrthemes")
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library("RColorBrewer")
library("readr")
library("leaflet")
library("XML")
## Warning: package 'XML' was built under R version 4.2.1
library("osmextract")
## Warning: package 'osmextract' was built under R version 4.2.1
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright.
## Check the package website, https://docs.ropensci.org/osmextract/, for more details.
library("forcats")
library("spatialreg")
## Warning: package 'spatialreg' was built under R version 4.2.2
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.2.2
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.2.1
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library("spdep")
## Warning: package 'spdep' was built under R version 4.2.2
## Loading required package: sp
## 
## Attaching package: 'spdep'
## The following objects are masked from 'package:spatialreg':
## 
##     get.ClusterOption, get.coresOption, get.mcOption,
##     get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption
library("scales")
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# Load the CABA property listings as an sf object.
# NOTE(review): absolute, machine-specific path; the script only runs where the
# file exists at exactly this location.
properati_CABA_Propiedades <- read_sf(
  "C:/Users/sixto/OneDrive/Documentos/Di_tella/FIU_ii/Tesis_2/tesis2/properati_CABA_Propiedades1.geojson"
)

# Convert start_date to date format
properati_CABA_Propiedades$start_date <-
  as.Date(properati_CABA_Propiedades$start_date)

# Clean the listings in three steps:
#   1. keep rows whose pm2 is below mean + 3 standard deviations. na.rm = TRUE
#      keeps the threshold defined when pm2 has missing values; without it a
#      single NA would make the threshold NA and filter() would drop every row.
#   2. patch attributes: a covered surface under 15 is replaced by the total
#      surface, and zero bathrooms / bedrooms are replaced by 1.
#   3. keep rows with pm2 above 500.
# The threshold in step 1 is computed on the full data, before step 3 runs.
properati_CABA_Propiedades <- properati_CABA_Propiedades %>%
  filter(pm2 < mean(pm2, na.rm = TRUE) + 3 * sd(pm2, na.rm = TRUE)) %>%
  mutate(
    surface_covered = ifelse(surface_covered < 15, surface_total, surface_covered),
    bathrooms = ifelse(bathrooms == 0, 1, bathrooms),
    bedrooms = ifelse(bedrooms == 0, 1, bedrooms)
  ) %>%
  filter(pm2 > 500)

Análisis descriptivo

Creaciones y fechas

library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.1
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
## 
##     wind
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(scales)


# Count the listings for each creation date and property type.
# The geometry column is dropped first: summarising an sf object also unions
# the geometries of every group, and these plots never use the geometry.
# count() returns an ungrouped table with the count in column `n`.
df <- properati_CABA_Propiedades %>%
  st_drop_geometry() %>%
  count(start_date, property_type)

# Time series: one line per property type, rendered interactively with plotly
p1 <- ggplot(df, aes(x = start_date, y = n, color = property_type)) +
  geom_line(size = 1) +
  labs(
    title = "Evolution of Property Offer by Type",
    x = "Creation Date",
    y = "Count",
    color = "Property Type"
  ) +
  theme_minimal() +
  scale_x_date(date_breaks = "1 month", date_labels = "%Y-%m") +
  scale_y_continuous(labels = comma) +
  guides(color = guide_legend(title = "Property Type", ncol = 1))

ggplotly(p1)

# Stacked area: same counts, stacked by property type
p2 <- ggplot(df, aes(x = start_date, y = n, fill = property_type)) +
  geom_area(size = 0.3, alpha = 0.8) +
  labs(
    title = "Evolution of Property Offer by Type",
    x = "Creation Date",
    y = "Count",
    fill = "Property Type"
  ) +
  theme_minimal() +
  scale_x_date(date_breaks = "1 month", date_labels = "%Y-%m") +
  scale_y_continuous(labels = comma) +
  guides(fill = guide_legend(title = "Property Type", ncol = 1))

ggplotly(p2)
## Warning: Removed 2 rows containing missing values (position_stack).
# Convert start_date to date format (a no-op when it is already a Date)
properati_CABA_Propiedades$start_date <-
  as.Date(properati_CABA_Propiedades$start_date)

# Daily aggregates by creation date: mean price per square meter, mean price,
# and MEDIAN covered surface (stored as avg_size).
# NOTE(review): the input comes from read_sf(), so summarize() presumably also
# unions the geometries per date; consider st_drop_geometry() beforehand if the
# geometry of df is not needed downstream.
df <- properati_CABA_Propiedades %>%
  group_by(start_date) %>%
  summarize(
    avg_price_per_sqm = mean(pm2, na.rm = TRUE),
    avg_price = mean(price, na.rm = TRUE),
    avg_size = median(surface_covered, na.rm = TRUE)
  )

# NOTE(review): the axis labels below say ARS, while the histogram label
# elsewhere in this file says USD; confirm the currency of pm2 / price.

# Scatter plot of the daily mean price per square meter.
# The former `fill = df$avg_price_per_sqm` argument was removed: the default
# point shape has no fill, so it had no visible effect.
p1 <- ggplot(df, aes(x = start_date, y = avg_price_per_sqm)) +
  geom_point(size = 2) +
  labs(
    title = "Evolution of Average Price per Square Meter",
    x = "Creation Date",
    y = "Price per Square Meter (ARS)"
  ) +
  theme_minimal() +
  scale_x_date(date_breaks = "1 month", date_labels = "%Y-%m") +
  scale_y_continuous(labels = comma)

ggplotly(p1)

# Same series as a line plot (p1 is deliberately reassigned, as before)
p1 <- ggplot(df, aes(x = start_date, y = avg_price_per_sqm)) +
  geom_line(size = 1) +
  labs(
    title = "Evolution of Average Price per Square Meter",
    x = "Creation Date",
    y = "Price per Square Meter (ARS)"
  ) +
  theme_minimal() +
  scale_x_date(date_breaks = "1 month", date_labels = "%Y-%m") +
  scale_y_continuous(labels = comma)

ggplotly(p1)

# Scatter plot of the daily mean listing price
p3 <- ggplot(df, aes(x = start_date, y = avg_price)) +
  geom_point(size = 2, color = "red") +
  labs(
    title = "Evolution of Average Property Price",
    x = "Creation Date",
    y = "Price (ARS)"
  ) +
  theme_minimal() +
  scale_x_date(date_breaks = "1 month", date_labels = "%Y-%m") +
  scale_y_continuous(labels = comma)

ggplotly(p3)

# Scatter plot of the daily covered surface.
# NOTE(review): avg_size is a median, but the title says "Average"; confirm
# which statistic is intended.
p4 <- ggplot(df, aes(x = start_date, y = avg_size)) +
  geom_point(size = 2, color = "red") +
  labs(
    title = "Evolution of Average Property Size",
    x = "Creation Date",
    y = "M2"
  ) +
  theme_minimal() +
  scale_x_date(date_breaks = "1 month", date_labels = "%Y-%m") +
  scale_y_continuous(labels = comma)

ggplotly(p4)

# Indicator column: 1 for dates on or after 2020-03-01, 0 for earlier dates
df$covid <- as.numeric(df$start_date >= as.Date("2020-03-01"))

Histograma

# Histogram of a continuous column.
#
# Arguments:
#   df        data frame (or sf object) that holds the column
#   var_name  unquoted column name, mapped to the x axis
#   binwidth  bin width in the units of var_name (default 500)
#   title     plot title
#   x_label   x-axis label
# The defaults for binwidth, title and x_label reproduce the original
# hard-coded price-per-square-meter histogram, so existing two-argument calls
# are unchanged; pass them explicitly when plotting another column.
#
# Returns a ggplot object.
histograma_continuo <- function(df, var_name, binwidth = 500,
                                title = "Price per square meter distribution",
                                x_label = "Price per square meter (USD)") {
  ggplot(df, aes(x = {{ var_name }})) +
    geom_histogram(binwidth = binwidth, color = "white", fill = "lightblue") +
    labs(title = title, x = x_label, y = "Count") +
    theme_minimal()
}
# Bar chart with the number of rows per level of a categorical column.
#
# Arguments:
#   df        data frame (or sf object) that holds the column
#   var_name  unquoted column name, mapped to the x axis
#   title     plot title; NULL (default) draws no title, as before
#   x_label   x-axis label; empty by default, as before
#
# Returns a ggplot object.
histograma_categorico <- function(df, var_name, title = NULL, x_label = "") {
  ggplot(df, aes(x = {{ var_name }})) +
    # geom_bar() counts rows per level by default. The former `binwidth`
    # argument was dropped: it is not a geom_bar parameter and only produced an
    # "Ignoring unknown parameters" warning.
    geom_bar(color = "white", fill = "lightblue") +
    labs(title = title, x = x_label, y = "Count") +
    theme_minimal()
}
# Price per square meter, all listings
properati_CABA_Propiedades %>%
  histograma_continuo(pm2)

# Number of listings per property type
properati_CABA_Propiedades %>%
  histograma_categorico(property_type)
## Warning: Ignoring unknown parameters: binwidth

# Houses
properati_CABA_Propiedades %>%
  filter(property_type == "Casa") %>%
  histograma_continuo(pm2)

# Apartments
properati_CABA_Propiedades %>%
  filter(property_type == "Departamento") %>%
  histograma_continuo(pm2)