Santa_Fe

Calculator.

# Define the function 'calculadora'
calculadora <- function(ciudad_suelo, alcaldia, Coastline = FALSE) {

   get_bounding_box <- function(properties, buffer_distance = 3000) {
  # Compute the initial bounding box of the properties
  initial_bbox <- st_bbox(properties)
  
  # Convert bounding box to an sf object and then buffer it
  buffered_bbox <- st_as_sf(st_as_sfc(initial_bbox)) %>%
    st_buffer(buffer_distance)
  
  # Return the updated bounding box
  st_bbox(buffered_bbox)
}

  ciudad_bb =  get_bounding_box(properties = ciudad_suelo )
   
  # Helper function to get OSM data
  osm <- function(ciudad_bb, key, value) {
    opq(bbox = ciudad_bb) %>%
      add_osm_feature(key = key, value = value) %>%
      osmdata_sf()
  }

  # Load and filter all required datasets in one go
  data_list <- list(
    tren = osm(ciudad_bb, "railway", "station")$osm_polygons,
    rio = osm(ciudad_bb, "water", "river")$osm_polygons,
    parques = osm(ciudad_bb, "leisure", "park")$osm_polygons,
    banks = osm(ciudad_bb, "amenity", "bank")$osm_polygons,
    restaurants = osm(ciudad_bb, "amenity", "restaurant")$osm_polygons,
    hospitals = osm(ciudad_bb, "amenity", "hospital")$osm_polygons,
    schools = osm(ciudad_bb, "amenity", "school")$osm_polygons,
        university = osm(ciudad_bb, "amenity", "university")$osm_polygons,
    public_transport = osm(ciudad_bb, "amenity", "bus_station")$osm_polygons,
    shopping_centers = osm(ciudad_bb, "shop", "mall")$osm_polygons,
    police_stations = osm(ciudad_bb, "amenity", "police")$osm_polygons,
    fire_stations = osm(ciudad_bb, "amenity", "fire_station")$osm_polygons
  )

   if (Coastline) {
    data_list$costa <- osm(ciudad_bb, "natural", "coastline")$osm_lines
  }

  # Process parks separately
  parques <- data_list$parques %>%
    mutate(area = st_area(geometry)) %>%
    mutate(area = as.numeric(area))


  # Function to compute closest distance
  compute_closest_distance <- function(features, reference_sf) {
    distances <- st_distance(reference_sf, features)
    closest_distance <- apply(distances, 1, function(x) min(x, na.rm = TRUE))
    return(closest_distance)
  }

  # Calculate distances to each type of amenity
  ciudad_suelo <- ciudad_suelo %>%
    mutate(
      dist_bank = 0.00001 + compute_closest_distance(data_list$banks, ciudad_suelo),
      dist_restaurant = 0.00001 + compute_closest_distance(data_list$restaurants, ciudad_suelo),
      dist_hospital = 0.00001 + compute_closest_distance(data_list$hospitals, ciudad_suelo),
      dist_school = 0.00001 + compute_closest_distance(data_list$schools, ciudad_suelo),
            dist_university = 0.00001 + compute_closest_distance(data_list$university, ciudad_suelo),
,
      dist_public_transport = 0.00001 + compute_closest_distance(data_list$public_transport, ciudad_suelo),
      dist_shopping_center = 0.00001 + compute_closest_distance(data_list$shopping_centers, ciudad_suelo),
      dist_police_station = 0.00001 + compute_closest_distance(data_list$police_stations, ciudad_suelo),
      dist_fire_station = 0.00001 + compute_closest_distance(data_list$fire_stations, ciudad_suelo),
      dist_parque = 0.00001 + compute_closest_distance(data_list$parques, ciudad_suelo),
        dist_tren = 0.00001 + compute_closest_distance(data_list$tren, ciudad_suelo),
      dist_rio = 0.00001 + compute_closest_distance(data_list$rio, ciudad_suelo),
      dist_alcaldia = 0.00001 + compute_closest_distance(alcaldia, ciudad_suelo)
    )

   # Conditionally calculate the coastline distance if Coastline = TRUE
  if (Coastline) {
    ciudad_suelo <- ciudad_suelo %>%
      mutate(dist_costa = 0.00001 + compute_closest_distance(data_list$costa, ciudad_suelo))
  }
  
  # Function to count features within a specified radius
  count_within_radius <- function(reference_sf, features_sf, radius) {
    # Create buffers around each feature in reference_sf
    buffers <- st_buffer(reference_sf, radius)
    
    # Count how many features from features_sf are within each buffer
    counts <- st_intersects(buffers, features_sf, sparse = FALSE) %>% rowSums()
    
    return(counts)
  }

  # Define the radius in meters
  radius <- 500

  
  
  # Calculate counts for banks and restaurants
  ciudad_suelo <- ciudad_suelo %>%
    mutate(
      count_banks = count_within_radius(ciudad_suelo, data_list$banks, radius),
      count_restaurants = count_within_radius(ciudad_suelo, data_list$restaurants, radius)
    )

  # Define the COVID-19 reference date
  covid_date <- ymd("2020-03-19")

  # Create columns for COVID-related information
  ciudad_suelo <- ciudad_suelo %>%
    mutate(
      before_after_covid = ifelse(created_on < covid_date, "0", "1"),
      days_covid = as.numeric(difftime(created_on, covid_date, units = "days"))
    )

  ciudad_suelo <- ciudad_suelo %>%
    filter(days_covid !="")
  # Calculate nearest park area
  nearest_park_idx <- st_nearest_feature(ciudad_suelo, data_list$parques)
  nearest_park_areas <- parques$area[nearest_park_idx]
  ciudad_suelo$nearest_park_area <- nearest_park_areas

  return(ciudad_suelo)
}

Then we create the analyze location formula

analyze_location <- function(lat, lon, dataset, distance_threshold = 55000, jitter_amount = 2 / 111320, pm2_min = 1, pm2_max = 5000, Coastline1 = FALSE) {
  
  # Create the central business district (CBD) point
  location_cbd <- st_as_sf(data.frame(lat = lat, lon = lon), coords = c("lon", "lat"), crs = 4326)
  print("Central Business District (CBD) point created:")
  print(location_cbd)

  # Calculate distance and filter based on distance threshold
  location_data <- dataset %>%
    mutate(dist_location = as.numeric(st_distance(dataset, location_cbd))) %>%
    filter(dist_location < distance_threshold)
  print(paste("Filtered location data based on distance threshold of", distance_threshold, ":"))
  print(paste("Number of rows after filtering based on distance threshold:", nrow(location_data)))

 
  get_bounding_box <- function(location_data, buffer_distance = 3000) {
    # Compute the initial bounding box of the properties
    initial_bbox <- st_bbox(location_data)
    
    # Convert bounding box to an sf object and then buffer it
    buffered_bbox <- st_as_sf(st_as_sfc(initial_bbox)) %>%
      st_buffer(buffer_distance)
    
    # Return the updated bounding box
    st_bbox(buffered_bbox)
  }

  # Get bounding box and calculate distances
  location_sample_bb <- get_bounding_box(location_data, 3000)
  print("Bounding box for the sampled data calculated:")
  print(location_sample_bb)

  # Calculate 'pm2' based on property_type
  location_sample <- location_data %>%
    mutate(price = as.numeric(price_usd), 
           surface_covered = as.numeric(surface_covered),
           c = as.numeric(surface_total)) %>%
    filter(property_type %in% c("Casa", "Lote", "Departamento")) %>%
    mutate(pm2 = case_when(
      property_type %in% c("Casa", "Departamento") ~ price_usd / if_else(is.na(surface_covered) | surface_covered == 0 | surface_covered == "", surface_total, surface_covered),
      property_type == "Lote" ~ price_usd / surface_total,
      TRUE ~ NA_real_ ))

  location_sample <- location_sample %>%
    filter(pm2 > pm2_min, pm2 < pm2_max, surface_total != "")
  print("Sampled data after calculating pm2:")
  print(paste("Number of rows after pm2 calculation and filtering by property type and pm2:", nrow(location_sample)))

  # Calculate mean and standard deviation of dist_alcaldia
  dist_alcaldia_mean <- mean(location_sample$dist_location, na.rm = TRUE)
  dist_alcaldia_sd <- sd(location_sample$dist_location, na.rm = TRUE)

  # Filter based on dist_alcaldia
  location_sample <- location_sample %>%
    filter(dist_location < dist_alcaldia_mean + 3.5 * dist_alcaldia_sd)
  print(paste("Filtered data based on dist_alcaldia < mean + 3.5 * sd:"))
  print(paste("Number of rows after filtering based on dist_alcaldia:", nrow(location_sample)))

  # Remove duplicates
  location_sample <- location_sample %>% distinct(geometry, pm2, .keep_all = TRUE)
  print("Sampled data after removing duplicates:")
  print(paste("Number of rows after removing duplicates:", nrow(location_sample)))

  # Calculate distances
  location_sample_distances <- calculadora(location_sample, alcaldia = location_cbd, Coastline = Coastline1)
  print("Calculated distances within the sample data:")
  print(paste("Number of rows after calculating distances:", nrow(location_sample_distances)))

  # Apply jitter to the distances
  location_sample_distances <- st_jitter(location_sample_distances, amount = jitter_amount)
  print(paste("Jittered sample data with amount:", jitter_amount))
  print(paste("Number of rows after applying jitter:", nrow(location_sample_distances)))

  return(location_sample_distances)
}

  # Helper function to get OSM data
  osm <- function(ciudad_bb, key, value) {
    opq(bbox = ciudad_bb) %>%
      add_osm_feature(key = key, value = value) %>%
      osmdata_sf()
  }

Argentina = st_read("C:/Users/sixto/OneDrive/Documentos/LSE/Dissertation/Properatiads/Ecuador/Argentina_properati.geojson")

## Reading layer `Argentina_properati' from data source 
##   `C:\Users\sixto\OneDrive\Documentos\LSE\Dissertation\Properatiads\Ecuador\Argentina_properati.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 1121514 features and 20 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -80.20466 ymin: -54.84032 xmax: -0.000686646 ymax: 26.72034
## Geodetic CRS:  WGS 84

Argentina =  Argentina %>% st_set_crs(4326)

Now lets go with rosario

rosario_cbd = st_as_sf(data.frame(lon=-60.666596, lat=-32.964924), coords=c("lon", "lat"), crs=4326)

mapview(rosario_cbd)

parque_baigorria = st_as_sf(data.frame(lon=-60.695289, lat=-32.867059), coords=c("lon", "lat"), crs=4326)           
parque_baigorria_buffer = st_buffer(parque_baigorria, 5000)

mapview(parque_baigorria)  + mapview(parque_baigorria_buffer)

ciudad_bb <- st_bbox(parque_baigorria_buffer)

# Public Transport
tren <- osm(ciudad_bb, "railway", "station")$osm_polygons
public_transport <- osm(ciudad_bb, "amenity", "bus_station")$osm_polygons

# Water Features
rio <- osm(ciudad_bb, "water", "river")$osm_polygons

# Parks and Leisure
parques <- osm(ciudad_bb, "leisure", "park")$osm_polygons
sports_grounds <- osm(ciudad_bb, "leisure", "stadium")$osm_polygons
theaters <- osm(ciudad_bb, "leisure", "theatre")$osm_polygons

# Commercial and Retail
offices <- osm(ciudad_bb, "landuse", "commercial")$osm_polygons
shopping_centers <- osm(ciudad_bb, "shop", "mall")$osm_polygons
retail_shops <- osm(ciudad_bb, "shop", "retail")$osm_polygons
supermarkets <- osm(ciudad_bb, "shop", "supermarket")$osm_polygons
convenience_stores <- osm(ciudad_bb, "shop", "convenience")$osm_polygons
cafes <- osm(ciudad_bb, "amenity", "cafe")$osm_polygons
restaurants <- osm(ciudad_bb, "amenity", "restaurant")$osm_polygons
bars <- osm(ciudad_bb, "amenity", "bar")$osm_polygons
pubs <- osm(ciudad_bb, "amenity", "pub")$osm_polygons

# Services and Institutions
banks <- osm(ciudad_bb, "amenity", "bank")$osm_polygons
hospitals <- osm(ciudad_bb, "amenity", "hospital")$osm_polygons
schools <- osm(ciudad_bb, "amenity", "school")$osm_polygons
university <- osm(ciudad_bb, "amenity", "university")$osm_polygons
libraries <- osm(ciudad_bb, "amenity", "library")$osm_polygons
gyms <- osm(ciudad_bb, "amenity", "gym")$osm_polygons
police_stations <- osm(ciudad_bb, "amenity", "police")$osm_polygons
fire_stations <- osm(ciudad_bb, "amenity", "fire_station")$osm_polygons
post_offices <- osm(ciudad_bb, "amenity", "post_office")$osm_polygons

# Cultural and Entertainment
museums <- osm(ciudad_bb, "tourism", "museum")$osm_polygons
cinemas <- osm(ciudad_bb, "amenity", "cinema")$osm_polygons

# Industrial Areas
industrial <- osm(ciudad_bb, "landuse", "industrial")$osm_polygons

# Helper function to check and create mapview only if the dataset is not empty
create_mapview <- function(data, color, layer_name) {
  if (nrow(data) > 0) {
    return(mapview(data, col.regions = color, layer.name = layer_name))
  } else {
    return(NULL)
  }
}

# Create mapview objects for each category with colors, checking if there's data
map_rio <- create_mapview(rio, "blue", "Rivers")
map_parques <- create_mapview(parques, "green", "Parks")
map_banks <- create_mapview(banks, "purple", "Banks")
map_restaurants <- create_mapview(restaurants, "orange", "Restaurants")
map_hospitals <- create_mapview(hospitals, "pink", "Hospitals")
map_schools <- create_mapview(schools, "yellow", "Schools")
map_public_transport <- create_mapview(public_transport, "brown", "Bus Stations")
map_shopping_centers <- create_mapview(shopping_centers, "cyan", "Shopping Centers")
map_offices <- create_mapview(offices, "red", "Commercial Areas")
map_industrial <- create_mapview(industrial, "gray", "Industrial Areas")
map_museums <- create_mapview(museums, "lightblue", "Museums")
map_libraries <- create_mapview(libraries, "lightgreen", "Libraries")
map_gyms <- create_mapview(gyms, "lightcoral", "Gyms")
map_sports_grounds <- create_mapview(sports_grounds, "yellowgreen", "Sports Grounds")
map_cafes <- create_mapview(cafes, "lightyellow", "Cafes")
map_retail_shops <- create_mapview(retail_shops, "lightpink", "Retail Shops")
map_bars <- create_mapview(bars, "brown", "Bars")
map_pubs <- create_mapview(pubs, "darkgreen", "Pubs")
map_supermarkets <- create_mapview(supermarkets, "gold", "Supermarkets")
map_convenience_stores <- create_mapview(convenience_stores, "lightgray", "Convenience Stores")
map_theaters <- create_mapview(theaters, "lightsteelblue", "Theaters")
map_cinemas <- create_mapview(cinemas, "lightsalmon", "Cinemas")

# Combine all the maps into one interactive map, only including those with data
combined_map <- list(
  map_rio,
  map_parques,
  map_banks,
  map_restaurants,
  map_hospitals,
  map_schools,
  map_public_transport,
  map_shopping_centers,
  map_offices,
  map_industrial,
  map_museums,
  map_libraries,
  map_gyms,
  map_sports_grounds,
  map_cafes,
  map_retail_shops,
  map_bars,
  map_pubs,
  map_supermarkets,
  map_convenience_stores,
  map_theaters,
  map_cinemas
)

# Remove NULLs (empty datasets) and combine the rest into a single map
combined_map <- combined_map[!sapply(combined_map, is.null)]

# Display the combined map
if (length(combined_map) > 0) {
  combined_map <- Reduce(`+`, combined_map)
  combined_map
} else {
  print("No data available to display.")
}

parque_baigorria = parque_baigorria %>% st_set_crs(4326)

# Define parameters
distance_threshold <- 2500
jitter_amount <- 2 / 111320
pm2_min <- 1
pm2_max <- 4000


# Calculate distance and filter based on distance threshold
location_data <- Argentina %>%
  mutate(dist_location = as.numeric(st_distance(Argentina, parque_baigorria))) %>%
  filter(dist_location < distance_threshold)
print(paste("Filtered location data based on distance threshold of", distance_threshold, ":"))

## [1] "Filtered location data based on distance threshold of 2500 :"

print(paste("Number of rows after filtering based on distance threshold:", nrow(location_data)))

## [1] "Number of rows after filtering based on distance threshold: 1444"

# Calculate 'pm2' based on property_type
location_sample <- location_data %>%
  mutate(price = as.numeric(price_usd), 
         surface_covered = as.numeric(surface_covered),
         c = as.numeric(surface_total)) %>%
  filter(property_type %in% c("Casa", "Lote", "Departamento")) %>%
  mutate(pm2 = case_when(
    property_type %in% c("Casa", "Departamento") ~ price_usd / if_else(is.na(surface_covered) | surface_covered == 0 | surface_covered == "", surface_total, surface_covered),
    property_type == "Lote" ~ price_usd / surface_total,
    TRUE ~ NA_real_ ))

location_sample <- location_sample %>%
  filter(pm2 > pm2_min, pm2 < pm2_max, surface_total != "")  %>%   filter(!(currency_id == "ARS" & price == price_usd))
print("Sampled data after calculating pm2:")

## [1] "Sampled data after calculating pm2:"

print(paste("Number of rows after pm2 calculation and filtering by property type and pm2:", nrow(location_sample)))

## [1] "Number of rows after pm2 calculation and filtering by property type and pm2: 1294"

# Remove duplicates
location_sample <- location_sample %>% distinct(geometry, pm2, .keep_all = TRUE)
print("Sampled data after removing duplicates:")

## [1] "Sampled data after removing duplicates:"

print(paste("Number of rows after removing duplicates:", nrow(location_sample)))

## [1] "Number of rows after removing duplicates: 964"

library(dplyr)
library(mapview)

# Convert 'pm2' to numeric and clean character columns
location_sample <- location_sample %>%
  mutate(
    pm2 = as.numeric(as.character(pm2)),  # Ensure pm2 is numeric
    across(where(is.character), ~iconv(., "latin1", "UTF-8", sub = ""))  # Clean character columns
  ) %>%
  filter(!is.na(pm2))  # Remove rows with NA in pm2

# Render the map
mapview(location_sample, zcol = "pm2")

# Handle missing values (if any)
location_sample_clean <- location_sample %>%
  filter(!is.na(pm2) & !is.na(price_usd) & !is.na(surface_total) & !is.na(property_type)) %>% filter(surface_total <3000)

# Summary statistics
summary_stats <- location_sample_clean %>%
  group_by(property_type) %>%
  summarize(
    avg_pm2 = mean(pm2, na.rm = TRUE),
    median_pm2 = median(pm2, na.rm = TRUE),
    avg_price_usd = mean(price_usd, na.rm = TRUE),
    median_price_usd = median(price_usd, na.rm = TRUE),
    avg_surface_total = mean(surface_total, na.rm = TRUE),
    median_surface_total = median(surface_total, na.rm = TRUE),
    count = n()
  )
print(summary_stats)

## Simple feature collection with 3 features and 8 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -60.72115 ymin: -32.88927 xmax: -60.68705 ymax: -32.84485
## Geodetic CRS:  WGS 84
## # A tibble: 3 × 9
##   property_type avg_pm2 median_pm2 avg_price_usd median_price_usd
##   <chr>           <dbl>      <dbl>         <dbl>            <dbl>
## 1 Casa            1079.      1038.       152531.           125000
## 2 Departamento    1716.      1517.       124587.            86000
## 3 Lote             267.       224         88663.            56000
## # ℹ 4 more variables: avg_surface_total <dbl>, median_surface_total <dbl>,
## #   count <int>, geometry <MULTIPOINT [°]>

Vamos con Rosario

perform_analysis <- function(Argentina, analysis_location, distance_threshold, pm2_min = 1, pm2_max = 8000) {
  # Calculate distances from the analysis location
  location_data <- Argentina %>%
    mutate(dist_location = as.numeric(st_distance(., analysis_location))) %>%
    filter(dist_location < distance_threshold)
  
  # Ensure geometries are valid
  location_data <- st_make_valid(location_data)
  
  # Filter and calculate price per square meter (pm2) based on property type
  filtered_data <- location_data %>%
    mutate(
      price = as.numeric(price_usd),
      surface_covered = as.numeric(surface_covered),
      surface_total = as.numeric(surface_total)
    ) %>%
    filter(property_type %in% c("Casa", "Lote", "Departamento")) %>%
    mutate(pm2 = case_when(
      property_type %in% c("Casa", "Departamento") ~ price_usd / if_else(is.na(surface_covered) | surface_covered == 0, surface_total, surface_covered),
      property_type == "Lote" ~ price_usd / surface_total,
      TRUE ~ NA_real_
    )) %>%
    filter(
      pm2 > pm2_min, pm2 < pm2_max,
      surface_total != "",
      !(currency_id == "ARS" & price == price_usd)
    )
  
  # Calculate mean and standard deviation for distance filtering
  dist_mean <- mean(filtered_data$dist_location, na.rm = TRUE)
  dist_sd <- sd(filtered_data$dist_location, na.rm = TRUE)
  
  # Final filtering
  filtered_data <- filtered_data %>%
    filter(dist_location < dist_mean + 3.5 * dist_sd) %>%
    distinct(geometry, pm2, .keep_all = TRUE)
  
  # Statistical analysis on property type
  property_stats <- filtered_data %>%
    group_by(property_type) %>%
    summarise(
      count = n(),
      percentage = (n() / nrow(filtered_data)) * 100,
      avg_pm2 = mean(pm2, na.rm = TRUE),
      sd_pm2 = sd(pm2, na.rm = TRUE)
    ) %>%
    ungroup()
  
  # Print summary statistics
  print("Statistical Analysis on Property Types:")
  print(property_stats)
  
  # Return the filtered dataset and property statistics
  list(
    filtered_data = filtered_data,
    property_stats = property_stats
  )
}

result <- perform_analysis(
  Argentina = Argentina,
  analysis_location = rosario_cbd,  # Must be an sf object
  distance_threshold = 50000,
  pm2_min = 1,
  pm2_max = 8000
)

## [1] "Statistical Analysis on Property Types:"
## Simple feature collection with 3 features and 5 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -61.02088 ymin: -33.26251 xmax: -60.42124 ymax: -32.68018
## Geodetic CRS:  WGS 84
## # A tibble: 3 × 6
##   property_type count percentage avg_pm2 sd_pm2                         geometry
##   <chr>         <int>      <dbl>   <dbl>  <dbl>                 <MULTIPOINT [°]>
## 1 Casa          14561       26.9   1158.   585. ((-60.95921 -32.89661), (-60.98…
## 2 Departamento  34102       63.0   1837.   638. ((-60.7851 -32.77288), (-60.901…
## 3 Lote           5491       10.1    231.   499. ((-60.9742 -32.91102), (-61.018…

# Access the filtered dataset
Rosario <- result$filtered_data
filtered_data <- result$filtered_data

# Access the statistical analysis
Rosario_Estadisticas <- result$property_stats

analyze_pm2_distribution <- function(filtered_data) {
  # Cargar las librerías necesarias
  library(plotly)
  library(dplyr)
  
  # Asegurarse de que las columnas necesarias existan
  if (!all(c("property_type", "pm2") %in% colnames(filtered_data))) {
    stop("Los datos de entrada deben tener las columnas 'property_type' y 'pm2'.")
  }
  
  # Eliminar valores faltantes o inválidos de pm2
  filtered_data <- filtered_data %>% filter(!is.na(pm2) & pm2 > 0)
  
  # Ajustar la distribución normal de pm2 para cada tipo de propiedad
  pm2_distributions <- filtered_data %>%
    group_by(property_type) %>%
    summarise(
      mean_pm2 = mean(pm2, na.rm = TRUE),
      sd_pm2 = sd(pm2, na.rm = TRUE),
      .groups = "drop"
    )
  
  # Imprimir las estadísticas resumen para cada tipo de propiedad
  print("Resumen de las distribuciones de PM2 por tipo de propiedad:")
  print(pm2_distributions)
  
  # Unir las estadísticas resumen con los datos principales
  filtered_data <- st_drop_geometry(filtered_data) %>%
    left_join(pm2_distributions, by = "property_type")
  
 # Generar y mostrar gráficos interactivos para cada tipo de propiedad
  unique_property_types <- unique(filtered_data$property_type)
  
  for (property in unique_property_types) {
    # Filtrar los datos para el tipo de propiedad actual
    property_data <- filtered_data %>% filter(property_type == property)
    
    # Crear el gráfico interactivo
    p <- plot_ly(property_data, x = ~pm2, type = "histogram", histnorm = "density", 
                 marker = list(color = '#5A9BD5', opacity = 0.7, line = list(color = '#2F6A8A', width = 1))) %>%
      layout(
        title = list(
          text = paste("Distribución de PM2 para", property),
          font = list(size = 20, color = "#333")
        ),
        xaxis = list(
          title = "Precio por metro cuadrado (PM2)",
          titlefont = list(size = 15),
          showgrid = FALSE
        ),
        yaxis = list(
          title = "Densidad",
          titlefont = list(size = 15),
          showgrid = FALSE
        ),
        plot_bgcolor = '#f9f9f9',
        paper_bgcolor = '#ffffff',
        showlegend = FALSE,
        hovermode = "closest",
        margin = list(l = 50, r = 50, b = 50, t = 70)
      )
    
    # Imprimir el gráfico para mostrarlo
    print(p)
  }
}

# Call the function with the filtered dataset
analyze_pm2_distribution(filtered_data)

## Warning: package 'plotly' was built under R version 4.3.2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

## [1] "Resumen de las distribuciones de PM2 por tipo de propiedad:"
## Simple feature collection with 3 features and 3 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -61.02088 ymin: -33.26251 xmax: -60.42124 ymax: -32.68018
## Geodetic CRS:  WGS 84
## # A tibble: 3 × 4
##   property_type mean_pm2 sd_pm2                                         geometry
##   <chr>            <dbl>  <dbl>                                 <MULTIPOINT [°]>
## 1 Casa             1158.   585. ((-60.95921 -32.89661), (-60.98721 -32.9103), (…
## 2 Departamento     1837.   638. ((-60.7851 -32.77288), (-60.90173 -32.8946), (-…
## 3 Lote              231.   499. ((-60.9742 -32.91102), (-61.01896 -32.87779), (…

Baigorria_3000 <- perform_analysis(
  Argentina = Argentina,
  analysis_location = parque_baigorria,  # Must be an sf object
  distance_threshold = 3000,
  pm2_min = 1,
  pm2_max = 6000
)

## [1] "Statistical Analysis on Property Types:"
## Simple feature collection with 3 features and 5 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -60.72318 ymin: -32.89401 xmax: -60.68631 ymax: -32.84054
## Geodetic CRS:  WGS 84
## # A tibble: 3 × 6
##   property_type count percentage avg_pm2 sd_pm2                         geometry
##   <chr>         <int>      <dbl>   <dbl>  <dbl>                 <MULTIPOINT [°]>
## 1 Casa            890      68.9    1064.   453. ((-60.69722 -32.84054), (-60.69…
## 2 Departamento    278      21.5    1853.  1131. ((-60.7054 -32.85132), (-60.701…
## 3 Lote            123       9.53    287.   248. ((-60.69731 -32.84282), (-60.69…

analyze_pm2_distribution(Baigorria_3000$filtered_data)

## [1] "Resumen de las distribuciones de PM2 por tipo de propiedad:"
## Simple feature collection with 3 features and 3 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -60.72318 ymin: -32.89401 xmax: -60.68631 ymax: -32.84054
## Geodetic CRS:  WGS 84
## # A tibble: 3 × 4
##   property_type mean_pm2 sd_pm2                                         geometry
##   <chr>            <dbl>  <dbl>                                 <MULTIPOINT [°]>
## 1 Casa             1064.   453. ((-60.69722 -32.84054), (-60.69723 -32.84059), …
## 2 Departamento     1853.  1131. ((-60.7054 -32.85132), (-60.70194 -32.84876), (…
## 3 Lote              287.   248. ((-60.69731 -32.84282), (-60.69738 -32.84273), …

library(e1071)
library(plotly)
library(sf)
library(dplyr)

# Define the function for clustering, plotting with Plotly, and cluster details
cluster_and_plot_plotly <- function(data, selected_columns, k = 6, m = 6) {
  # Select the relevant columns for clustering
  subset_data <- data[selected_columns]
  
  # Drop the geometry column if present
  subset_data_1 <- st_drop_geometry(subset_data)
  
  # Convert property_type to numeric (if it's a factor or character)
  if (is.character(subset_data_1$property_type)) {
    subset_data_1$property_type <- as.numeric(factor(subset_data_1$property_type))
  }
  
  # Remove rows with missing data
  subset_data_clean <- subset_data_1 %>% filter(!is.na(pm2) & !is.na(property_type))
  
  # Scale the data for clustering
  subset_data_clean_scaled <- scale(subset_data_clean)
  
  # Perform FCM clustering
  fcm_model <- cmeans(subset_data_clean_scaled, centers = k, m = m, iter.max = 1000)
  
  # Check if fcm_model$u exists (membership matrix)
  if ("u" %in% names(fcm_model)) {
    # Assign the cluster with the highest membership
    subset_data_clean$cluster <- apply(fcm_model$u, 1, which.max)
  } else {
    # If fcm_model$u doesn't exist, use fcm_model$cluster
    subset_data_clean$cluster <- fcm_model$cluster
  }
  
  # Re-attach the geometry to the clustered data
  subset_data_clean_sf <- cbind(data, cluster = subset_data_clean$cluster)
  
  # Create an interactive plot with Plotly
  plot <- subset_data_clean_sf %>%
    plot_ly(
      type = "scattermapbox",
      mode = "markers",
      color = ~factor(cluster),  # Color by cluster
      colors = c("red", "blue", "green", "purple", "orange"),  # Assign colors to clusters
      hoverinfo = "text",
      text = ~paste("Cluster:", cluster, 
                    "<br>PM2:", pm2, 
                    "<br>Property Type:", property_type),
      marker = list(size = 5)
    ) %>%
    layout(
      title = "Clustering Map of PM2 and Property Type",
      mapbox = list(
        style = "open-street-map",  # Use OpenStreetMap as background
        zoom = 10,
        center = list(lon = mean(st_coordinates(subset_data_clean_sf)[,1]),
                      lat = mean(st_coordinates(subset_data_clean_sf)[,2]))
      )
    )
  
  # Calculate and print cluster details (compositions)
  cluster_details <- subset_data_clean %>%
    group_by(cluster) %>%
    summarise(
      count = n(),
      avg_pm2 = mean(pm2, na.rm = TRUE),
      avg_property_type = mean(property_type, na.rm = TRUE)
    )
  
  print(cluster_details)  # Print the cluster composition
  
  # Return the Plotly plot
  return(plot)
}

# Example usage of the function with your dataset
cluster_and_plot_plotly(Baigorria_3000$filtered_data, selected_columns = c("pm2", "property_type"))

## # A tibble: 6 × 4
##   cluster count avg_pm2 avg_property_type
##     <int> <int>   <dbl>             <dbl>
## 1       1   251   1613.              1   
## 2       2   125    271.              2.98
## 3       3   161   1114.              2.01
## 4       4   332    629.              1   
## 5       5   118   2936.              1.97
## 6       6   304   1067.              1

# Example usage of the function with your dataset
cluster_and_plot_plotly(Rosario, selected_columns = c("pm2", "property_type"))

## # A tibble: 6 × 4
##   cluster count avg_pm2 avg_property_type
##     <int> <int>   <dbl>             <dbl>
## 1       1 10974    631.              2.48
## 2       2  6542    697.              1   
## 3       3  8712   1489.              2.01
## 4       4 10161   2613.              1.99
## 5       5  9971   1829.              2.01
## 6       6  7794   1479.              1

Santa_Fe

Sixto Cristiani

2025-01-10