Calculator.
# Define the function 'calculadora'
calculadora <- function(ciudad_suelo, alcaldia, Coastline = FALSE) {
get_bounding_box <- function(properties, buffer_distance = 3000) {
# Compute the initial bounding box of the properties
initial_bbox <- st_bbox(properties)
# Convert bounding box to an sf object and then buffer it
buffered_bbox <- st_as_sf(st_as_sfc(initial_bbox)) %>%
st_buffer(buffer_distance)
# Return the updated bounding box
st_bbox(buffered_bbox)
}
ciudad_bb = get_bounding_box(properties = ciudad_suelo )
# Helper function to get OSM data
osm <- function(ciudad_bb, key, value) {
opq(bbox = ciudad_bb) %>%
add_osm_feature(key = key, value = value) %>%
osmdata_sf()
}
# Load and filter all required datasets in one go
data_list <- list(
tren = osm(ciudad_bb, "railway", "station")$osm_polygons,
rio = osm(ciudad_bb, "water", "river")$osm_polygons,
parques = osm(ciudad_bb, "leisure", "park")$osm_polygons,
banks = osm(ciudad_bb, "amenity", "bank")$osm_polygons,
restaurants = osm(ciudad_bb, "amenity", "restaurant")$osm_polygons,
hospitals = osm(ciudad_bb, "amenity", "hospital")$osm_polygons,
schools = osm(ciudad_bb, "amenity", "school")$osm_polygons,
university = osm(ciudad_bb, "amenity", "university")$osm_polygons,
public_transport = osm(ciudad_bb, "amenity", "bus_station")$osm_polygons,
shopping_centers = osm(ciudad_bb, "shop", "mall")$osm_polygons,
police_stations = osm(ciudad_bb, "amenity", "police")$osm_polygons,
fire_stations = osm(ciudad_bb, "amenity", "fire_station")$osm_polygons
)
if (Coastline) {
data_list$costa <- osm(ciudad_bb, "natural", "coastline")$osm_lines
}
# Process parks separately
parques <- data_list$parques %>%
mutate(area = st_area(geometry)) %>%
mutate(area = as.numeric(area))
# Function to compute closest distance
compute_closest_distance <- function(features, reference_sf) {
distances <- st_distance(reference_sf, features)
closest_distance <- apply(distances, 1, function(x) min(x, na.rm = TRUE))
return(closest_distance)
}
# Calculate distances to each type of amenity
ciudad_suelo <- ciudad_suelo %>%
mutate(
dist_bank = 0.00001 + compute_closest_distance(data_list$banks, ciudad_suelo),
dist_restaurant = 0.00001 + compute_closest_distance(data_list$restaurants, ciudad_suelo),
dist_hospital = 0.00001 + compute_closest_distance(data_list$hospitals, ciudad_suelo),
dist_school = 0.00001 + compute_closest_distance(data_list$schools, ciudad_suelo),
dist_university = 0.00001 + compute_closest_distance(data_list$university, ciudad_suelo),
,
dist_public_transport = 0.00001 + compute_closest_distance(data_list$public_transport, ciudad_suelo),
dist_shopping_center = 0.00001 + compute_closest_distance(data_list$shopping_centers, ciudad_suelo),
dist_police_station = 0.00001 + compute_closest_distance(data_list$police_stations, ciudad_suelo),
dist_fire_station = 0.00001 + compute_closest_distance(data_list$fire_stations, ciudad_suelo),
dist_parque = 0.00001 + compute_closest_distance(data_list$parques, ciudad_suelo),
dist_tren = 0.00001 + compute_closest_distance(data_list$tren, ciudad_suelo),
dist_rio = 0.00001 + compute_closest_distance(data_list$rio, ciudad_suelo),
dist_alcaldia = 0.00001 + compute_closest_distance(alcaldia, ciudad_suelo)
)
# Conditionally calculate the coastline distance if Coastline = TRUE
if (Coastline) {
ciudad_suelo <- ciudad_suelo %>%
mutate(dist_costa = 0.00001 + compute_closest_distance(data_list$costa, ciudad_suelo))
}
# Function to count features within a specified radius
count_within_radius <- function(reference_sf, features_sf, radius) {
# Create buffers around each feature in reference_sf
buffers <- st_buffer(reference_sf, radius)
# Count how many features from features_sf are within each buffer
counts <- st_intersects(buffers, features_sf, sparse = FALSE) %>% rowSums()
return(counts)
}
# Define the radius in meters
radius <- 500
# Calculate counts for banks and restaurants
ciudad_suelo <- ciudad_suelo %>%
mutate(
count_banks = count_within_radius(ciudad_suelo, data_list$banks, radius),
count_restaurants = count_within_radius(ciudad_suelo, data_list$restaurants, radius)
)
# Define the COVID-19 reference date
covid_date <- ymd("2020-03-19")
# Create columns for COVID-related information
ciudad_suelo <- ciudad_suelo %>%
mutate(
before_after_covid = ifelse(created_on < covid_date, "0", "1"),
days_covid = as.numeric(difftime(created_on, covid_date, units = "days"))
)
ciudad_suelo <- ciudad_suelo %>%
filter(days_covid !="")
# Calculate nearest park area
nearest_park_idx <- st_nearest_feature(ciudad_suelo, data_list$parques)
nearest_park_areas <- parques$area[nearest_park_idx]
ciudad_suelo$nearest_park_area <- nearest_park_areas
return(ciudad_suelo)
}
Then we create the analyze location formula
analyze_location <- function(lat, lon, dataset, distance_threshold = 55000, jitter_amount = 2 / 111320, pm2_min = 1, pm2_max = 5000, Coastline1 = FALSE) {
# Create the central business district (CBD) point
location_cbd <- st_as_sf(data.frame(lat = lat, lon = lon), coords = c("lon", "lat"), crs = 4326)
print("Central Business District (CBD) point created:")
print(location_cbd)
# Calculate distance and filter based on distance threshold
location_data <- dataset %>%
mutate(dist_location = as.numeric(st_distance(dataset, location_cbd))) %>%
filter(dist_location < distance_threshold)
print(paste("Filtered location data based on distance threshold of", distance_threshold, ":"))
print(paste("Number of rows after filtering based on distance threshold:", nrow(location_data)))
get_bounding_box <- function(location_data, buffer_distance = 3000) {
# Compute the initial bounding box of the properties
initial_bbox <- st_bbox(location_data)
# Convert bounding box to an sf object and then buffer it
buffered_bbox <- st_as_sf(st_as_sfc(initial_bbox)) %>%
st_buffer(buffer_distance)
# Return the updated bounding box
st_bbox(buffered_bbox)
}
# Get bounding box and calculate distances
location_sample_bb <- get_bounding_box(location_data, 3000)
print("Bounding box for the sampled data calculated:")
print(location_sample_bb)
# Calculate 'pm2' based on property_type
location_sample <- location_data %>%
mutate(price = as.numeric(price_usd),
surface_covered = as.numeric(surface_covered),
c = as.numeric(surface_total)) %>%
filter(property_type %in% c("Casa", "Lote", "Departamento")) %>%
mutate(pm2 = case_when(
property_type %in% c("Casa", "Departamento") ~ price_usd / if_else(is.na(surface_covered) | surface_covered == 0 | surface_covered == "", surface_total, surface_covered),
property_type == "Lote" ~ price_usd / surface_total,
TRUE ~ NA_real_ ))
location_sample <- location_sample %>%
filter(pm2 > pm2_min, pm2 < pm2_max, surface_total != "")
print("Sampled data after calculating pm2:")
print(paste("Number of rows after pm2 calculation and filtering by property type and pm2:", nrow(location_sample)))
# Calculate mean and standard deviation of dist_alcaldia
dist_alcaldia_mean <- mean(location_sample$dist_location, na.rm = TRUE)
dist_alcaldia_sd <- sd(location_sample$dist_location, na.rm = TRUE)
# Filter based on dist_alcaldia
location_sample <- location_sample %>%
filter(dist_location < dist_alcaldia_mean + 3.5 * dist_alcaldia_sd)
print(paste("Filtered data based on dist_alcaldia < mean + 3.5 * sd:"))
print(paste("Number of rows after filtering based on dist_alcaldia:", nrow(location_sample)))
# Remove duplicates
location_sample <- location_sample %>% distinct(geometry, pm2, .keep_all = TRUE)
print("Sampled data after removing duplicates:")
print(paste("Number of rows after removing duplicates:", nrow(location_sample)))
# Calculate distances
location_sample_distances <- calculadora(location_sample, alcaldia = location_cbd, Coastline = Coastline1)
print("Calculated distances within the sample data:")
print(paste("Number of rows after calculating distances:", nrow(location_sample_distances)))
# Apply jitter to the distances
location_sample_distances <- st_jitter(location_sample_distances, amount = jitter_amount)
print(paste("Jittered sample data with amount:", jitter_amount))
print(paste("Number of rows after applying jitter:", nrow(location_sample_distances)))
return(location_sample_distances)
}
# Helper function to get OSM data
osm <- function(ciudad_bb, key, value) {
opq(bbox = ciudad_bb) %>%
add_osm_feature(key = key, value = value) %>%
osmdata_sf()
}
Argentina = st_read("C:/Users/sixto/OneDrive/Documentos/LSE/Dissertation/Properatiads/Ecuador/Argentina_properati.geojson")
## Reading layer `Argentina_properati' from data source
## `C:\Users\sixto\OneDrive\Documentos\LSE\Dissertation\Properatiads\Ecuador\Argentina_properati.geojson'
## using driver `GeoJSON'
## Simple feature collection with 1121514 features and 20 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -80.20466 ymin: -54.84032 xmax: -0.000686646 ymax: 26.72034
## Geodetic CRS: WGS 84
Argentina = Argentina %>% st_set_crs(4326)
Now lets go with rosario
rosario_cbd = st_as_sf(data.frame(lon=-60.666596, lat=-32.964924), coords=c("lon", "lat"), crs=4326)
mapview(rosario_cbd)
parque_baigorria = st_as_sf(data.frame(lon=-60.695289, lat=-32.867059), coords=c("lon", "lat"), crs=4326)
parque_baigorria_buffer = st_buffer(parque_baigorria, 5000)
mapview(parque_baigorria) + mapview(parque_baigorria_buffer)
ciudad_bb <- st_bbox(parque_baigorria_buffer)
# Public Transport
tren <- osm(ciudad_bb, "railway", "station")$osm_polygons
public_transport <- osm(ciudad_bb, "amenity", "bus_station")$osm_polygons
# Water Features
rio <- osm(ciudad_bb, "water", "river")$osm_polygons
# Parks and Leisure
parques <- osm(ciudad_bb, "leisure", "park")$osm_polygons
sports_grounds <- osm(ciudad_bb, "leisure", "stadium")$osm_polygons
theaters <- osm(ciudad_bb, "leisure", "theatre")$osm_polygons
# Commercial and Retail
offices <- osm(ciudad_bb, "landuse", "commercial")$osm_polygons
shopping_centers <- osm(ciudad_bb, "shop", "mall")$osm_polygons
retail_shops <- osm(ciudad_bb, "shop", "retail")$osm_polygons
supermarkets <- osm(ciudad_bb, "shop", "supermarket")$osm_polygons
convenience_stores <- osm(ciudad_bb, "shop", "convenience")$osm_polygons
cafes <- osm(ciudad_bb, "amenity", "cafe")$osm_polygons
restaurants <- osm(ciudad_bb, "amenity", "restaurant")$osm_polygons
bars <- osm(ciudad_bb, "amenity", "bar")$osm_polygons
pubs <- osm(ciudad_bb, "amenity", "pub")$osm_polygons
# Services and Institutions
banks <- osm(ciudad_bb, "amenity", "bank")$osm_polygons
hospitals <- osm(ciudad_bb, "amenity", "hospital")$osm_polygons
schools <- osm(ciudad_bb, "amenity", "school")$osm_polygons
university <- osm(ciudad_bb, "amenity", "university")$osm_polygons
libraries <- osm(ciudad_bb, "amenity", "library")$osm_polygons
gyms <- osm(ciudad_bb, "amenity", "gym")$osm_polygons
police_stations <- osm(ciudad_bb, "amenity", "police")$osm_polygons
fire_stations <- osm(ciudad_bb, "amenity", "fire_station")$osm_polygons
post_offices <- osm(ciudad_bb, "amenity", "post_office")$osm_polygons
# Cultural and Entertainment
museums <- osm(ciudad_bb, "tourism", "museum")$osm_polygons
cinemas <- osm(ciudad_bb, "amenity", "cinema")$osm_polygons
# Industrial Areas
industrial <- osm(ciudad_bb, "landuse", "industrial")$osm_polygons
# Helper function to check and create mapview only if the dataset is not empty
create_mapview <- function(data, color, layer_name) {
if (nrow(data) > 0) {
return(mapview(data, col.regions = color, layer.name = layer_name))
} else {
return(NULL)
}
}
# Create mapview objects for each category with colors, checking if there's data
map_rio <- create_mapview(rio, "blue", "Rivers")
map_parques <- create_mapview(parques, "green", "Parks")
map_banks <- create_mapview(banks, "purple", "Banks")
map_restaurants <- create_mapview(restaurants, "orange", "Restaurants")
map_hospitals <- create_mapview(hospitals, "pink", "Hospitals")
map_schools <- create_mapview(schools, "yellow", "Schools")
map_public_transport <- create_mapview(public_transport, "brown", "Bus Stations")
map_shopping_centers <- create_mapview(shopping_centers, "cyan", "Shopping Centers")
map_offices <- create_mapview(offices, "red", "Commercial Areas")
map_industrial <- create_mapview(industrial, "gray", "Industrial Areas")
map_museums <- create_mapview(museums, "lightblue", "Museums")
map_libraries <- create_mapview(libraries, "lightgreen", "Libraries")
map_gyms <- create_mapview(gyms, "lightcoral", "Gyms")
map_sports_grounds <- create_mapview(sports_grounds, "yellowgreen", "Sports Grounds")
map_cafes <- create_mapview(cafes, "lightyellow", "Cafes")
map_retail_shops <- create_mapview(retail_shops, "lightpink", "Retail Shops")
map_bars <- create_mapview(bars, "brown", "Bars")
map_pubs <- create_mapview(pubs, "darkgreen", "Pubs")
map_supermarkets <- create_mapview(supermarkets, "gold", "Supermarkets")
map_convenience_stores <- create_mapview(convenience_stores, "lightgray", "Convenience Stores")
map_theaters <- create_mapview(theaters, "lightsteelblue", "Theaters")
map_cinemas <- create_mapview(cinemas, "lightsalmon", "Cinemas")
# Combine all the maps into one interactive map, only including those with data
combined_map <- list(
map_rio,
map_parques,
map_banks,
map_restaurants,
map_hospitals,
map_schools,
map_public_transport,
map_shopping_centers,
map_offices,
map_industrial,
map_museums,
map_libraries,
map_gyms,
map_sports_grounds,
map_cafes,
map_retail_shops,
map_bars,
map_pubs,
map_supermarkets,
map_convenience_stores,
map_theaters,
map_cinemas
)
# Remove NULLs (empty datasets) and combine the rest into a single map
combined_map <- combined_map[!sapply(combined_map, is.null)]
# Display the combined map
if (length(combined_map) > 0) {
combined_map <- Reduce(`+`, combined_map)
combined_map
} else {
print("No data available to display.")
}
parque_baigorria = parque_baigorria %>% st_set_crs(4326)
# Define parameters
distance_threshold <- 2500
jitter_amount <- 2 / 111320
pm2_min <- 1
pm2_max <- 4000
# Calculate distance and filter based on distance threshold
location_data <- Argentina %>%
mutate(dist_location = as.numeric(st_distance(Argentina, parque_baigorria))) %>%
filter(dist_location < distance_threshold)
print(paste("Filtered location data based on distance threshold of", distance_threshold, ":"))
## [1] "Filtered location data based on distance threshold of 2500 :"
print(paste("Number of rows after filtering based on distance threshold:", nrow(location_data)))
## [1] "Number of rows after filtering based on distance threshold: 1444"
# Calculate 'pm2' based on property_type
location_sample <- location_data %>%
mutate(price = as.numeric(price_usd),
surface_covered = as.numeric(surface_covered),
c = as.numeric(surface_total)) %>%
filter(property_type %in% c("Casa", "Lote", "Departamento")) %>%
mutate(pm2 = case_when(
property_type %in% c("Casa", "Departamento") ~ price_usd / if_else(is.na(surface_covered) | surface_covered == 0 | surface_covered == "", surface_total, surface_covered),
property_type == "Lote" ~ price_usd / surface_total,
TRUE ~ NA_real_ ))
location_sample <- location_sample %>%
filter(pm2 > pm2_min, pm2 < pm2_max, surface_total != "") %>% filter(!(currency_id == "ARS" & price == price_usd))
print("Sampled data after calculating pm2:")
## [1] "Sampled data after calculating pm2:"
print(paste("Number of rows after pm2 calculation and filtering by property type and pm2:", nrow(location_sample)))
## [1] "Number of rows after pm2 calculation and filtering by property type and pm2: 1294"
# Remove duplicates
location_sample <- location_sample %>% distinct(geometry, pm2, .keep_all = TRUE)
print("Sampled data after removing duplicates:")
## [1] "Sampled data after removing duplicates:"
print(paste("Number of rows after removing duplicates:", nrow(location_sample)))
## [1] "Number of rows after removing duplicates: 964"
library(dplyr)
library(mapview)
# Convert 'pm2' to numeric and clean character columns
location_sample <- location_sample %>%
mutate(
pm2 = as.numeric(as.character(pm2)), # Ensure pm2 is numeric
across(where(is.character), ~iconv(., "latin1", "UTF-8", sub = "")) # Clean character columns
) %>%
filter(!is.na(pm2)) # Remove rows with NA in pm2
# Render the map
mapview(location_sample, zcol = "pm2")
# Handle missing values (if any)
location_sample_clean <- location_sample %>%
filter(!is.na(pm2) & !is.na(price_usd) & !is.na(surface_total) & !is.na(property_type)) %>% filter(surface_total <3000)
# Summary statistics
summary_stats <- location_sample_clean %>%
group_by(property_type) %>%
summarize(
avg_pm2 = mean(pm2, na.rm = TRUE),
median_pm2 = median(pm2, na.rm = TRUE),
avg_price_usd = mean(price_usd, na.rm = TRUE),
median_price_usd = median(price_usd, na.rm = TRUE),
avg_surface_total = mean(surface_total, na.rm = TRUE),
median_surface_total = median(surface_total, na.rm = TRUE),
count = n()
)
print(summary_stats)
## Simple feature collection with 3 features and 8 fields
## Geometry type: MULTIPOINT
## Dimension: XY
## Bounding box: xmin: -60.72115 ymin: -32.88927 xmax: -60.68705 ymax: -32.84485
## Geodetic CRS: WGS 84
## # A tibble: 3 × 9
## property_type avg_pm2 median_pm2 avg_price_usd median_price_usd
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Casa 1079. 1038. 152531. 125000
## 2 Departamento 1716. 1517. 124587. 86000
## 3 Lote 267. 224 88663. 56000
## # ℹ 4 more variables: avg_surface_total <dbl>, median_surface_total <dbl>,
## # count <int>, geometry <MULTIPOINT [°]>
Vamos con Rosario
perform_analysis <- function(Argentina, analysis_location, distance_threshold, pm2_min = 1, pm2_max = 8000) {
# Calculate distances from the analysis location
location_data <- Argentina %>%
mutate(dist_location = as.numeric(st_distance(., analysis_location))) %>%
filter(dist_location < distance_threshold)
# Ensure geometries are valid
location_data <- st_make_valid(location_data)
# Filter and calculate price per square meter (pm2) based on property type
filtered_data <- location_data %>%
mutate(
price = as.numeric(price_usd),
surface_covered = as.numeric(surface_covered),
surface_total = as.numeric(surface_total)
) %>%
filter(property_type %in% c("Casa", "Lote", "Departamento")) %>%
mutate(pm2 = case_when(
property_type %in% c("Casa", "Departamento") ~ price_usd / if_else(is.na(surface_covered) | surface_covered == 0, surface_total, surface_covered),
property_type == "Lote" ~ price_usd / surface_total,
TRUE ~ NA_real_
)) %>%
filter(
pm2 > pm2_min, pm2 < pm2_max,
surface_total != "",
!(currency_id == "ARS" & price == price_usd)
)
# Calculate mean and standard deviation for distance filtering
dist_mean <- mean(filtered_data$dist_location, na.rm = TRUE)
dist_sd <- sd(filtered_data$dist_location, na.rm = TRUE)
# Final filtering
filtered_data <- filtered_data %>%
filter(dist_location < dist_mean + 3.5 * dist_sd) %>%
distinct(geometry, pm2, .keep_all = TRUE)
# Statistical analysis on property type
property_stats <- filtered_data %>%
group_by(property_type) %>%
summarise(
count = n(),
percentage = (n() / nrow(filtered_data)) * 100,
avg_pm2 = mean(pm2, na.rm = TRUE),
sd_pm2 = sd(pm2, na.rm = TRUE)
) %>%
ungroup()
# Print summary statistics
print("Statistical Analysis on Property Types:")
print(property_stats)
# Return the filtered dataset and property statistics
list(
filtered_data = filtered_data,
property_stats = property_stats
)
}
result <- perform_analysis(
Argentina = Argentina,
analysis_location = rosario_cbd, # Must be an sf object
distance_threshold = 50000,
pm2_min = 1,
pm2_max = 8000
)
## [1] "Statistical Analysis on Property Types:"
## Simple feature collection with 3 features and 5 fields
## Geometry type: MULTIPOINT
## Dimension: XY
## Bounding box: xmin: -61.02088 ymin: -33.26251 xmax: -60.42124 ymax: -32.68018
## Geodetic CRS: WGS 84
## # A tibble: 3 × 6
## property_type count percentage avg_pm2 sd_pm2 geometry
## <chr> <int> <dbl> <dbl> <dbl> <MULTIPOINT [°]>
## 1 Casa 14561 26.9 1158. 585. ((-60.95921 -32.89661), (-60.98…
## 2 Departamento 34102 63.0 1837. 638. ((-60.7851 -32.77288), (-60.901…
## 3 Lote 5491 10.1 231. 499. ((-60.9742 -32.91102), (-61.018…
# Access the filtered dataset
Rosario <- result$filtered_data
filtered_data <- result$filtered_data
# Access the statistical analysis
Rosario_Estadisticas <- result$property_stats
analyze_pm2_distribution <- function(filtered_data) {
# Cargar las librerías necesarias
library(plotly)
library(dplyr)
# Asegurarse de que las columnas necesarias existan
if (!all(c("property_type", "pm2") %in% colnames(filtered_data))) {
stop("Los datos de entrada deben tener las columnas 'property_type' y 'pm2'.")
}
# Eliminar valores faltantes o inválidos de pm2
filtered_data <- filtered_data %>% filter(!is.na(pm2) & pm2 > 0)
# Ajustar la distribución normal de pm2 para cada tipo de propiedad
pm2_distributions <- filtered_data %>%
group_by(property_type) %>%
summarise(
mean_pm2 = mean(pm2, na.rm = TRUE),
sd_pm2 = sd(pm2, na.rm = TRUE),
.groups = "drop"
)
# Imprimir las estadísticas resumen para cada tipo de propiedad
print("Resumen de las distribuciones de PM2 por tipo de propiedad:")
print(pm2_distributions)
# Unir las estadísticas resumen con los datos principales
filtered_data <- st_drop_geometry(filtered_data) %>%
left_join(pm2_distributions, by = "property_type")
# Generar y mostrar gráficos interactivos para cada tipo de propiedad
unique_property_types <- unique(filtered_data$property_type)
for (property in unique_property_types) {
# Filtrar los datos para el tipo de propiedad actual
property_data <- filtered_data %>% filter(property_type == property)
# Crear el gráfico interactivo
p <- plot_ly(property_data, x = ~pm2, type = "histogram", histnorm = "density",
marker = list(color = '#5A9BD5', opacity = 0.7, line = list(color = '#2F6A8A', width = 1))) %>%
layout(
title = list(
text = paste("Distribución de PM2 para", property),
font = list(size = 20, color = "#333")
),
xaxis = list(
title = "Precio por metro cuadrado (PM2)",
titlefont = list(size = 15),
showgrid = FALSE
),
yaxis = list(
title = "Densidad",
titlefont = list(size = 15),
showgrid = FALSE
),
plot_bgcolor = '#f9f9f9',
paper_bgcolor = '#ffffff',
showlegend = FALSE,
hovermode = "closest",
margin = list(l = 50, r = 50, b = 50, t = 70)
)
# Imprimir el gráfico para mostrarlo
print(p)
}
}
# Call the function with the filtered dataset
analyze_pm2_distribution(filtered_data)
## Warning: package 'plotly' was built under R version 4.3.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## [1] "Resumen de las distribuciones de PM2 por tipo de propiedad:"
## Simple feature collection with 3 features and 3 fields
## Geometry type: MULTIPOINT
## Dimension: XY
## Bounding box: xmin: -61.02088 ymin: -33.26251 xmax: -60.42124 ymax: -32.68018
## Geodetic CRS: WGS 84
## # A tibble: 3 × 4
## property_type mean_pm2 sd_pm2 geometry
## <chr> <dbl> <dbl> <MULTIPOINT [°]>
## 1 Casa 1158. 585. ((-60.95921 -32.89661), (-60.98721 -32.9103), (…
## 2 Departamento 1837. 638. ((-60.7851 -32.77288), (-60.90173 -32.8946), (-…
## 3 Lote 231. 499. ((-60.9742 -32.91102), (-61.01896 -32.87779), (…
Baigorria_3000 <- perform_analysis(
Argentina = Argentina,
analysis_location = parque_baigorria, # Must be an sf object
distance_threshold = 3000,
pm2_min = 1,
pm2_max = 6000
)
## [1] "Statistical Analysis on Property Types:"
## Simple feature collection with 3 features and 5 fields
## Geometry type: MULTIPOINT
## Dimension: XY
## Bounding box: xmin: -60.72318 ymin: -32.89401 xmax: -60.68631 ymax: -32.84054
## Geodetic CRS: WGS 84
## # A tibble: 3 × 6
## property_type count percentage avg_pm2 sd_pm2 geometry
## <chr> <int> <dbl> <dbl> <dbl> <MULTIPOINT [°]>
## 1 Casa 890 68.9 1064. 453. ((-60.69722 -32.84054), (-60.69…
## 2 Departamento 278 21.5 1853. 1131. ((-60.7054 -32.85132), (-60.701…
## 3 Lote 123 9.53 287. 248. ((-60.69731 -32.84282), (-60.69…
analyze_pm2_distribution(Baigorria_3000$filtered_data)
## [1] "Resumen de las distribuciones de PM2 por tipo de propiedad:"
## Simple feature collection with 3 features and 3 fields
## Geometry type: MULTIPOINT
## Dimension: XY
## Bounding box: xmin: -60.72318 ymin: -32.89401 xmax: -60.68631 ymax: -32.84054
## Geodetic CRS: WGS 84
## # A tibble: 3 × 4
## property_type mean_pm2 sd_pm2 geometry
## <chr> <dbl> <dbl> <MULTIPOINT [°]>
## 1 Casa 1064. 453. ((-60.69722 -32.84054), (-60.69723 -32.84059), …
## 2 Departamento 1853. 1131. ((-60.7054 -32.85132), (-60.70194 -32.84876), (…
## 3 Lote 287. 248. ((-60.69731 -32.84282), (-60.69738 -32.84273), …
library(e1071)
library(plotly)
library(sf)
library(dplyr)
# Define the function for clustering, plotting with Plotly, and cluster details
cluster_and_plot_plotly <- function(data, selected_columns, k = 6, m = 6) {
# Select the relevant columns for clustering
subset_data <- data[selected_columns]
# Drop the geometry column if present
subset_data_1 <- st_drop_geometry(subset_data)
# Convert property_type to numeric (if it's a factor or character)
if (is.character(subset_data_1$property_type)) {
subset_data_1$property_type <- as.numeric(factor(subset_data_1$property_type))
}
# Remove rows with missing data
subset_data_clean <- subset_data_1 %>% filter(!is.na(pm2) & !is.na(property_type))
# Scale the data for clustering
subset_data_clean_scaled <- scale(subset_data_clean)
# Perform FCM clustering
fcm_model <- cmeans(subset_data_clean_scaled, centers = k, m = m, iter.max = 1000)
# Check if fcm_model$u exists (membership matrix)
if ("u" %in% names(fcm_model)) {
# Assign the cluster with the highest membership
subset_data_clean$cluster <- apply(fcm_model$u, 1, which.max)
} else {
# If fcm_model$u doesn't exist, use fcm_model$cluster
subset_data_clean$cluster <- fcm_model$cluster
}
# Re-attach the geometry to the clustered data
subset_data_clean_sf <- cbind(data, cluster = subset_data_clean$cluster)
# Create an interactive plot with Plotly
plot <- subset_data_clean_sf %>%
plot_ly(
type = "scattermapbox",
mode = "markers",
color = ~factor(cluster), # Color by cluster
colors = c("red", "blue", "green", "purple", "orange"), # Assign colors to clusters
hoverinfo = "text",
text = ~paste("Cluster:", cluster,
"<br>PM2:", pm2,
"<br>Property Type:", property_type),
marker = list(size = 5)
) %>%
layout(
title = "Clustering Map of PM2 and Property Type",
mapbox = list(
style = "open-street-map", # Use OpenStreetMap as background
zoom = 10,
center = list(lon = mean(st_coordinates(subset_data_clean_sf)[,1]),
lat = mean(st_coordinates(subset_data_clean_sf)[,2]))
)
)
# Calculate and print cluster details (compositions)
cluster_details <- subset_data_clean %>%
group_by(cluster) %>%
summarise(
count = n(),
avg_pm2 = mean(pm2, na.rm = TRUE),
avg_property_type = mean(property_type, na.rm = TRUE)
)
print(cluster_details) # Print the cluster composition
# Return the Plotly plot
return(plot)
}
# Example usage of the function with your dataset
cluster_and_plot_plotly(Baigorria_3000$filtered_data, selected_columns = c("pm2", "property_type"))
## # A tibble: 6 × 4
## cluster count avg_pm2 avg_property_type
## <int> <int> <dbl> <dbl>
## 1 1 251 1613. 1
## 2 2 125 271. 2.98
## 3 3 161 1114. 2.01
## 4 4 332 629. 1
## 5 5 118 2936. 1.97
## 6 6 304 1067. 1
# Example usage of the function with your dataset
cluster_and_plot_plotly(Rosario, selected_columns = c("pm2", "property_type"))
## # A tibble: 6 × 4
## cluster count avg_pm2 avg_property_type
## <int> <int> <dbl> <dbl>
## 1 1 10974 631. 2.48
## 2 2 6542 697. 1
## 3 3 8712 1489. 2.01
## 4 4 10161 2613. 1.99
## 5 5 9971 1829. 2.01
## 6 6 7794 1479. 1