This program presents the steps used to construct a typology of bicycle users based on the 2019 Household Travel Survey conducted in Bogotá and made available as open data. The method relies on a factor analysis of mixed data (FAMD) and a hierarchical cluster analysis (HCA). The approach combines sociodemographic characteristics of 3,241 individuals and variables describing their mobility practices. Maps showing the concentration of residence locations and destination locations for the six cyclist profiles are provided The method developed in R is reproducible for other urban areas and other groups of people using other modes of transport.
library(openxlsx) # For importing/exporting from/to Excel
library(FactoMineR) # To run factor analyses (such as FAMD) and classifications (e.g., HCA)
library(factoextra) # To enhance the rendering of graphs produced by FactoMineR
library(dplyr) # To facilitate data manipulation
library(tidyr) # To switch from long to wide format
library(fmsb) # To produce radar charts
library(ggplot2) # To produce advanced graphics
library(sf) # To manipulate spatial objects
library(spatstat) # For spatial smoothing
library(terra) # For processing raster data
library(mapsf) # For cartographic representation and map layout
library(ggnewscale) # To set color palettes for ggplot graphs
library(plotly) # To plot 3D graphs
library(geometry) # To plot convex hulls
library(ggiraph) # To create interactive graphs from ggplot
library(patchwork) # To combine multiple graphs on the same layout
# Import of the table of individuals who used the bicycle as their main mode of transport in the city the day before the survey
Cyclistes <- read.xlsx("Cyclistes.xlsx", sheet = "VariablesCyclistes")
rownames(Cyclistes) <- Cyclistes$id_hog_pers # To add the person's ID as row names
Cyclistes$id_hog_pers <- NULL # To remove the column
head(Cyclistes)
## Edad Sexo NivEducCL LicCondu Ocasional DuracionCL MotivoCL
## P00316-4 Inf. 15 Mujer PrimariaSecund LicCon-No Ocas-No Inf. 30 Estudio
## 12861-1 40-64 Mujer Universitario LicCon-No Ocas-No Inf. 30 Otro
## 5636-4 Inf. 15 Mujer PrimariaSecund LicCon-No Ocas-No Inf. 30 Estudio
## 10497-3 15-39 Hombre Media LicCon-No Ocas-Si 60-89 Otro
## P05845-6 15-39 Hombre Media LicCon-No Ocas-No 30-59 Trab
## 6973-1 40-64 Hombre PrimariaSecund LicCon-Si Ocas-No Inf. 30 Trab
## Estrato LugResid GranZona DistanciaCL Duracion Distancia PartModInd
## P00316-4 Estr2 MOSQUERA Periferia 2-4 km 16 2480 100.0
## 12861-1 Estr2 MOSQUERA Periferia Inf 2 km 10 742 100.0
## 5636-4 Estr0 SOACHA Sur 2-4 km 15 2056 100.0
## 10497-3 Estr5 FONTIBON Oeste Inf 2 km 60 819 100.0
## P05845-6 Estr0 TOCANCIPA Periferia 15 km y más 30 27647 100.0
## 6973-1 Estr2 MADRID Periferia 6-10 km 15 8532 66.6
## Experi NumViajDia FrecSemana FrecFinSem NumAuto NumMoto NumPersHog
## P00316-4 5.0 2 5.0 0 0 2 5
## 12861-1 5.0 4 5.0 0 0 0 2
## 5636-4 1.5 2 4.5 0 0 0 4
## 10497-3 3.0 2 0.0 0 0 0 4
## P05845-6 5.0 2 4.0 1 0 1 8
## 6973-1 3.0 4 4.0 1 0 0 2
## Pct_ICS12 ParModUtam
## P00316-4 30.2 18.205580
## 12861-1 12.5 18.205580
## 5636-4 22.1 6.782659
## 10497-3 0.7 1.798401
## P05845-6 38.4 10.204420
## 6973-1 49.2 15.953432
str(Cyclistes)
## 'data.frame': 3241 obs. of 23 variables:
## $ Edad : chr "Inf. 15" "40-64" "Inf. 15" "15-39" ...
## $ Sexo : chr "Mujer" "Mujer" "Mujer" "Hombre" ...
## $ NivEducCL : chr "PrimariaSecund" "Universitario" "PrimariaSecund" "Media" ...
## $ LicCondu : chr "LicCon-No" "LicCon-No" "LicCon-No" "LicCon-No" ...
## $ Ocasional : chr "Ocas-No" "Ocas-No" "Ocas-No" "Ocas-Si" ...
## $ DuracionCL : chr "Inf. 30" "Inf. 30" "Inf. 30" "60-89" ...
## $ MotivoCL : chr "Estudio" "Otro" "Estudio" "Otro" ...
## $ Estrato : chr "Estr2" "Estr2" "Estr0" "Estr5" ...
## $ LugResid : chr "MOSQUERA" "MOSQUERA" "SOACHA" "FONTIBON" ...
## $ GranZona : chr "Periferia" "Periferia" "Sur" "Oeste" ...
## $ DistanciaCL: chr "2-4 km" "Inf 2 km" "2-4 km" "Inf 2 km" ...
## $ Duracion : num 16 10 15 60 30 15 30 45 75 60 ...
## $ Distancia : num 2480 742 2056 819 27647 ...
## $ PartModInd : num 100 100 100 100 100 66.6 100 100 33.3 100 ...
## $ Experi : num 5 5 1.5 3 5 3 5 5 4 5 ...
## $ NumViajDia : num 2 4 2 2 2 4 2 1 2 3 ...
## $ FrecSemana : num 5 5 4.5 0 4 4 5 0 0 4 ...
## $ FrecFinSem : num 0 0 0 0 1 1 0 0 0 0 ...
## $ NumAuto : num 0 0 0 0 0 0 0 0 0 0 ...
## $ NumMoto : num 2 0 0 0 1 0 0 0 1 0 ...
## $ NumPersHog : num 5 2 4 4 8 2 6 5 6 7 ...
## $ Pct_ICS12 : num 30.2 12.5 22.1 0.7 38.4 49.2 19.4 46.1 44.9 55.7 ...
## $ ParModUtam : num 18.21 18.21 6.78 1.8 10.2 ...
summary(Cyclistes)
## Edad Sexo NivEducCL LicCondu
## Length:3241 Length:3241 Length:3241 Length:3241
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Ocasional DuracionCL MotivoCL Estrato
## Length:3241 Length:3241 Length:3241 Length:3241
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## LugResid GranZona DistanciaCL Duracion
## Length:3241 Length:3241 Length:3241 Min. : 0.00
## Class :character Class :character Class :character 1st Qu.: 13.00
## Mode :character Mode :character Mode :character Median : 22.00
## Mean : 38.69
## 3rd Qu.: 40.00
## Max. :810.00
## Distancia PartModInd Experi NumViajDia
## Min. : 46 Min. : 12.50 Min. :1.000 Min. :1.000
## 1st Qu.: 1727 1st Qu.: 60.00 1st Qu.:4.000 1st Qu.:2.000
## Median : 4161 Median :100.00 Median :5.000 Median :2.000
## Mean : 7438 Mean : 84.56 Mean :4.398 Mean :2.111
## 3rd Qu.: 9400 3rd Qu.:100.00 3rd Qu.:5.000 3rd Qu.:2.000
## Max. :86940 Max. :100.00 Max. :5.000 Max. :8.000
## FrecSemana FrecFinSem NumAuto NumMoto
## Min. :0.000 Min. :0.00 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:0.00 1st Qu.:0.0000 1st Qu.:0.0000
## Median :4.000 Median :0.00 Median :0.0000 Median :0.0000
## Mean :3.175 Mean :0.34 Mean :0.2391 Mean :0.1765
## 3rd Qu.:5.000 3rd Qu.:1.00 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :5.000 Max. :2.00 Max. :4.0000 Max. :5.0000
## NumPersHog Pct_ICS12 ParModUtam
## Min. : 1.000 Min. : 0.00 Min. : 0.000
## 1st Qu.: 3.000 1st Qu.: 9.10 1st Qu.: 5.095
## Median : 4.000 Median : 23.70 Median : 7.974
## Mean : 3.773 Mean : 25.45 Mean : 9.138
## 3rd Qu.: 5.000 3rd Qu.: 39.50 3rd Qu.:12.190
## Max. :13.000 Max. :100.00 Max. :23.040
res <- FAMD(Cyclistes, graph = FALSE)
contrib <- transform(data.frame(res$ind$contrib), contribPlan1 = Dim.1 + Dim.2)
selected_individuals <- rownames(contrib)[contrib$contribPlan1 >= quantile(contrib$contribPlan1, 0.99)] # Creating a list of individuals whose labels will be displayed on the graphs (only 1% are selected here to avoid cluttering the graphs)
rm(contrib)
plot.FAMD(res, title = "FAMD individual plot with categorical values", cex = 0.9, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75, label = c('ind','quali.var'), select = selected_individuals)
plot.FAMD(res, title = "Plot of categorical values", cex = 0.9, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75, invisible = "ind")
plot.FAMD(res, choix = "var", title = "Plot of centers of quantitative and categorical variables", cex = 0.9, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75, xlim = c(0, 0.8), ylim = c(0, 0.55))
plot.FAMD(res, choix = "quanti", title = "Correlation plot of quantitative variables", cex = 0.9, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75)
Cyclistes$Experi <- NULL
Cyclistes$NumMoto <- NULL
Cyclistes$NumViajDia <- NULL
Cyclistes$PartModInd <- NULL
print(as.data.frame(colnames(Cyclistes)))
## colnames(Cyclistes)
## 1 Edad
## 2 Sexo
## 3 NivEducCL
## 4 LicCondu
## 5 Ocasional
## 6 DuracionCL
## 7 MotivoCL
## 8 Estrato
## 9 LugResid
## 10 GranZona
## 11 DistanciaCL
## 12 Duracion
## 13 Distancia
## 14 FrecSemana
## 15 FrecFinSem
## 16 NumAuto
## 17 NumPersHog
## 18 Pct_ICS12
## 19 ParModUtam
Cyclistes <- Cyclistes[Cyclistes$Duracion < 300,]
res <- FAMD(Cyclistes, sup.var = c(2,6,9,10,11,15,17), graph = FALSE)
contrib <- transform(data.frame(res$ind$contrib), contribPlan1 = Dim.1 + Dim.2)
selected_individuals <- rownames(contrib)[contrib$contribPlan1 >= quantile(contrib$contribPlan1, 0.95)] # Creating a list of individuals whose labels will be displayed on the graphs (only 5% are selected here to avoid cluttering the graphs)
rm(contrib)
plot.FAMD(res, title = "FAMD individual plot with categorical values", cex = 0.6, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75, label = c('ind','quali.var'), select = selected_individuals)
plot.FAMD(res, title = "Plot of categorical values", cex = 0.7, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75, invisible = "ind")
plot.FAMD(res, choix = "var", title = "Plot of centers of quantitative and categorical variables", cex = 0.7, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75, xlim = c(0, 0.55), ylim = c(0, 0.45))
plot.FAMD(res, choix = "quanti", title = "Correlation plot of quantitative variables", cex = 0.9, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75)
nbclasses <- 6 # Test with different numbers of clusters
res.hcpc <- HCPC(res, kk = Inf, nb.clust = nbclasses, consol = FALSE, graph = FALSE) # kk = Inf means no prior partition, consol = FALSE means no consolidation (by k-means) after HCA
plot(res.hcpc, choice = "tree", labels = FALSE)
# Creation of the initial 2D graph
cluster_colors <- c("blue", "green", "red", "yellow", "black", "cyan")
d <- fviz_cluster(res.hcpc,
repel = TRUE,
show.clust.cent = FALSE, # No centers
palette = cluster_colors,
ggtheme = theme_gray(),
main = "Factor plane",
geom = "point",
pointsize = 0.25,
alpha = 0.6,
ellipse = TRUE,
ellipse.type = "convex",
ellipse.alpha = 0.2,
ellipse.linetype = "solid", # Solid line for ellipses
star.plot = FALSE,
shape = 19) # Identical points
# Modify the graph
d_modified <- d +
geom_hline(yintercept = 0, color = "black", linetype = "dashed") +
geom_vline(xintercept = 0, color = "black", linetype = "dashed") +
theme(
plot.margin = margin(10, 10, 10, 10),
aspect.ratio = 0.5
)
# Display the modified graph
print(d_modified)
# Retrieve individual coordinates on the factor axes
ind_coords <- as.data.frame(res$ind$coord)[, 1:3] # Axes 1, 2, 3
colnames(ind_coords) <- c("Dim.1", "Dim.2", "Dim.3") # Rename columns
ind_coords$cluster <- as.factor(res.hcpc$data.clust$clust) # Add clusters
ind_coords$cluster <- factor(ind_coords$cluster, levels = sort(unique(ind_coords$cluster)))
# Retrieve inertia
inertia <- res$eig[1:3, 2] # % inertia for axes 1, 2, and 3
# Assign colors to clusters
names(cluster_colors) <- levels(ind_coords$cluster)
# Function to get the color of a given cluster (used for convex hulls)
get_cluster_color <- function(cluster) {
return(cluster_colors[as.character(cluster)])
}
# Initialize the graph
fig <- plot_ly()
# Add individual points in black
fig <- fig %>% add_trace(
data = ind_coords,
x = ~Dim.1, y = ~Dim.2, z = ~Dim.3,
type = "scatter3d",
mode = "markers",
marker = list(color = "black", size = 1, opacity = 0.8),
showlegend = FALSE,
inherit = FALSE
)
# Calculate maximum ranges for each axis
max_range <- max(abs(as.matrix(ind_coords[,1:3]))) * 1.1
# Create dashed axis lines passing through the origin
fig <- fig %>%
add_trace(
x = c(-max_range, max_range),
y = c(0, 0),
z = c(0, 0),
type = "scatter3d",
mode = "lines",
line = list(color = "grey80", dash = "dash", width = 0.5),
showlegend = FALSE,
inherit = FALSE,
name = ""
) %>%
add_trace(
x = c(0, 0),
y = c(-max_range, max_range),
z = c(0, 0),
type = "scatter3d",
mode = "lines",
line = list(color = "grey80", dash = "dash", width = 0.5),
showlegend = FALSE,
inherit = FALSE,
name = ""
) %>%
add_trace(
x = c(0, 0),
y = c(0, 0),
z = c(-max_range, max_range),
type = "scatter3d",
mode = "lines",
line = list(color = "grey80", dash = "dash", width = 0.5),
showlegend = FALSE,
inherit = FALSE,
name = ""
)
# Add convex hulls for each cluster with the palette colors
for (clus in levels(ind_coords$cluster)) {
cluster_points <- ind_coords[ind_coords$cluster == clus, c("Dim.1", "Dim.2", "Dim.3")]
hull <- convhulln(cluster_points) # Direct calculation, without condition
col <- cluster_colors[as.character(clus)]
fig <- fig %>% add_trace(
type = "mesh3d",
x = cluster_points$Dim.1,
y = cluster_points$Dim.2,
z = cluster_points$Dim.3,
i = hull[,1] - 1,
j = hull[,2] - 1,
k = hull[,3] - 1,
opacity = 0.2,
facecolor = rep(col, nrow(hull)),
flatshading = TRUE,
showscale = FALSE,
name = paste("Cluster", clus),
showlegend = TRUE,
inherit = FALSE
)
}
# Disable projected panes and add a uniform gray background
fig <- fig %>%
layout(
paper_bgcolor = "#D3D3D3",
plot_bgcolor = "#D3D3D3",
scene = list(
xaxis = list(
title = "",
showbackground = FALSE,
showline = FALSE, # Remove axis line
showticklabels = FALSE, # Remove numeric tick labels
zeroline = FALSE, # Remove default zero line
showgrid = FALSE # Optional: remove grid
),
yaxis = list(
title = "",
showbackground = FALSE,
showline = FALSE,
showticklabels = FALSE,
zeroline = FALSE,
showgrid = FALSE
),
zaxis = list(
title = "",
showbackground = FALSE,
showline = FALSE,
showticklabels = FALSE,
zeroline = FALSE,
showgrid = FALSE
),
aspectmode = "manual",
aspectratio = list(x = 1.5, y = 1, z = 1)
),
showlegend = TRUE
)
# Configure the legend with a title
fig <- fig %>% layout(
legend = list(
title = list(text = "<b>Profiles</b>"), # Legend title
font = list(size = 10) # Title font style
)
)
# Display the graph
fig
dataclust <- as.data.frame(res.hcpc[["data.clust"]])
IndividusClasse <- table(dataclust$clust) # To create a table with the number of individuals per each cluster
print(IndividusClasse)
##
## 1 2 3 4 5 6
## 339 696 471 1144 232 322
barplot(table(dataclust$clust), main = "Number of individuals by profile")
# This operation is necessary to simplify the graphs later
TableauVariablesCategorielles <- Filter(is.character, Cyclistes)
NbModalitesParVariable <- data.frame()
for (i in colnames(TableauVariablesCategorielles)){
# Retrieve the number of values for each categorical variable
fichier <- length(unique(TableauVariablesCategorielles[[i]]))
NbModalitesParVariable <- rbind(NbModalitesParVariable, fichier)
}
rm(i, fichier)
NbModalitesParVariable$Variable <- colnames(TableauVariablesCategorielles)
VariablesDeuxModalites <- NbModalitesParVariable[which(NbModalitesParVariable[,1] == 2),]
VariablesDeuxModalites <- as.character(VariablesDeuxModalites$Variable)
# By default v.test > |1.96| - In this case, raising the threshold reduces the number of variables that will describe the clusters. Only extremely significant ones are kept.
Seuil.v.test <- 3.29 # Accepting a 1 in 1000 chance of error
# learn more https://www.medcalc.org/manual/values-of-the-normal-distribution.php
# For variables with only two categorical values, only one is kept
for (i in 1:nbclasses){
# Retrieve the description of each of the N clusters by quantitative variables into tables
b <- as.data.frame(res.hcpc$desc.var$quanti[[i]])
b <- b[which(colnames(b) == 'v.test')]
b <- signif(b,3) # To round values
# Retrieve the description of each of the N clusters by categorical variables into tables
c <- as.data.frame(res.hcpc$desc.var$category[[i]])
d <- as.data.frame(c$label <- rownames(c))
d <- as.data.frame(c$label <- sub("\\=.*", "", c$label))
d <- as.data.frame(c[which(c$label %in% VariablesDeuxModalites), ])
d <- as.data.frame(d[which(d$v.test > 0),])
e <- as.data.frame(c[which(!c$label %in% VariablesDeuxModalites), ])
f <- as.data.frame(rbind(d, e))
g <- as.data.frame(f$label <- NULL)
c <- signif(f,3) # To round values
c <- c[which(colnames(c) == 'v.test')]
# Combine tables with quantitative and categorical variables
h <- as.data.frame(rbind(b,c))
h <- as.data.frame(cbind(h, h$label <- row.names(h)))
colnames(h) <- c("v.test", "label")
h <- as.data.frame(h[order(h$v.test, decreasing = TRUE), ])
h <- as.data.frame(h[which(!h$v.test == "Inf"),])
h <- as.data.frame(h[which(abs(h$v.test) >= Seuil.v.test),])
# To plot horizontal bar charts
op <- par(oma = c(0,7,0,0))
barplot((h$v.test), names = row.names(h), border = "white", horiz = TRUE, las = 1, xlim = c(-5 - max(abs(h$v.test)), 5 + max(abs(h$v.test))), cex.names=0.8, main = paste("profile", i, sep = ""))
par(op)
}
rm(b,c,d,e,f,g,h,i)
#to reduce margins and set a grey background
opar <- par(mar = c(2, 2, 2, 2), bg = "grey60")
# Color vector
colors_border = c("blue", "green" , "red", "yellow", "black", "cyan")
colors_in = c("blue", "green" , "red", "yellow", "black", "cyan")
## Individual variables (table preparation)
# Gender and driving license
genre_permis_radar <- dataclust %>%
group_by(clust) %>%
summarise(muj = sum(Sexo == "Mujer") / n() * 100,
permis_cond = sum(LicCondu == "LicCon-Si") / n() * 100)
# AGE
age_radar <- dataclust %>%
group_by(clust, Edad) %>%
summarise(effectifs = n()) %>%
group_by(clust) %>%
mutate(effectifs = effectifs / sum(effectifs) * 100) %>%
pivot_wider(names_from = Edad, values_from = effectifs)
# replace NA by 0
age_radar <- replace(age_radar, is.na(age_radar), 0)
# reorder columns
age_radar <- age_radar[c(1,5,3,4,2)]
# EDUCATION LEVEL
NivEduc_radar <- dataclust %>%
group_by(clust, NivEducCL) %>%
summarise(effectifs = n()) %>%
group_by(clust) %>%
mutate(effectifs = effectifs / sum(effectifs) * 100) %>%
pivot_wider(names_from = NivEducCL, values_from = effectifs)
# replace NA by 0
NivEduc_radar <- replace(NivEduc_radar, is.na(NivEduc_radar), 0)
# reorder columns
NivEduc_radar <- NivEduc_radar[c(1,3,2,4,5)]
# combine dataframes
var_indiv_radar <- cbind(genre_permis_radar, age_radar[-1], NivEduc_radar[-1])
# assign rownames
rownames(var_indiv_radar) <- paste("profile" , var_indiv_radar$clust , sep = "-")
var_indiv_radar$clust <- NULL
# To move a column
var_indiv_radar <- var_indiv_radar %>% relocate(permis_cond, .after = Universitario)
summary(var_indiv_radar)
## muj Inf. 15 15-39 40-64
## Min. :14.75 Min. : 0.00000 Min. : 0.3106 Min. : 0.00
## 1st Qu.:19.68 1st Qu.: 0.00000 1st Qu.:59.4782 1st Qu.:18.31
## Median :23.99 Median : 0.08741 Median :63.3530 Median :30.58
## Mean :24.15 Mean :16.71588 Mean :56.1407 Mean :23.77
## 3rd Qu.:29.94 3rd Qu.: 0.36698 3rd Qu.:65.7156 3rd Qu.:31.89
## Max. :31.99 Max. :99.68944 Max. :85.1380 Max. :34.88
## >=65 PrimariaSecund Media Tecnico
## Min. :0.0000 Min. : 5.015 Min. : 0.9317 Min. : 0.000
## 1st Qu.:0.7492 1st Qu.:11.119 1st Qu.:10.2151 1st Qu.: 9.937
## Median :3.0593 Median :34.419 Median :28.5920 Median :13.927
## Mean :3.3780 Mean :38.559 Mean :21.7982 Mean :12.517
## 3rd Qu.:4.4957 3rd Qu.:50.148 3rd Qu.:32.3120 3rd Qu.:16.379
## Max. :9.1954 Max. :98.758 Max. :35.0318 Max. :21.444
## Universitario permis_cond
## Min. : 0.3106 Min. : 0.00
## 1st Qu.: 6.5672 1st Qu.:24.81
## Median :19.0971 Median :64.13
## Mean :27.1259 Mean :53.00
## 3rd Qu.:34.8861 3rd Qu.:81.09
## Max. :81.7109 Max. :91.30
# to add two rows at the top of the dataframe with the max and min for each variable (based on the previous summary)
var_indiv_radar <- rbind(rep(100,10) , rep(0,10) , var_indiv_radar)
# to reorder columns
var_indiv_radar <- var_indiv_radar[, c(1, rev(seq_along(var_indiv_radar)[-1]))]
# plot radar chart
radarchart(var_indiv_radar, axistype = 1, seg = 5, pty = 32, centerzero = FALSE,
#custom polygon
pcol = colors_border, pfcol = adjustcolor(colors_in, alpha.f = 0), plwd = 2 , plty = 1,
#custom the grid
cglcol = "white", cglty = 3, axislabcol = "white", caxislabels = rep("", 10), cglwd = 0.1,
#custom labels
vlcex = 0.5, calcex = 0.5
)
# Add a legend
legend(x = 1.2, y = 1.2, legend = rownames(var_indiv_radar[-c(1,2),]), bty = "n", pch = 18 , col = colors_border, text.col = "black", cex = 0.5, pt.cex = 0.9)
# add a title
title(main = "Cyclist characteristics by profile\n(summary)", cex.main = 0.8)
# Split the screen in 6 parts to plot each radar separately
opar <- par(oma = c(0,0,3,0)+0.1, mar = c(0, 0.5, 1.2, 0.5), mfrow = c(2,3), bg = "grey60")
# Plot the radar charts
for(i in 1:length(unique(dataclust$clust))){
radarchart(var_indiv_radar[c(1, 2, i+2), ], axistype = 1, seg = 5, pty = 32, centerzero = FALSE,
#custom polygon
pcol=colors_border[i], pfcol = adjustcolor(colors_in[i], alpha.f = 0), plwd = 2 , plty = 1,
#custom the grid
cglcol = "white", cglty = 3, axislabcol = "white", cglwd = 0.1, caxislabels = rep("", 10),
#custom labels
vlcex = 0.5, palcex = 0.8)
# To add a title to each radar
title(main = paste("profile", i, sep = "-"), cex.main = 0.8)
}
# To insert a main title
mtext("Cyclist characteristics by profile",
cex = 1, side = 3,line = 1, adj = 0.5, outer = TRUE)
# to reset graphic parameters
par(opar)
## Variables characterizing trips and cycling practices (table preparation)
# Trips
trajet_radar <- dataclust %>%
group_by(clust) %>%
summarise(
travail = sum(MotivoCL == "Trab") / n() * 100,
ecole = sum(MotivoCL == "Estudio") / n() * 100,
loisir = sum(MotivoCL == "Ocio") / n() * 100,
autre = sum(MotivoCL == "Otro") / n() * 100,
durée = mean(Duracion),
distance = mean(Distancia),
FreqSemaine = mean(FrecSemana),
FreqWeekEnd = mean (FrecFinSem),
Occasionnel = sum(Ocasional == "Ocas-Si") / n() * 100
) %>%
as.data.frame()
rownames(trajet_radar) <- paste("profile" , trajet_radar$clust , sep = "-")
trajet_radar$clust <- NULL
summary(trajet_radar)
## travail ecole loisir autre
## Min. : 1.242 Min. : 1.724 Min. : 1.699 Min. : 8.555
## 1st Qu.:31.253 1st Qu.: 5.573 1st Qu.: 4.155 1st Qu.:14.434
## Median :57.429 Median :10.024 Median : 7.879 Median :22.211
## Mean :46.428 Mean :18.618 Mean : 8.672 Mean :26.282
## 3rd Qu.:61.447 3rd Qu.:16.114 3rd Qu.:11.778 3rd Qu.:26.824
## Max. :77.155 Max. :67.702 Max. :18.584 Max. :64.511
## durée distance FreqSemaine FreqWeekEnd
## Min. :20.63 Min. : 4763 Min. :0.04095 Min. :0.01724
## 1st Qu.:27.21 1st Qu.: 5683 1st Qu.:3.76256 1st Qu.:0.11443
## Median :33.96 Median : 7027 Median :3.95093 Median :0.34907
## Mean :34.58 Mean : 8305 Mean :3.34074 Mean :0.31478
## 3rd Qu.:37.84 3rd Qu.: 7446 3rd Qu.:4.11719 3rd Qu.:0.52048
## Max. :54.86 Max. :18194 Max. :4.22923 Max. :0.55944
## Occasionnel
## Min. : 0.08741
## 1st Qu.: 0.89670
## Median : 4.27665
## Mean :20.50617
## 3rd Qu.:13.97850
## Max. :97.41379
# to add two rows at the top of the dataframe with the max and min for each variable (based on the previous summary)
trajet_radar <- rbind(c(rep(100,4), 60, 20000, 5, 1, 100), rep(0, length(colnames(trajet_radar))), trajet_radar)
# to reorder columns
trajet_radar <- trajet_radar[, c(1, rev(seq_along(trajet_radar)[-1]))]
# plot radar chart
radarchart(trajet_radar, axistype = 1 , seg = 5, pty = 32, centerzero = FALSE,
#custom polygon
pcol = colors_border, pfcol = adjustcolor(colors_in, alpha.f = 0), plwd = 2 , plty = 1,
#custom the grid
cglcol = "white", cglty = 3, axislabcol = "white", cglwd = 0.1, caxislabels = rep("", 10),
#custom labels
vlcex = 0.5, calcex = 0.5
)
# Add a legend
legend(x = 1.2, y = 1.2, legend = rownames(trajet_radar[-c(1,2),]), bty = "n", pch = 18 , col = colors_border, text.col = "black", cex = 0.5, pt.cex = 0.9)
# add a title
title(main = "Trip characteristics by profile\n(summary)", cex.main = 0.8)
# Split the screen in 6 parts to plot each radar separately
opar <- par(oma = c(0,0,3,0)+0.1, mar = c(0, 0.5, 1.2, 0.5), mfrow = c(2,3), bg = "grey60")
# Plot the radar charts
for(i in 1:length(unique(dataclust$clust))){
radarchart(trajet_radar[c(1, 2, i+2), ], axistype = 1, seg = 5, pty = 32, centerzero = FALSE,
#custom polygon
pcol = colors_border[i], pfcol = adjustcolor(colors_in[i], alpha.f = 0), plwd = 2 , plty = 1,
#custom the grid
cglcol = "white", cglty = 3, axislabcol = "white", cglwd = 0.1, caxislabels = rep("", 10),
#custom labels
vlcex = 0.5, palcex = 0.8)
# To add a title to each radar
title(main = paste("profile", i, sep = "-"), cex.main = 0.8)
}
# To insert a main title
mtext("Trip characteristics by profile",
cex = 1, side = 3,line = 1, adj = 0.5, outer = TRUE)
# to reset graphic parameters
par(opar)
## Variables characterizing households (table preparation)
# Stratum characterizing the housing and neighborhood standard
StratesLogt <- dataclust %>%
group_by(clust, Estrato) %>%
summarise(effectifs = n(), .groups = 'drop') %>%
group_by(clust) %>%
mutate(effectifs = effectifs / sum(effectifs) * 100) %>%
pivot_wider(names_from = Estrato, values_from = effectifs)
# replace NA by 0
StratesLogt <- replace(StratesLogt, is.na(StratesLogt), 0)
# To move a column
StratesLogt <- StratesLogt %>% relocate(Estr0, .before = Estr1)
# To coerce df to long format
StratesLogt_long <- StratesLogt %>%
pivot_longer(cols = -clust, names_to = "estrato", values_to = "pourcentage")
# Create interactive graph with ggiraph
gg_strates <- ggplot(StratesLogt_long,
aes(x = as.factor(clust),
y = pourcentage,
fill = factor(estrato, levels = rev(unique(estrato))),
tooltip = paste("Profile:", clust,
"<br>Stratum:", estrato,
"<br>Percentage:", round(pourcentage, 1), "%"),
data_id = paste(clust, estrato, sep = "_"))) +
geom_bar_interactive(position = "fill", stat = "identity") +
theme_bw() +
scale_fill_brewer(palette = "YlOrRd", direction = -1) +
labs(fill = "Housing strata\n(%)", y = "%", x = "profile")
# Display interactive graph with customized hover
girafe(
ggobj = gg_strates,
options = list(opts_sizing(rescale = TRUE, # Allows resizing
width = 1), # Width relative to container
opts_hover(
css = "stroke:red; stroke-width:3px; filter: drop-shadow(0 0 5px rgba(0,0,0,0.3));"
),
opts_tooltip(
css = "background-color:black; color:white; padding:5px; border-radius:5px; font-family:monospace;"
),
opts_hover_inv(css = "opacity:0.7;") # Other elements become slightly transparent
)
)
## Variables characterizing places of residence (table preparation)
# Major residential zones
GranZona <- dataclust %>%
group_by(clust, GranZona) %>%
summarise(effectifs = n(), .groups = 'drop') %>%
group_by(clust) %>%
mutate(effectifs = effectifs / sum(effectifs) * 100) %>%
pivot_wider(names_from = GranZona, values_from = effectifs)
# replace NA by 0
GranZona <- replace(GranZona, is.na(GranZona), 0)
# To coerce df to long format
GranZona_long <- GranZona %>%
pivot_longer(cols = -clust, names_to = "GranZona", values_to = "pourcentage")
# Create interactive graph with ggiraph
gg_granzona <- ggplot(GranZona_long,
aes(x = as.factor(clust),
y = pourcentage,
fill = factor(GranZona, levels = rev(unique(GranZona))),
tooltip = paste("Profile:", clust,
"<br>Zone:", GranZona,
"<br>Percentage:", round(pourcentage, 1), "%"),
data_id = paste(clust, GranZona, sep = "_"))) +
geom_bar_interactive(position = "fill", stat = "identity") +
theme_bw() +
scale_fill_brewer(palette = "Set3", direction = -1) +
labs(fill = "Residence locations\nby major zones\n(%)", y = "%", x = "profile")
# Display interactive graph
girafe(
ggobj = gg_granzona,
options = list(
opts_sizing(rescale = TRUE, # Allows resizing
width = 1), # Width relative to container
opts_hover(
css = "stroke:red; stroke-width:3px; filter: drop-shadow(0 0 5px rgba(0,0,0,0.3));"
),
opts_tooltip(
css = "background-color:black; color:white; padding:5px; border-radius:5px; font-family:monospace;"
),
opts_hover_inv(css = "opacity:0.7;") # Other elements become slightly transparent
)
)
# Prepare the dataframe
Contexte <- dataclust %>%
group_by(clust) %>%
summarise(
num = n(),
Nb_moy_voit = mean(NumAuto),
Taille_menage = mean(NumPersHog),
Pct_ICS12 = mean(Pct_ICS12),
ParModUtam = mean(ParModUtam)
) %>%
as.data.frame()
# Convert clust to factor for better display
Contexte$clust <- as.factor(Contexte$clust)
## Graph 1: Average number of cars per household
gg_voitures <- ggplot(Contexte,
aes(x = Nb_moy_voit,
y = clust,
tooltip = paste("Profile:", clust,
"<br>Avg number of cars:", round(Nb_moy_voit, 2)),
data_id = clust)) +
geom_col_interactive(fill = "purple", width = 0.7) +
theme_bw() +
labs(title = "Average number of cars\nper household",
x = "Average number",
y = "Profile") +
theme(plot.title = element_text(size = 10, hjust = 0.5),
axis.text.y = element_text(size = 9),
axis.title = element_text(size = 9))
## Graph 2: Average household size
gg_taille <- ggplot(Contexte,
aes(x = Taille_menage,
y = clust,
tooltip = paste("Profile:", clust,
"<br>Average size:", round(Taille_menage, 2)),
data_id = clust)) +
geom_col_interactive(fill = "blue", width = 0.7) +
theme_bw() +
labs(title = "Average household size",
x = "Average size",
y = "") + # Remove y label to avoid duplicates
theme(plot.title = element_text(size = 10, hjust = 0.5),
axis.text.y = element_blank(), # Remove y labels
axis.ticks.y = element_blank(),
axis.title = element_text(size = 9))
## Graph 3: Percentage of ICS1 and ICS2 households
gg_ics <- ggplot(Contexte,
aes(x = Pct_ICS12,
y = clust,
tooltip = paste("Profile:", clust,
"<br>% ICS1 and ICS2:", round(Pct_ICS12, 1), "%"),
data_id = clust)) +
geom_col_interactive(fill = "green", width = 0.7) +
theme_bw() +
labs(title = "% ICS1 and ICS2 households\nper residence zones",
x = "Percentage (%)",
y = "") +
theme(plot.title = element_text(size = 10, hjust = 0.5),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_text(size = 9))
## Graph 4: Bike mode share
gg_velo <- ggplot(Contexte,
aes(x = ParModUtam,
y = clust,
tooltip = paste("Profile:", clust,
"<br>Bike mode share:", round(ParModUtam, 1), "%"),
data_id = clust)) +
geom_col_interactive(fill = "darkred", width = 0.7) +
theme_bw() +
labs(title = "Bike mode share\nper residence zones",
x = "Percentage (%)",
y = "") +
theme(plot.title = element_text(size = 10, hjust = 0.5),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_text(size = 9))
# Combine the 4 graphs side by side with patchwork
combined_plots1 <- (gg_voitures | gg_taille)+
plot_annotation(title = "Profile characteristics",
theme = theme(plot.title = element_text(hjust = 0.5)))
combined_plots2 <- (gg_ics | gg_velo)+
plot_annotation(title = "Profile characteristics",
theme = theme(plot.title = element_text(hjust = 0.5)))
# Display first two graphs
girafe(
ggobj = combined_plots1,
options = list(
opts_sizing(rescale = TRUE, width = 1),
opts_hover(css = "stroke:red; stroke-width:3px; filter: drop-shadow(0 0 5px rgba(0,0,0,0.3));"),
opts_tooltip(css = "background-color:black; color:white; padding:5px; border-radius:5px; font-family:monospace;"),
opts_hover_inv(css = "opacity:0.7;")
)
)
# Display last two graphs
girafe(
ggobj = combined_plots2,
options = list(
opts_sizing(rescale = TRUE, width = 1), # Full width for the 4 graphs
opts_hover(css = "stroke:red; stroke-width:3px; filter: drop-shadow(0 0 5px rgba(0,0,0,0.3));"),
opts_tooltip(css = "background-color:black; color:white; padding:5px; border-radius:5px; font-family:monospace;"),
opts_hover_inv(css = "opacity:0.7;")
)
)
# Useful later for retrieving the XY coordinates of the household's residence location
Cyclistes <- Cyclistes |>
mutate(id_hog_pers = row.names(dataclust)) |>
separate(id_hog_pers, into = c("id_hog", "id_pers"), sep = "-", remove = FALSE)
Cyclistes$profil <- dataclust$clust
write.xlsx(Cyclistes, "TypoProfilCyclistesFINAL.xlsx")
#browseURL("TypoProfilCyclistesFINAL.xlsx")
CoordXY_Menages <- read.xlsx("CoordXY_Menages.xlsx")
Cyclistes <- merge(Cyclistes, CoordXY_Menages, by = "id_hog")
UTAM <- st_read("EMU2019_UTAM_Bgta.gpkg")
## Reading layer `EMU2019_UTAM_Bgta_Carto_Variables_Nbre_Completo_v3' from data source `F:\Protocoles\Master 2\ASDT\2024-2025\1 - Analyses factorielles et classification\3_AFDM\FINAL\EMU2019_UTAM_Bgta.gpkg'
## using driver `GPKG'
## Simple feature collection with 132 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 569984 ymin: 492795.1 xmax: 626083.9 ymax: 557354.6
## Projected CRS: WGS 84 / UTM zone 18N
ResidCyclistes<- st_as_sf(Cyclistes,
coords = c("X_Hog","Y_Hog"),
crs = st_crs(UTAM)$srid)
plot(st_geometry(ResidCyclistes), pch = ".", main = "Residence location of cyclists surveyed in the EMU-2019 (Bogotá)", cex.main = 0.7)
# Necessary step to create smoothed density maps showing the concentration of cyclists' residence locations by profile
## To create a list of cyclist profiles, in order
Cyclistes$profil <- as.numeric(Cyclistes$profil)
Cyclistes <- Cyclistes[order(Cyclistes$profil, decreasing = FALSE), ]
ListProfils <- unique(Cyclistes$profil)
# Definition of the number of clusters
nclass <- 6
# Creation of a set of color palettes in a "list" object
palettes <- list(
pal1 = hcl.colors(nclass, "YlOrRd", rev = TRUE),
pal2 = hcl.colors(nclass, "Greens 3", rev = TRUE),
pal3 = hcl.colors(nclass, "Reds 3", rev = TRUE),
pal4 = hcl.colors(nclass, "Grays", rev = TRUE),
pal5 = hcl.colors(nclass, "Oslo", rev = FALSE),
pal6 = hcl.colors(nclass, "Red-Purple", rev = TRUE),
pal7 = hcl.colors(nclass, "Inferno", rev = TRUE),
pal8 = hcl.colors(nclass, "Reds", rev = TRUE),
pal9 = hcl.colors(nclass, "Blues 3", rev = TRUE),
pal10 = hcl.colors(nclass, "Purples 3", rev = TRUE)
)
# To define the extent for smoothing, use the UTAM extent
Emprise <- as.owin(st_buffer(UTAM, 1000))
# Setting margins to insert the main title and titles for each map
par(oma = c(3.5,0,3,0)+0.1, mar = c(0, 0.5, 1.2, 0.5))
# To split the window into 2 rows and 3 columns (6 profiles)
par(mfrow = c(2,3))
# To define the smoothing radius (in m.)
rayon <- 1000
# To define the pixel size (in m.)
resol <- 100 # here 100m x 100m = 1 ha.
# Loop to produce and map a density surface by profile
for (i in ListProfils){
# Retrieve datasets by profile
fichier <- Cyclistes[which(Cyclistes$profil == i),]
# To retrieve coordinates of residence locations
pts <- st_coordinates(st_as_sf(fichier, coords = c("X_Hog","Y_Hog"), crs = st_crs(UTAM)$srid))
# To create a ppp object (spatstat format) and incorporate the extent
fichier.ppp <- ppp(pts[,1], pts[,2], window = Emprise)
# To calculate the density surface (smoothing radius: 1000 m and spatial resolution of the image: 100m x 100m = 1 ha)
cartelissee <- density(fichier.ppp, sigma = rayon, kernel = "gaussian", eps = resol)
# Convert the smoothed surface to SpatRaster format
cartelissee.raster <- rast(cartelissee)
crs(cartelissee.raster) <- st_crs(UTAM)$srid # To specify a CRS for the raster object
# Convert densities to counts (multiply densities by the area of the circle)
values(cartelissee.raster) <- abs(values(cartelissee.raster) * pi * rayon **2)
# Clip the raster to the UTAM extent
cartelissee.raster <- mask(cartelissee.raster, UTAM)
# Bin breaks (equal intervals)
bks <- seq(from = min(values(cartelissee.raster), na.rm = TRUE),
to = max(values(cartelissee.raster), na.rm = TRUE),
by = (max(values(cartelissee.raster), na.rm = TRUE) - min(values(cartelissee.raster), na.rm = TRUE)) / nclass)
# Plot the map
plot(st_geometry(UTAM), border = NA, bg = "grey60")
mf_raster(x = cartelissee.raster,
var = "lyr.1",
type = "interval",
breaks = bks,
pal = unlist(palettes[(i)]),
add = TRUE,
leg_pos = "bottomright",
leg_title = "Nb. of cyclist residence\nlocations within a\n1 km radius.\n",
leg_title_cex = 0.5,
leg_val_cex = 0.45,
leg_size = 0.4,
leg_box_border = "white",
leg_box_cex = c(2.5, 2),
leg_adj = c(-2,1))
title(main = paste("profile", i, sep = "-"))
mtext(text = paste0("n = ", nrow(fichier), " cyclists"),
side = 3, line = -2, adj = 0.1, cex = 0.7)
}
mf_scale(
lwd = 1.5,
cex = 0.6,
pos = "bottomleft",
scale_units = "m"
)
mf_arrow(pos = "topright", adj = c(-3,-3))
# To display the main title and source
mtext("Residence locations of cyclists by profile in the Bogotá Metropolitan Area", cex = 1.3, side = 3, line = 1, adj = 0.5, outer = TRUE)
mtext(" Sources: EMU2019 - SDM - Smoothing radius: 1000 m and resolution: 1 ha - Package: mapsf", side = 1, line = 1, adj = 0, cex = 0.6, outer = TRUE)
Destination1erTrajet <- st_read("Centroides_ZAT_Destination_1er_Trajet_Depuis_Domicile.gpkg")
## Reading layer `Centroides_ZAT_Destination_1er_Trajet_Depuis_Domicile' from data source `F:\Protocoles\Master 2\ASDT\2024-2025\1 - Analyses factorielles et classification\3_AFDM\FINAL\Centroides_ZAT_Destination_1er_Trajet_Depuis_Domicile.gpkg'
## using driver `GPKG'
## Simple feature collection with 3231 features and 3 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 532626 ymin: 469041.5 xmax: 652779.8 ymax: 600655.7
## Projected CRS: WGS 84 / UTM zone 18N
listDestination1erTrajet <- as.character(Destination1erTrajet$id_concat)
Cyclistes <- Cyclistes[Cyclistes$id_hog_pers %in% listDestination1erTrajet, ]
Destination1erTrajet <- merge(Destination1erTrajet, Cyclistes[c("profil", "id_hog_pers")], by.x = "id_concat", by.y = "id_hog_pers")
# Setting margins to insert the main title and titles for each map
par(oma = c(3.5,0,3,0)+0.1, mar = c(0, 0.5, 1.2, 0.5))
# To split the window into 2 rows and 3 columns (6 profiles)
par(mfrow = c(2,3))
# Definition of the number of clusters
nclass <- 6
# To define the smoothing radius (in m.)
rayon <- 1000
# To define the pixel size (in m.)
resol <- 100 # here 100m x 100m = 1 ha.
# Loop to produce and map a density surface by profile
for (i in ListProfils){
# Retrieve datasets by profile
fichier <- Destination1erTrajet[which(Destination1erTrajet$profil == i),]
# To retrieve coordinates of destination locations
pts <- st_coordinates(fichier)
# To create a ppp object (spatstat format) and incorporate the extent
fichier.ppp <- ppp(pts[,1], pts[,2], window = Emprise)
# To calculate the density surface (smoothing radius: 1000 m and spatial resolution of the image: 100m x 100m = 1 ha)
cartelissee <- density(fichier.ppp, sigma = rayon, kernel = "gaussian", eps = resol)
# Convert the smoothed surface to SpatRaster format
cartelissee.raster <- rast(cartelissee)
crs(cartelissee.raster) <- st_crs(UTAM)$srid # To specify a CRS for the raster object
# Convert densities to counts (multiply densities by the area of the circle)
values(cartelissee.raster) <- abs(values(cartelissee.raster) * pi * rayon **2)
# Clip the raster to the UTAM extent
cartelissee.raster <- mask(cartelissee.raster, UTAM)
# Bin breaks (equal intervals)
bks <- seq(from = min(values(cartelissee.raster), na.rm = TRUE),
to = max(values(cartelissee.raster), na.rm = TRUE),
by = (max(values(cartelissee.raster), na.rm = TRUE) - min(values(cartelissee.raster), na.rm = TRUE)) / nclass)
# Plot the map
plot(st_geometry(UTAM), border = NA, bg = "grey60")
mf_raster(x = cartelissee.raster,
var = "lyr.1",
type = "interval",
breaks = bks,
pal = unlist(palettes[(i)]),
add = TRUE,
leg_pos = "bottomright",
leg_title = "Nb. of destinations\nwithin a 1 km\nradius.\n",
leg_title_cex = 0.5,
leg_val_cex = 0.45,
leg_size = 0.4,
leg_box_border = "white",
leg_box_cex = c(2.5, 2),
leg_adj = c(-2,1))
title(main = paste("profile", i, sep = "-"))
mtext(text = paste0("n = ", nrow(fichier), " cyclists"),
side = 3, line = -2, adj = 0.1, cex = 0.7)
}
mf_scale(
lwd = 1.5,
cex = 0.6,
pos = "bottomleft",
scale_units = "m"
)
mf_arrow(pos = "topright", adj = c(-3,-3))
# To display the main title and source
mtext("Destination of first bicycle trips from home in Bogotá", cex = 1.3, side = 3, line = 1, adj = 0.5, outer = TRUE)
mtext(" Sources: EMU2019 - SDM - Smoothing radius: 1000 m and resolution: 1 ha - Package: mapsf", side = 1, line = 1, adj = 0, cex = 0.6, outer = TRUE)