1 Calculation of cyclist profiles combining FAMD and HCA
2 Mapping the home and destination locations of cyclist profiles
- 2.1 Map of cyclist residence locations by profile
  - 2.1.1 Data preparation for map creation
  - 2.1.2 Series of density maps showing the concentration of cyclists’ residence locations by profile
- 2.2 Map of the destination locations of the first bicycle trip from home
  - 2.2.1 Data preparation for map creation
  - 2.2.2 Creation of the second series of destination location maps

This program presents the steps used to construct a typology of bicycle users based on the 2019 Household Travel Survey conducted in Bogotá and made available as open data. The method relies on a factor analysis of mixed data (FAMD) and a hierarchical cluster analysis (HCA). The approach combines sociodemographic characteristics of 3,241 individuals and variables describing their mobility practices. Maps showing the concentration of residence locations and destination locations for the six cyclist profiles are provided The method developed in R is reproducible for other urban areas and other groups of people using other modes of transport.

1 Calculation of cyclist profiles combining FAMD and HCA

1.1 Loading required packages

library(openxlsx) # For importing/exporting from/to Excel
library(FactoMineR) # To run factor analyses (such as FAMD) and classifications (e.g., HCA)
library(factoextra) # To enhance the rendering of graphs produced by FactoMineR
library(dplyr) # To facilitate data manipulation
library(tidyr) # To switch from long to wide format
library(fmsb) # To produce radar charts
library(ggplot2) # To produce advanced graphics
library(sf) # To manipulate spatial objects
library(spatstat) # For spatial smoothing
library(terra) # For processing raster data
library(mapsf) # For cartographic representation and map layout
library(ggnewscale) # To set color palettes for ggplot graphs
library(plotly) # To plot 3D graphs
library(geometry) # To plot convex hulls
library(ggiraph) # To create interactive graphs from ggplot
library(patchwork) # To combine multiple graphs on the same layout

1.2 Importing input data (Excel file)

# Import of the table of individuals who used the bicycle as their main mode of transport in the city the day before the survey
Cyclistes <- read.xlsx("Cyclistes.xlsx", sheet = "VariablesCyclistes")
rownames(Cyclistes) <- Cyclistes$id_hog_pers # To add the person's ID as row names
Cyclistes$id_hog_pers <- NULL # To remove the column
head(Cyclistes)

##             Edad   Sexo      NivEducCL  LicCondu Ocasional DuracionCL MotivoCL
## P00316-4 Inf. 15  Mujer PrimariaSecund LicCon-No   Ocas-No    Inf. 30  Estudio
## 12861-1    40-64  Mujer  Universitario LicCon-No   Ocas-No    Inf. 30     Otro
## 5636-4   Inf. 15  Mujer PrimariaSecund LicCon-No   Ocas-No    Inf. 30  Estudio
## 10497-3    15-39 Hombre          Media LicCon-No   Ocas-Si      60-89     Otro
## P05845-6   15-39 Hombre          Media LicCon-No   Ocas-No      30-59     Trab
## 6973-1     40-64 Hombre PrimariaSecund LicCon-Si   Ocas-No    Inf. 30     Trab
##          Estrato  LugResid  GranZona DistanciaCL Duracion Distancia PartModInd
## P00316-4   Estr2  MOSQUERA Periferia      2-4 km       16      2480      100.0
## 12861-1    Estr2  MOSQUERA Periferia    Inf 2 km       10       742      100.0
## 5636-4     Estr0    SOACHA       Sur      2-4 km       15      2056      100.0
## 10497-3    Estr5  FONTIBON     Oeste    Inf 2 km       60       819      100.0
## P05845-6   Estr0 TOCANCIPA Periferia 15 km y más       30     27647      100.0
## 6973-1     Estr2    MADRID Periferia     6-10 km       15      8532       66.6
##          Experi NumViajDia FrecSemana FrecFinSem NumAuto NumMoto NumPersHog
## P00316-4    5.0          2        5.0          0       0       2          5
## 12861-1     5.0          4        5.0          0       0       0          2
## 5636-4      1.5          2        4.5          0       0       0          4
## 10497-3     3.0          2        0.0          0       0       0          4
## P05845-6    5.0          2        4.0          1       0       1          8
## 6973-1      3.0          4        4.0          1       0       0          2
##          Pct_ICS12 ParModUtam
## P00316-4      30.2  18.205580
## 12861-1       12.5  18.205580
## 5636-4        22.1   6.782659
## 10497-3        0.7   1.798401
## P05845-6      38.4  10.204420
## 6973-1        49.2  15.953432

str(Cyclistes)

## 'data.frame':    3241 obs. of  23 variables:
##  $ Edad       : chr  "Inf. 15" "40-64" "Inf. 15" "15-39" ...
##  $ Sexo       : chr  "Mujer" "Mujer" "Mujer" "Hombre" ...
##  $ NivEducCL  : chr  "PrimariaSecund" "Universitario" "PrimariaSecund" "Media" ...
##  $ LicCondu   : chr  "LicCon-No" "LicCon-No" "LicCon-No" "LicCon-No" ...
##  $ Ocasional  : chr  "Ocas-No" "Ocas-No" "Ocas-No" "Ocas-Si" ...
##  $ DuracionCL : chr  "Inf. 30" "Inf. 30" "Inf. 30" "60-89" ...
##  $ MotivoCL   : chr  "Estudio" "Otro" "Estudio" "Otro" ...
##  $ Estrato    : chr  "Estr2" "Estr2" "Estr0" "Estr5" ...
##  $ LugResid   : chr  "MOSQUERA" "MOSQUERA" "SOACHA" "FONTIBON" ...
##  $ GranZona   : chr  "Periferia" "Periferia" "Sur" "Oeste" ...
##  $ DistanciaCL: chr  "2-4 km" "Inf 2 km" "2-4 km" "Inf 2 km" ...
##  $ Duracion   : num  16 10 15 60 30 15 30 45 75 60 ...
##  $ Distancia  : num  2480 742 2056 819 27647 ...
##  $ PartModInd : num  100 100 100 100 100 66.6 100 100 33.3 100 ...
##  $ Experi     : num  5 5 1.5 3 5 3 5 5 4 5 ...
##  $ NumViajDia : num  2 4 2 2 2 4 2 1 2 3 ...
##  $ FrecSemana : num  5 5 4.5 0 4 4 5 0 0 4 ...
##  $ FrecFinSem : num  0 0 0 0 1 1 0 0 0 0 ...
##  $ NumAuto    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ NumMoto    : num  2 0 0 0 1 0 0 0 1 0 ...
##  $ NumPersHog : num  5 2 4 4 8 2 6 5 6 7 ...
##  $ Pct_ICS12  : num  30.2 12.5 22.1 0.7 38.4 49.2 19.4 46.1 44.9 55.7 ...
##  $ ParModUtam : num  18.21 18.21 6.78 1.8 10.2 ...

summary(Cyclistes)

##      Edad               Sexo            NivEducCL           LicCondu        
##  Length:3241        Length:3241        Length:3241        Length:3241       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   Ocasional          DuracionCL          MotivoCL           Estrato         
##  Length:3241        Length:3241        Length:3241        Length:3241       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    LugResid           GranZona         DistanciaCL           Duracion     
##  Length:3241        Length:3241        Length:3241        Min.   :  0.00  
##  Class :character   Class :character   Class :character   1st Qu.: 13.00  
##  Mode  :character   Mode  :character   Mode  :character   Median : 22.00  
##                                                           Mean   : 38.69  
##                                                           3rd Qu.: 40.00  
##                                                           Max.   :810.00  
##    Distancia       PartModInd         Experi        NumViajDia   
##  Min.   :   46   Min.   : 12.50   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 1727   1st Qu.: 60.00   1st Qu.:4.000   1st Qu.:2.000  
##  Median : 4161   Median :100.00   Median :5.000   Median :2.000  
##  Mean   : 7438   Mean   : 84.56   Mean   :4.398   Mean   :2.111  
##  3rd Qu.: 9400   3rd Qu.:100.00   3rd Qu.:5.000   3rd Qu.:2.000  
##  Max.   :86940   Max.   :100.00   Max.   :5.000   Max.   :8.000  
##    FrecSemana      FrecFinSem      NumAuto          NumMoto      
##  Min.   :0.000   Min.   :0.00   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:0.00   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :4.000   Median :0.00   Median :0.0000   Median :0.0000  
##  Mean   :3.175   Mean   :0.34   Mean   :0.2391   Mean   :0.1765  
##  3rd Qu.:5.000   3rd Qu.:1.00   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :5.000   Max.   :2.00   Max.   :4.0000   Max.   :5.0000  
##    NumPersHog       Pct_ICS12        ParModUtam    
##  Min.   : 1.000   Min.   :  0.00   Min.   : 0.000  
##  1st Qu.: 3.000   1st Qu.:  9.10   1st Qu.: 5.095  
##  Median : 4.000   Median : 23.70   Median : 7.974  
##  Mean   : 3.773   Mean   : 25.45   Mean   : 9.138  
##  3rd Qu.: 5.000   3rd Qu.: 39.50   3rd Qu.:12.190  
##  Max.   :13.000   Max.   :100.00   Max.   :23.040

1.3 Calculation of a first FAMD with all variables to determine cyclist profiles

res <- FAMD(Cyclistes, graph = FALSE)

contrib <- transform(data.frame(res$ind$contrib), contribPlan1 = Dim.1 + Dim.2)
selected_individuals <- rownames(contrib)[contrib$contribPlan1 >= quantile(contrib$contribPlan1, 0.99)] # Creating a list of individuals whose labels will be displayed on the graphs (only 1% are selected here to avoid cluttering the graphs)
rm(contrib)

plot.FAMD(res, title = "FAMD individual plot with categorical values", cex = 0.9, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75, label = c('ind','quali.var'), select = selected_individuals)

plot.FAMD(res, title = "Plot of categorical values", cex = 0.9, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75, invisible = "ind")

plot.FAMD(res, choix = "var", title = "Plot of centers of quantitative and categorical variables", cex = 0.9, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75, xlim = c(0, 0.8), ylim = c(0, 0.55))

plot.FAMD(res, choix = "quanti", title = "Correlation plot of quantitative variables", cex = 0.9, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75)

1.4 Adjustment of the first FAMD to increase inertia on the first plane

1.4.1 Removal of poorly discriminating variables

Cyclistes$Experi <- NULL
Cyclistes$NumMoto <- NULL
Cyclistes$NumViajDia <- NULL
Cyclistes$PartModInd <- NULL

1.4.2 Display of the list of remaining variables

print(as.data.frame(colnames(Cyclistes)))

##    colnames(Cyclistes)
## 1                 Edad
## 2                 Sexo
## 3            NivEducCL
## 4             LicCondu
## 5            Ocasional
## 6           DuracionCL
## 7             MotivoCL
## 8              Estrato
## 9             LugResid
## 10            GranZona
## 11         DistanciaCL
## 12            Duracion
## 13           Distancia
## 14          FrecSemana
## 15          FrecFinSem
## 16             NumAuto
## 17          NumPersHog
## 18           Pct_ICS12
## 19          ParModUtam

1.4.3 Removal of individuals with abnormally long average trip durations

Cyclistes <- Cyclistes[Cyclistes$Duracion < 300,]

1.5 Second FAMD calculation after removing poorly discriminating variables and setting some as supplementary

res <- FAMD(Cyclistes, sup.var = c(2,6,9,10,11,15,17), graph = FALSE)

contrib <- transform(data.frame(res$ind$contrib), contribPlan1 = Dim.1 + Dim.2)
selected_individuals <- rownames(contrib)[contrib$contribPlan1 >= quantile(contrib$contribPlan1, 0.95)] # Creating a list of individuals whose labels will be displayed on the graphs (only 5% are selected here to avoid cluttering the graphs)
rm(contrib)

plot.FAMD(res, title = "FAMD individual plot with categorical values", cex = 0.6, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75, label = c('ind','quali.var'), select = selected_individuals)

plot.FAMD(res, title = "Plot of categorical values", cex = 0.7, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75, invisible = "ind")

plot.FAMD(res, choix = "var", title = "Plot of centers of quantitative and categorical variables", cex = 0.7, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75, xlim = c(0, 0.55), ylim = c(0, 0.45))

plot.FAMD(res, choix = "quanti", title = "Correlation plot of quantitative variables", cex = 0.9, cex.main = 1, graph.type = "ggplot", cex.axis = 0.75)

1.6 Running HCA on the results of the FAMD

1.6.1 Inertia gain plot and 2D factor plane (static mode)

nbclasses <- 6 # Test with different numbers of clusters
res.hcpc <- HCPC(res, kk = Inf, nb.clust = nbclasses, consol = FALSE, graph = FALSE) # kk = Inf means no prior partition, consol = FALSE means no consolidation (by k-means) after HCA
plot(res.hcpc, choice = "tree", labels = FALSE)

# Creation of the initial 2D graph
cluster_colors <- c("blue", "green", "red", "yellow", "black", "cyan")
d <- fviz_cluster(res.hcpc,
             repel = TRUE,
             show.clust.cent = FALSE,   # No centers
             palette = cluster_colors,
             ggtheme = theme_gray(),
             main = "Factor plane",
             geom = "point",
             pointsize = 0.25,
             alpha = 0.6,
             ellipse = TRUE,
             ellipse.type = "convex",
             ellipse.alpha = 0.2,
             ellipse.linetype = "solid",  # Solid line for ellipses
             star.plot = FALSE,
             shape = 19)                  # Identical points

# Modify the graph
d_modified <- d +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed") +  
  geom_vline(xintercept = 0, color = "black", linetype = "dashed") +  
  theme(
    plot.margin = margin(10, 10, 10, 10), 
    aspect.ratio = 0.5
  )

# Display the modified graph
print(d_modified)

1.6.2 Interactive 3D factor plane

# Retrieve individual coordinates on the factor axes
ind_coords <- as.data.frame(res$ind$coord)[, 1:3]  # Axes 1, 2, 3
colnames(ind_coords) <- c("Dim.1", "Dim.2", "Dim.3")  # Rename columns
ind_coords$cluster <- as.factor(res.hcpc$data.clust$clust)  # Add clusters
ind_coords$cluster <- factor(ind_coords$cluster, levels = sort(unique(ind_coords$cluster)))

# Retrieve inertia
inertia <- res$eig[1:3, 2]  # % inertia for axes 1, 2, and 3

# Assign colors to clusters
names(cluster_colors) <- levels(ind_coords$cluster)

# Function to get the color of a given cluster (used for convex hulls)
get_cluster_color <- function(cluster) {
  return(cluster_colors[as.character(cluster)])
}

# Initialize the graph
fig <- plot_ly()

# Add individual points in black
fig <- fig %>% add_trace(
  data = ind_coords, 
  x = ~Dim.1, y = ~Dim.2, z = ~Dim.3,
  type = "scatter3d", 
  mode = "markers",
  marker = list(color = "black", size = 1, opacity = 0.8),
  showlegend = FALSE,
  inherit = FALSE
)

# Calculate maximum ranges for each axis
max_range <- max(abs(as.matrix(ind_coords[,1:3]))) * 1.1

# Create dashed axis lines passing through the origin
fig <- fig %>% 
  add_trace(
    x = c(-max_range, max_range), 
    y = c(0, 0), 
    z = c(0, 0),
    type = "scatter3d", 
    mode = "lines",
    line = list(color = "grey80", dash = "dash", width = 0.5),
    showlegend = FALSE,
    inherit = FALSE,
    name = ""
  ) %>%
  add_trace(
    x = c(0, 0), 
    y = c(-max_range, max_range), 
    z = c(0, 0),
    type = "scatter3d", 
    mode = "lines",
    line = list(color = "grey80", dash = "dash", width = 0.5),
    showlegend = FALSE,
    inherit = FALSE,
    name = ""
  ) %>% 
  add_trace(
    x = c(0, 0), 
    y = c(0, 0), 
    z = c(-max_range, max_range),
    type = "scatter3d", 
    mode = "lines",
    line = list(color = "grey80", dash = "dash", width = 0.5),
    showlegend = FALSE,
    inherit = FALSE,
    name = ""
  )

# Add convex hulls for each cluster with the palette colors
for (clus in levels(ind_coords$cluster)) {
  cluster_points <- ind_coords[ind_coords$cluster == clus, c("Dim.1", "Dim.2", "Dim.3")]
  hull <- convhulln(cluster_points)  # Direct calculation, without condition
  
  col <- cluster_colors[as.character(clus)]

  fig <- fig %>% add_trace(
    type = "mesh3d",
    x = cluster_points$Dim.1,
    y = cluster_points$Dim.2,
    z = cluster_points$Dim.3,
    i = hull[,1] - 1,
    j = hull[,2] - 1,
    k = hull[,3] - 1,
    opacity = 0.2,
    facecolor = rep(col, nrow(hull)), 
    flatshading = TRUE,
    showscale = FALSE,
    name = paste("Cluster", clus),
    showlegend = TRUE,
    inherit = FALSE
  )

}

# Disable projected panes and add a uniform gray background
fig <- fig %>%
  layout(
    paper_bgcolor = "#D3D3D3",
    plot_bgcolor = "#D3D3D3",
    scene = list(
      xaxis = list(
        title = "",
        showbackground = FALSE,
        showline = FALSE,          # Remove axis line
        showticklabels = FALSE,    # Remove numeric tick labels
        zeroline = FALSE,          # Remove default zero line
        showgrid = FALSE           # Optional: remove grid
      ),
      yaxis = list(
        title = "",
        showbackground = FALSE,
        showline = FALSE,
        showticklabels = FALSE,
        zeroline = FALSE,
        showgrid = FALSE
      ),
      zaxis = list(
        title = "",
        showbackground = FALSE,
        showline = FALSE,
        showticklabels = FALSE,
        zeroline = FALSE,
        showgrid = FALSE
      ),
      aspectmode = "manual",
      aspectratio = list(x = 1.5, y = 1, z = 1)
    ),
    showlegend = TRUE
  )

# Configure the legend with a title
fig <- fig %>% layout(
  legend = list(
    title = list(text = "<b>Profiles</b>"),  # Legend title
    font = list(size = 10)  # Title font style
  )
)



# Display the graph
fig

1.7 Calculation of the table with cluster sizes and the associated bar chart

dataclust <- as.data.frame(res.hcpc[["data.clust"]])
IndividusClasse <- table(dataclust$clust) # To create a table with the number of individuals per each cluster
print(IndividusClasse)

## 
##    1    2    3    4    5    6 
##  339  696  471 1144  232  322

barplot(table(dataclust$clust), main = "Number of individuals by profile")

1.8 Production of graphs to describe the clusters

1.8.1 Identification of categorical variables with only two values

# This operation is necessary to simplify the graphs later
TableauVariablesCategorielles <- Filter(is.character, Cyclistes)

NbModalitesParVariable <- data.frame()

for (i in colnames(TableauVariablesCategorielles)){

  # Retrieve the number of values for each categorical variable
  fichier <- length(unique(TableauVariablesCategorielles[[i]]))
  NbModalitesParVariable <- rbind(NbModalitesParVariable, fichier)
  
}

rm(i, fichier)

NbModalitesParVariable$Variable <- colnames(TableauVariablesCategorielles)

VariablesDeuxModalites <- NbModalitesParVariable[which(NbModalitesParVariable[,1] == 2),]
VariablesDeuxModalites <- as.character(VariablesDeuxModalites$Variable)

1.8.2 Setting the v.test threshold for selecting discriminant variables to describe clusters

# By default v.test > |1.96| - In this case, raising the threshold reduces the number of variables that will describe the clusters. Only extremely significant ones are kept.
Seuil.v.test <- 3.29 # Accepting a 1 in 1000 chance of error
# learn more https://www.medcalc.org/manual/values-of-the-normal-distribution.php

1.8.3 Creation of tables describing profiles by quantitative and categorical variables and display of bar charts

# For variables with only two categorical values, only one is kept
for (i in 1:nbclasses){

  # Retrieve the description of each of the N clusters by quantitative variables into tables
  b <- as.data.frame(res.hcpc$desc.var$quanti[[i]])
  b <- b[which(colnames(b) == 'v.test')]
  b <- signif(b,3) # To round values

  # Retrieve the description of each of the N clusters by categorical variables into tables
  c <- as.data.frame(res.hcpc$desc.var$category[[i]])
  d <- as.data.frame(c$label <- rownames(c))
  d <- as.data.frame(c$label <- sub("\\=.*", "", c$label))
  d <- as.data.frame(c[which(c$label %in% VariablesDeuxModalites), ])
  d <- as.data.frame(d[which(d$v.test > 0),])
  e <- as.data.frame(c[which(!c$label %in% VariablesDeuxModalites), ])
  f <- as.data.frame(rbind(d, e))
  g <- as.data.frame(f$label <- NULL)
  c <- signif(f,3) # To round values
  c <- c[which(colnames(c) == 'v.test')]

  # Combine tables with quantitative and categorical variables
  h <- as.data.frame(rbind(b,c))
  h <- as.data.frame(cbind(h, h$label <- row.names(h)))
  colnames(h) <- c("v.test", "label")
  h <- as.data.frame(h[order(h$v.test, decreasing = TRUE), ])
  h <- as.data.frame(h[which(!h$v.test == "Inf"),])
  h <- as.data.frame(h[which(abs(h$v.test) >= Seuil.v.test),])

  # To plot horizontal bar charts
  op <- par(oma = c(0,7,0,0))
  barplot((h$v.test), names = row.names(h), border = "white", horiz = TRUE, las = 1, xlim = c(-5 - max(abs(h$v.test)), 5 + max(abs(h$v.test))), cex.names=0.8, main = paste("profile", i, sep = ""))
  par(op)
  
}

rm(b,c,d,e,f,g,h,i)

1.8.4 Creation of radar charts to visualize, for each profile, its sociodemographic composition and travel patterns

#to reduce margins and set a grey background
opar <- par(mar = c(2, 2, 2, 2), bg = "grey60")

# Color vector
colors_border = c("blue", "green" , "red", "yellow", "black", "cyan")
colors_in = c("blue", "green" , "red", "yellow", "black", "cyan")

## Individual variables (table preparation)

# Gender and driving license
genre_permis_radar <- dataclust %>%
  group_by(clust) %>%
  summarise(muj = sum(Sexo == "Mujer") / n() * 100,
            permis_cond = sum(LicCondu == "LicCon-Si") / n() * 100)

# AGE
age_radar <- dataclust %>%
  group_by(clust, Edad) %>%
  summarise(effectifs = n()) %>%
  group_by(clust) %>%
  mutate(effectifs = effectifs / sum(effectifs) * 100) %>%
  pivot_wider(names_from = Edad, values_from = effectifs)

# replace NA by 0
age_radar <- replace(age_radar, is.na(age_radar), 0)

# reorder columns
age_radar <- age_radar[c(1,5,3,4,2)]

# EDUCATION LEVEL
NivEduc_radar <- dataclust %>%
  group_by(clust, NivEducCL) %>%
  summarise(effectifs = n()) %>%
  group_by(clust) %>%
  mutate(effectifs = effectifs / sum(effectifs) * 100) %>%
  pivot_wider(names_from = NivEducCL, values_from = effectifs)

# replace NA by 0
NivEduc_radar <- replace(NivEduc_radar, is.na(NivEduc_radar), 0)

# reorder columns
NivEduc_radar <- NivEduc_radar[c(1,3,2,4,5)]

# combine dataframes
var_indiv_radar <- cbind(genre_permis_radar, age_radar[-1], NivEduc_radar[-1])

# assign rownames
rownames(var_indiv_radar) <- paste("profile" , var_indiv_radar$clust , sep = "-")
var_indiv_radar$clust <- NULL

# To move a column
var_indiv_radar <- var_indiv_radar %>% relocate(permis_cond, .after = Universitario)

summary(var_indiv_radar)

##       muj           Inf. 15             15-39             40-64      
##  Min.   :14.75   Min.   : 0.00000   Min.   : 0.3106   Min.   : 0.00  
##  1st Qu.:19.68   1st Qu.: 0.00000   1st Qu.:59.4782   1st Qu.:18.31  
##  Median :23.99   Median : 0.08741   Median :63.3530   Median :30.58  
##  Mean   :24.15   Mean   :16.71588   Mean   :56.1407   Mean   :23.77  
##  3rd Qu.:29.94   3rd Qu.: 0.36698   3rd Qu.:65.7156   3rd Qu.:31.89  
##  Max.   :31.99   Max.   :99.68944   Max.   :85.1380   Max.   :34.88  
##       >=65        PrimariaSecund       Media            Tecnico      
##  Min.   :0.0000   Min.   : 5.015   Min.   : 0.9317   Min.   : 0.000  
##  1st Qu.:0.7492   1st Qu.:11.119   1st Qu.:10.2151   1st Qu.: 9.937  
##  Median :3.0593   Median :34.419   Median :28.5920   Median :13.927  
##  Mean   :3.3780   Mean   :38.559   Mean   :21.7982   Mean   :12.517  
##  3rd Qu.:4.4957   3rd Qu.:50.148   3rd Qu.:32.3120   3rd Qu.:16.379  
##  Max.   :9.1954   Max.   :98.758   Max.   :35.0318   Max.   :21.444  
##  Universitario      permis_cond   
##  Min.   : 0.3106   Min.   : 0.00  
##  1st Qu.: 6.5672   1st Qu.:24.81  
##  Median :19.0971   Median :64.13  
##  Mean   :27.1259   Mean   :53.00  
##  3rd Qu.:34.8861   3rd Qu.:81.09  
##  Max.   :81.7109   Max.   :91.30

# to add two rows at the top of the dataframe with the max and min for each variable (based on the previous summary)
var_indiv_radar <- rbind(rep(100,10) , rep(0,10) , var_indiv_radar)

# to reorder columns
var_indiv_radar <- var_indiv_radar[, c(1, rev(seq_along(var_indiv_radar)[-1]))]

# plot radar chart
radarchart(var_indiv_radar, axistype = 1, seg = 5, pty = 32, centerzero = FALSE,
           #custom polygon
           pcol = colors_border, pfcol = adjustcolor(colors_in, alpha.f = 0), plwd = 2 , plty = 1,
           #custom the grid
           cglcol = "white", cglty = 3, axislabcol = "white", caxislabels = rep("", 10), cglwd = 0.1,
           #custom labels
           vlcex = 0.5, calcex = 0.5
)

# Add a legend
legend(x = 1.2, y = 1.2, legend = rownames(var_indiv_radar[-c(1,2),]), bty = "n", pch = 18 , col = colors_border, text.col = "black", cex = 0.5, pt.cex = 0.9)

# add a title
title(main = "Cyclist characteristics by profile\n(summary)", cex.main = 0.8)

# Split the screen in 6 parts to plot each radar separately
opar <- par(oma = c(0,0,3,0)+0.1, mar = c(0, 0.5, 1.2, 0.5), mfrow = c(2,3), bg = "grey60")

# Plot the radar charts
for(i in 1:length(unique(dataclust$clust))){
  radarchart(var_indiv_radar[c(1, 2, i+2), ], axistype = 1, seg = 5, pty = 32, centerzero = FALSE,
             #custom polygon
             pcol=colors_border[i], pfcol = adjustcolor(colors_in[i], alpha.f = 0), plwd = 2 , plty = 1,
             #custom the grid
             cglcol = "white", cglty = 3, axislabcol = "white", cglwd = 0.1, caxislabels = rep("", 10),
             #custom labels
             vlcex = 0.5, palcex = 0.8)
  
  # To add a title to each radar
  title(main = paste("profile", i, sep = "-"), cex.main = 0.8)
}

# To insert a main title
mtext("Cyclist characteristics by profile", 
      cex = 1, side = 3,line = 1, adj = 0.5, outer = TRUE)

# to reset graphic parameters
par(opar)


## Variables characterizing trips and cycling practices (table preparation)

# Trips
trajet_radar <- dataclust %>%
  group_by(clust) %>%
  summarise(
    travail = sum(MotivoCL == "Trab") / n() * 100,
    ecole = sum(MotivoCL == "Estudio") / n() * 100,
    loisir = sum(MotivoCL == "Ocio") / n() * 100,
    autre = sum(MotivoCL == "Otro") / n() * 100,
    durée = mean(Duracion),
    distance = mean(Distancia),
    FreqSemaine = mean(FrecSemana),
    FreqWeekEnd = mean (FrecFinSem),
    Occasionnel = sum(Ocasional == "Ocas-Si") / n() * 100
  ) %>%
  as.data.frame()

rownames(trajet_radar) <- paste("profile" , trajet_radar$clust , sep = "-")
trajet_radar$clust <- NULL

summary(trajet_radar)

##     travail           ecole            loisir           autre       
##  Min.   : 1.242   Min.   : 1.724   Min.   : 1.699   Min.   : 8.555  
##  1st Qu.:31.253   1st Qu.: 5.573   1st Qu.: 4.155   1st Qu.:14.434  
##  Median :57.429   Median :10.024   Median : 7.879   Median :22.211  
##  Mean   :46.428   Mean   :18.618   Mean   : 8.672   Mean   :26.282  
##  3rd Qu.:61.447   3rd Qu.:16.114   3rd Qu.:11.778   3rd Qu.:26.824  
##  Max.   :77.155   Max.   :67.702   Max.   :18.584   Max.   :64.511  
##      durée          distance      FreqSemaine       FreqWeekEnd     
##  Min.   :20.63   Min.   : 4763   Min.   :0.04095   Min.   :0.01724  
##  1st Qu.:27.21   1st Qu.: 5683   1st Qu.:3.76256   1st Qu.:0.11443  
##  Median :33.96   Median : 7027   Median :3.95093   Median :0.34907  
##  Mean   :34.58   Mean   : 8305   Mean   :3.34074   Mean   :0.31478  
##  3rd Qu.:37.84   3rd Qu.: 7446   3rd Qu.:4.11719   3rd Qu.:0.52048  
##  Max.   :54.86   Max.   :18194   Max.   :4.22923   Max.   :0.55944  
##   Occasionnel      
##  Min.   : 0.08741  
##  1st Qu.: 0.89670  
##  Median : 4.27665  
##  Mean   :20.50617  
##  3rd Qu.:13.97850  
##  Max.   :97.41379

# to add two rows at the top of the dataframe with the max and min for each variable (based on the previous summary)
trajet_radar <- rbind(c(rep(100,4), 60, 20000, 5, 1, 100), rep(0, length(colnames(trajet_radar))), trajet_radar)

# to reorder columns
trajet_radar <- trajet_radar[, c(1, rev(seq_along(trajet_radar)[-1]))]

# plot radar chart
radarchart(trajet_radar, axistype = 1 , seg = 5, pty = 32, centerzero = FALSE,
           #custom polygon
           pcol = colors_border, pfcol = adjustcolor(colors_in, alpha.f = 0), plwd = 2 , plty = 1,
           #custom the grid
           cglcol = "white", cglty = 3, axislabcol = "white", cglwd = 0.1, caxislabels = rep("", 10),
           #custom labels
           vlcex = 0.5, calcex = 0.5
)

# Add a legend
legend(x = 1.2, y = 1.2, legend = rownames(trajet_radar[-c(1,2),]), bty = "n", pch = 18 , col = colors_border, text.col = "black", cex = 0.5, pt.cex = 0.9)

# add a title
title(main = "Trip characteristics by profile\n(summary)", cex.main = 0.8)

# Split the screen in 6 parts to plot each radar separately
opar <- par(oma = c(0,0,3,0)+0.1, mar = c(0, 0.5, 1.2, 0.5), mfrow = c(2,3), bg = "grey60")

# Plot the radar charts
for(i in 1:length(unique(dataclust$clust))){
  radarchart(trajet_radar[c(1, 2, i+2), ], axistype = 1, seg = 5, pty = 32, centerzero = FALSE,
             #custom polygon
             pcol = colors_border[i], pfcol = adjustcolor(colors_in[i], alpha.f = 0), plwd = 2 , plty = 1,
             #custom the grid
             cglcol = "white", cglty = 3, axislabcol = "white", cglwd = 0.1, caxislabels = rep("", 10),
             #custom labels
             vlcex = 0.5, palcex = 0.8)
  
  # To add a title to each radar
  title(main = paste("profile", i, sep = "-"), cex.main = 0.8)
}

# To insert a main title
mtext("Trip characteristics by profile", 
      cex = 1, side = 3,line = 1, adj = 0.5, outer = TRUE)

# to reset graphic parameters
par(opar)

1.8.5 Creation of vertical stacked bar charts to visualize, for each profile, a first set of contextual variables

## Variables characterizing households (table preparation)

# Stratum characterizing the housing and neighborhood standard
StratesLogt <- dataclust %>%
  group_by(clust, Estrato) %>%
  summarise(effectifs = n(), .groups = 'drop') %>%
  group_by(clust) %>%
  mutate(effectifs = effectifs / sum(effectifs) * 100) %>%
  pivot_wider(names_from = Estrato, values_from = effectifs)

# replace NA by 0
StratesLogt <- replace(StratesLogt, is.na(StratesLogt), 0)

# To move a column
StratesLogt <- StratesLogt %>% relocate(Estr0, .before = Estr1)

# To coerce df to long format
StratesLogt_long <- StratesLogt %>%
  pivot_longer(cols = -clust, names_to = "estrato", values_to = "pourcentage")

# Create interactive graph with ggiraph
gg_strates <- ggplot(StratesLogt_long, 
                     aes(x = as.factor(clust), 
                         y = pourcentage, 
                         fill = factor(estrato, levels = rev(unique(estrato))),
                         tooltip = paste("Profile:", clust, 
                                        "<br>Stratum:", estrato, 
                                        "<br>Percentage:", round(pourcentage, 1), "%"),
                         data_id = paste(clust, estrato, sep = "_"))) +
  geom_bar_interactive(position = "fill", stat = "identity") +
  theme_bw() +
  scale_fill_brewer(palette = "YlOrRd", direction = -1) +
  labs(fill = "Housing strata\n(%)", y = "%", x = "profile")

# Display interactive graph with customized hover
girafe(
  ggobj = gg_strates,
  options = list(opts_sizing(rescale = TRUE,  # Allows resizing
                width = 1),      # Width relative to container
    opts_hover(
      css = "stroke:red; stroke-width:3px; filter: drop-shadow(0 0 5px rgba(0,0,0,0.3));"
    ),
    opts_tooltip(
      css = "background-color:black; color:white; padding:5px; border-radius:5px; font-family:monospace;"
    ),
    opts_hover_inv(css = "opacity:0.7;")  # Other elements become slightly transparent
  )
)

## Variables characterizing places of residence (table preparation)

# Major residential zones
GranZona <- dataclust %>%
  group_by(clust, GranZona) %>%
  summarise(effectifs = n(), .groups = 'drop') %>%
  group_by(clust) %>%
  mutate(effectifs = effectifs / sum(effectifs) * 100) %>%
  pivot_wider(names_from = GranZona, values_from = effectifs)

# replace NA by 0
GranZona <- replace(GranZona, is.na(GranZona), 0)

# To coerce df to long format
GranZona_long <- GranZona %>%
  pivot_longer(cols = -clust, names_to = "GranZona", values_to = "pourcentage")

# Create interactive graph with ggiraph
gg_granzona <- ggplot(GranZona_long, 
                      aes(x = as.factor(clust), 
                          y = pourcentage, 
                          fill = factor(GranZona, levels = rev(unique(GranZona))),
                          tooltip = paste("Profile:", clust, 
                                         "<br>Zone:", GranZona, 
                                         "<br>Percentage:", round(pourcentage, 1), "%"),
                          data_id = paste(clust, GranZona, sep = "_"))) +
  geom_bar_interactive(position = "fill", stat = "identity") +
  theme_bw() +
  scale_fill_brewer(palette = "Set3", direction = -1) +
  labs(fill = "Residence locations\nby major zones\n(%)", y = "%", x = "profile")

# Display interactive graph
girafe(
  ggobj = gg_granzona,
  options = list(
    opts_sizing(rescale = TRUE,  # Allows resizing
                width = 1),      # Width relative to container
    opts_hover(
      css = "stroke:red; stroke-width:3px; filter: drop-shadow(0 0 5px rgba(0,0,0,0.3));"
    ),
    opts_tooltip(
      css = "background-color:black; color:white; padding:5px; border-radius:5px; font-family:monospace;"
    ),
    opts_hover_inv(css = "opacity:0.7;")  # Other elements become slightly transparent
  )
)

1.8.6 Creation of horizontal bar charts to visualize, for each profile, a second set of contextual variables

# Prepare the dataframe
Contexte <- dataclust %>%
  group_by(clust) %>%
  summarise(
    num = n(),
    Nb_moy_voit = mean(NumAuto),
    Taille_menage = mean(NumPersHog),
    Pct_ICS12 = mean(Pct_ICS12),
    ParModUtam = mean(ParModUtam)
  ) %>%
  as.data.frame()

# Convert clust to factor for better display
Contexte$clust <- as.factor(Contexte$clust)

## Graph 1: Average number of cars per household
gg_voitures <- ggplot(Contexte, 
                      aes(x = Nb_moy_voit, 
                          y = clust,
                          tooltip = paste("Profile:", clust, 
                                        "<br>Avg number of cars:", round(Nb_moy_voit, 2)),
                          data_id = clust)) +
  geom_col_interactive(fill = "purple", width = 0.7) +
  theme_bw() +
  labs(title = "Average number of cars\nper household",
       x = "Average number",
       y = "Profile") +
  theme(plot.title = element_text(size = 10, hjust = 0.5),
        axis.text.y = element_text(size = 9),
        axis.title = element_text(size = 9))

## Graph 2: Average household size
gg_taille <- ggplot(Contexte, 
                    aes(x = Taille_menage, 
                        y = clust,
                        tooltip = paste("Profile:", clust, 
                                      "<br>Average size:", round(Taille_menage, 2)),
                        data_id = clust)) +
  geom_col_interactive(fill = "blue", width = 0.7) +
  theme_bw() +
  labs(title = "Average household size",
       x = "Average size",
       y = "") +  # Remove y label to avoid duplicates
  theme(plot.title = element_text(size = 10, hjust = 0.5),
        axis.text.y = element_blank(),  # Remove y labels
        axis.ticks.y = element_blank(),
        axis.title = element_text(size = 9))

## Graph 3: Percentage of ICS1 and ICS2 households
gg_ics <- ggplot(Contexte, 
                 aes(x = Pct_ICS12, 
                     y = clust,
                     tooltip = paste("Profile:", clust, 
                                   "<br>% ICS1 and ICS2:", round(Pct_ICS12, 1), "%"),
                     data_id = clust)) +
  geom_col_interactive(fill = "green", width = 0.7) +
  theme_bw() +
  labs(title = "% ICS1 and ICS2 households\nper residence zones",
       x = "Percentage (%)",
       y = "") +
  theme(plot.title = element_text(size = 10, hjust = 0.5),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title = element_text(size = 9))

## Graph 4: Bike mode share
gg_velo <- ggplot(Contexte, 
                  aes(x = ParModUtam, 
                      y = clust,
                      tooltip = paste("Profile:", clust, 
                                    "<br>Bike mode share:", round(ParModUtam, 1), "%"),
                      data_id = clust)) +
  geom_col_interactive(fill = "darkred", width = 0.7) +
  theme_bw() +
  labs(title = "Bike mode share\nper residence zones",
       x = "Percentage (%)",
       y = "") +
  theme(plot.title = element_text(size = 10, hjust = 0.5),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title = element_text(size = 9))

# Combine the 4 graphs side by side with patchwork
combined_plots1 <- (gg_voitures | gg_taille)+
  plot_annotation(title = "Profile characteristics",
                  theme = theme(plot.title = element_text(hjust = 0.5)))
combined_plots2 <- (gg_ics | gg_velo)+
  plot_annotation(title = "Profile characteristics",
                  theme = theme(plot.title = element_text(hjust = 0.5)))

# Display first two graphs
girafe(
  ggobj = combined_plots1,
  options = list(
    opts_sizing(rescale = TRUE, width = 1),  
    opts_hover(css = "stroke:red; stroke-width:3px; filter: drop-shadow(0 0 5px rgba(0,0,0,0.3));"),
    opts_tooltip(css = "background-color:black; color:white; padding:5px; border-radius:5px; font-family:monospace;"),
    opts_hover_inv(css = "opacity:0.7;")
  )
)

# Display last two graphs
girafe(
  ggobj = combined_plots2,
  options = list(
    opts_sizing(rescale = TRUE, width = 1),  # Full width for the 4 graphs
    opts_hover(css = "stroke:red; stroke-width:3px; filter: drop-shadow(0 0 5px rgba(0,0,0,0.3));"),
    opts_tooltip(css = "background-color:black; color:white; padding:5px; border-radius:5px; font-family:monospace;"),
    opts_hover_inv(css = "opacity:0.7;")
  )
)

1.9 Preparation of the final table containing the results for export to an external spreadsheet

1.9.1 Extraction of household identifiers

# Useful later for retrieving the XY coordinates of the household's residence location
Cyclistes <- Cyclistes |>
  mutate(id_hog_pers = row.names(dataclust)) |>
  separate(id_hog_pers, into = c("id_hog", "id_pers"), sep = "-", remove = FALSE)

1.9.2 Retrieving the profile from the typology in the input table

Cyclistes$profil <- dataclust$clust

1.9.3 Exporting the table to MS Excel format

write.xlsx(Cyclistes, "TypoProfilCyclistesFINAL.xlsx")
#browseURL("TypoProfilCyclistesFINAL.xlsx")

2 Mapping the home and destination locations of cyclist profiles

2.1 Map of cyclist residence locations by profile

2.1.1 Data preparation for map creation

2.1.1.1 Importing XY coordinates of the residence location of households containing at least one cyclist

CoordXY_Menages <- read.xlsx("CoordXY_Menages.xlsx")

2.1.1.2 Joining XY coordinates of the residence location into the cyclists’ table

Cyclistes <- merge(Cyclistes, CoordXY_Menages, by = "id_hog")

2.1.1.3 Importing the UTAM layer (Territorial Units for Mobility Analysis)

UTAM <- st_read("EMU2019_UTAM_Bgta.gpkg")

## Reading layer `EMU2019_UTAM_Bgta_Carto_Variables_Nbre_Completo_v3' from data source `F:\Protocoles\Master 2\ASDT\2024-2025\1 - Analyses factorielles et classification\3_AFDM\FINAL\EMU2019_UTAM_Bgta.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 132 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 569984 ymin: 492795.1 xmax: 626083.9 ymax: 557354.6
## Projected CRS: WGS 84 / UTM zone 18N

2.1.1.4 Creating a point pattern (sf object) from the XY coordinates of cyclists’ residence locations

ResidCyclistes<- st_as_sf(Cyclistes, 
                          coords = c("X_Hog","Y_Hog"), 
                          crs = st_crs(UTAM)$srid)
plot(st_geometry(ResidCyclistes), pch = ".", main = "Residence location of cyclists surveyed in the EMU-2019 (Bogotá)", cex.main = 0.7)

2.1.1.5 Sorting profiles

# Necessary step to create smoothed density maps showing the concentration of cyclists' residence locations by profile
## To create a list of cyclist profiles, in order
Cyclistes$profil <- as.numeric(Cyclistes$profil)
Cyclistes <- Cyclistes[order(Cyclistes$profil, decreasing = FALSE), ]
ListProfils <- unique(Cyclistes$profil)

2.1.2 Series of density maps showing the concentration of cyclists’ residence locations by profile

# Definition of the number of clusters
nclass <- 6

# Creation of a set of color palettes in a "list" object
palettes <- list(
    pal1 = hcl.colors(nclass, "YlOrRd", rev = TRUE),
    pal2 = hcl.colors(nclass, "Greens 3", rev = TRUE),
    pal3 = hcl.colors(nclass, "Reds 3", rev = TRUE),
    pal4 = hcl.colors(nclass, "Grays", rev = TRUE),
    pal5 = hcl.colors(nclass, "Oslo", rev = FALSE),
    pal6 = hcl.colors(nclass, "Red-Purple", rev = TRUE),
    pal7 = hcl.colors(nclass, "Inferno", rev = TRUE),
    pal8 = hcl.colors(nclass, "Reds", rev = TRUE),
    pal9 = hcl.colors(nclass, "Blues 3", rev = TRUE),
    pal10 = hcl.colors(nclass, "Purples 3", rev = TRUE)
)

# To define the extent for smoothing, use the UTAM extent
Emprise <- as.owin(st_buffer(UTAM, 1000))

# Setting margins to insert the main title and titles for each map
par(oma = c(3.5,0,3,0)+0.1, mar = c(0, 0.5, 1.2, 0.5))

# To split the window into 2 rows and 3 columns (6 profiles)
par(mfrow = c(2,3))

# To define the smoothing radius (in m.)
rayon <- 1000

# To define the pixel size (in m.)
resol <- 100 # here 100m x 100m = 1 ha.

# Loop to produce and map a density surface by profile
for (i in ListProfils){
    
    # Retrieve datasets by profile
    fichier <- Cyclistes[which(Cyclistes$profil == i),]
    
    # To retrieve coordinates of residence locations
    pts <- st_coordinates(st_as_sf(fichier, coords = c("X_Hog","Y_Hog"), crs = st_crs(UTAM)$srid))
    
    # To create a ppp object (spatstat format) and incorporate the extent
    fichier.ppp <- ppp(pts[,1], pts[,2], window = Emprise)
    
    # To calculate the density surface (smoothing radius: 1000 m and spatial resolution of the image: 100m x 100m = 1 ha)
    cartelissee <- density(fichier.ppp, sigma = rayon, kernel = "gaussian", eps = resol)
    
    # Convert the smoothed surface to SpatRaster format
    cartelissee.raster <- rast(cartelissee)
    crs(cartelissee.raster) <- st_crs(UTAM)$srid # To specify a CRS for the raster object
    
    # Convert densities to counts (multiply densities by the area of the circle)
    values(cartelissee.raster) <- abs(values(cartelissee.raster) * pi * rayon **2)
    
    # Clip the raster to the UTAM extent
    cartelissee.raster <- mask(cartelissee.raster, UTAM)
    
    # Bin breaks (equal intervals)
    bks <- seq(from = min(values(cartelissee.raster), na.rm = TRUE), 
               to = max(values(cartelissee.raster), na.rm = TRUE), 
               by = (max(values(cartelissee.raster), na.rm = TRUE) - min(values(cartelissee.raster), na.rm = TRUE)) / nclass)
    
    # Plot the map
    plot(st_geometry(UTAM), border = NA, bg = "grey60")
    
    mf_raster(x = cartelissee.raster,
          var = "lyr.1",
          type = "interval", 
          breaks = bks,
          pal = unlist(palettes[(i)]),
          add = TRUE,
          leg_pos = "bottomright",
          leg_title = "Nb. of cyclist residence\nlocations within a\n1 km radius.\n", 
          leg_title_cex = 0.5,
          leg_val_cex = 0.45,
          leg_size = 0.4,
          leg_box_border = "white",
          leg_box_cex = c(2.5, 2),
          leg_adj = c(-2,1))
    
    title(main = paste("profile", i, sep = "-"))
    
    mtext(text = paste0("n = ", nrow(fichier), " cyclists"), 
          side = 3, line = -2, adj = 0.1, cex = 0.7)
    
}

mf_scale(
    lwd = 1.5,
    cex = 0.6,
    pos = "bottomleft",
    scale_units = "m"
)

mf_arrow(pos = "topright", adj = c(-3,-3))

# To display the main title and source
mtext("Residence locations of cyclists by profile in the Bogotá Metropolitan Area", cex = 1.3, side = 3, line = 1, adj = 0.5, outer = TRUE)
mtext("   Sources: EMU2019 - SDM - Smoothing radius: 1000 m and resolution: 1 ha - Package: mapsf", side = 1, line = 1, adj = 0, cex = 0.6, outer = TRUE)

2.2 Map of the destination locations of the first bicycle trip from home

Destination1erTrajet <- st_read("Centroides_ZAT_Destination_1er_Trajet_Depuis_Domicile.gpkg")

## Reading layer `Centroides_ZAT_Destination_1er_Trajet_Depuis_Domicile' from data source `F:\Protocoles\Master 2\ASDT\2024-2025\1 - Analyses factorielles et classification\3_AFDM\FINAL\Centroides_ZAT_Destination_1er_Trajet_Depuis_Domicile.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 3231 features and 3 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 532626 ymin: 469041.5 xmax: 652779.8 ymax: 600655.7
## Projected CRS: WGS 84 / UTM zone 18N

2.2.1 Data preparation for map creation

2.2.1.1 Creating a list of cyclists with at least one known destination

listDestination1erTrajet <- as.character(Destination1erTrajet$id_concat)

2.2.1.2 Selecting cyclists with a known destination

Cyclistes <- Cyclistes[Cyclistes$id_hog_pers %in% listDestination1erTrajet, ]

2.2.1.3 Joining to retrieve the cyclist’s profile at the destination location

Destination1erTrajet <- merge(Destination1erTrajet, Cyclistes[c("profil", "id_hog_pers")], by.x = "id_concat", by.y = "id_hog_pers")

2.2.2 Creation of the second series of destination location maps

# Setting margins to insert the main title and titles for each map
par(oma = c(3.5,0,3,0)+0.1, mar = c(0, 0.5, 1.2, 0.5))

# To split the window into 2 rows and 3 columns (6 profiles)
par(mfrow = c(2,3))

# Definition of the number of clusters
nclass <- 6

# To define the smoothing radius (in m.)
rayon <- 1000
  
# To define the pixel size (in m.)
resol <- 100 # here 100m x 100m = 1 ha.

# Loop to produce and map a density surface by profile
for (i in ListProfils){
  
  # Retrieve datasets by profile
  fichier <- Destination1erTrajet[which(Destination1erTrajet$profil == i),]
  
  # To retrieve coordinates of destination locations
  pts <- st_coordinates(fichier)
  
  # To create a ppp object (spatstat format) and incorporate the extent
  fichier.ppp <- ppp(pts[,1], pts[,2], window = Emprise)
  
  # To calculate the density surface (smoothing radius: 1000 m and spatial resolution of the image: 100m x 100m = 1 ha)
  cartelissee <- density(fichier.ppp, sigma = rayon, kernel = "gaussian", eps = resol)
  
  # Convert the smoothed surface to SpatRaster format
  cartelissee.raster <- rast(cartelissee)
  crs(cartelissee.raster) <- st_crs(UTAM)$srid # To specify a CRS for the raster object

  # Convert densities to counts (multiply densities by the area of the circle)
  values(cartelissee.raster) <- abs(values(cartelissee.raster) * pi * rayon **2)

  # Clip the raster to the UTAM extent
  cartelissee.raster <- mask(cartelissee.raster, UTAM)

  # Bin breaks (equal intervals)
  bks <- seq(from = min(values(cartelissee.raster), na.rm = TRUE), 
            to = max(values(cartelissee.raster), na.rm = TRUE), 
            by = (max(values(cartelissee.raster), na.rm = TRUE) - min(values(cartelissee.raster), na.rm = TRUE)) / nclass)

    # Plot the map
    plot(st_geometry(UTAM), border = NA, bg = "grey60")
    
    mf_raster(x = cartelissee.raster,
          var = "lyr.1",
          type = "interval", 
          breaks = bks,
          pal = unlist(palettes[(i)]),
          add = TRUE,
          leg_pos = "bottomright",
          leg_title = "Nb. of destinations\nwithin a 1 km\nradius.\n", 
          leg_title_cex = 0.5,
          leg_val_cex = 0.45,
          leg_size = 0.4,
          leg_box_border = "white",
          leg_box_cex = c(2.5, 2),
          leg_adj = c(-2,1))
    
    title(main = paste("profile", i, sep = "-"))
    
    mtext(text = paste0("n = ", nrow(fichier), " cyclists"), 
          side = 3, line = -2, adj = 0.1, cex = 0.7)
    
}

mf_scale(
    lwd = 1.5,
    cex = 0.6,
    pos = "bottomleft",
    scale_units = "m"
)

mf_arrow(pos = "topright", adj = c(-3,-3))

# To display the main title and source
mtext("Destination of first bicycle trips from home in Bogotá", cex = 1.3, side = 3, line = 1, adj = 0.5, outer = TRUE)
mtext("   Sources: EMU2019 - SDM - Smoothing radius: 1000 m and resolution: 1 ha - Package: mapsf", side = 1, line = 1, adj = 0, cex = 0.6, outer = TRUE)

Profiles of cyclists and their spatial distribution in Bogotá

Maëlle Lucas and Florent Demoraes

18/11/2022 — Updated on 18/06/2026