En este documento se desarrollan los pasos para cargar datos, realizar un análisis exploratorio (EDA), ajustar modelos de regresión logística para diferentes subgrupos y visualizar resultados clave sobre el cambio de vestimenta por percepción de inseguridad.
A continuación, se cargan las librerías necesarias y se lee el archivo Excel con la base de datos limpia.
# Librerías principales
library(readxl)
library(dplyr)
library(ggplot2)
library(GGally)
library(corrplot)
library(DataExplorer)
# Leer datos desde Excel (ajustar ruta según entorno)
datos <- read_excel("BD_clean.xlsx")
# Selección de variables de interés
variables_interes <- c(
"T07P31", "B07P41", "C01P45", "P05P10", "P02P7", "P04P9",
"P07P23", "P01P6", "A02P5", "A04P12", "A03P11",
paste0("E0", 1:6, "P3", 4:9), # E01P34 a E06P39
"T08P32", "T04P16", "T02P14", "T01P13", "T05P17"
)
df <- datos %>% select(all_of(variables_interes))
# Inspección inicial
summary(df)
## T07P31 B07P41 C01P45 P05P10
## Min. :0.0000 Length:163 Length:163 Min. :1.000
## 1st Qu.:0.0000 Class :character Class :character 1st Qu.:4.000
## Median :0.0000 Mode :character Mode :character Median :5.000
## Mean :0.3497 Mean :4.466
## 3rd Qu.:1.0000 3rd Qu.:5.000
## Max. :1.0000 Max. :5.000
## P02P7 P04P9 P07P23 P01P6 A02P5
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.00 Min. :1.000
## 1st Qu.:3.000 1st Qu.:2.00 1st Qu.:3.000 1st Qu.:2.00 1st Qu.:1.000
## Median :4.000 Median :3.00 Median :3.000 Median :3.00 Median :2.000
## Mean :3.742 Mean :2.73 Mean :3.595 Mean :3.11 Mean :2.092
## 3rd Qu.:5.000 3rd Qu.:4.00 3rd Qu.:4.000 3rd Qu.:4.50 3rd Qu.:3.000
## Max. :5.000 Max. :5.00 Max. :5.000 Max. :5.00 Max. :5.000
## A04P12 A03P11 E01P34 E02P35
## Min. :1.000 Min. :1.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:3.000 1st Qu.:1.500 1st Qu.:0.0000 1st Qu.:0.0000
## Median :4.000 Median :3.000 Median :0.0000 Median :0.0000
## Mean :3.583 Mean :3.215 Mean :0.1963 Mean :0.2577
## 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :5.000 Max. :5.000 Max. :1.0000 Max. :1.0000
## E03P36 E04P37 E05P38 E06P39
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.2331 Mean :0.5215 Mean :0.3681 Mean :0.3926
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## T08P32 T04P16 T02P14 T01P13
## Min. :0.0000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:3.000 1st Qu.:1.000
## Median :0.0000 Median :3.000 Median :4.000 Median :3.000
## Mean :0.3926 Mean :2.706 Mean :3.804 Mean :2.822
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :5.000 Max. :5.000
## T05P17
## Length:163
## Class :character
## Mode :character
##
##
##
str(df)
## tibble [163 × 22] (S3: tbl_df/tbl/data.frame)
## $ T07P31: num [1:163] 0 0 0 1 1 1 0 0 1 1 ...
## $ B07P41: chr [1:163] "4" "4" "4" "5" ...
## $ C01P45: chr [1:163] "Masculino" "Masculino" "Masculino" "Femenino" ...
## $ P05P10: num [1:163] 4 2 4 4 5 5 5 4 4 5 ...
## $ P02P7 : num [1:163] 5 3 5 4 5 4 3 5 5 5 ...
## $ P04P9 : num [1:163] 4 4 5 3 2 3 1 3 2 5 ...
## $ P07P23: num [1:163] 4 3 4 4 4 3 5 5 4 5 ...
## $ P01P6 : num [1:163] 2 3 1 3 4 3 3 4 1 4 ...
## $ A02P5 : num [1:163] 1 1 1 1 4 2 1 4 1 3 ...
## $ A04P12: num [1:163] 4 3 5 5 3 5 5 5 5 2 ...
## $ A03P11: num [1:163] 4 1 1 1 5 1 5 1 2 2 ...
## $ E01P34: num [1:163] 0 0 0 0 0 0 0 0 0 0 ...
## $ E02P35: num [1:163] 0 0 1 1 0 0 0 0 0 0 ...
## $ E03P36: num [1:163] 0 0 0 0 1 0 0 0 0 0 ...
## $ E04P37: num [1:163] 0 0 0 0 0 0 0 0 0 1 ...
## $ E05P38: num [1:163] 0 0 1 0 0 1 0 0 0 0 ...
## $ E06P39: num [1:163] 0 0 0 0 1 1 1 0 0 0 ...
## $ T08P32: num [1:163] 0 1 0 0 1 1 1 0 1 1 ...
## $ T04P16: num [1:163] 5 3 3 4 2 2 2 1 4 1 ...
## $ T02P14: num [1:163] 2 1 2 5 4 5 3 4 4 5 ...
## $ T01P13: num [1:163] 5 1 1 3 4 3 2 3 3 3 ...
## $ T05P17: chr [1:163] "Transmilenio" "SITP Zonal" "Bicicleta (Propia o Tembici)" "Automóvil particular (Como conductor, pasajero o en carro compartido)" ...
Se clasifican las variables en continuas y categóricas para aplicar gráficos adecuados.
tipos <- sapply(df, class)
continuas <- names(df)[tipos %in% c("numeric","integer")]
categoricas <- names(df)[tipos %in% c("factor","character")]
for (var in continuas) {
print(
ggplot(df, aes_string(x = var)) +
geom_histogram(bins = 30, fill = "skyblue", color = "black") +
theme_minimal() +
labs(title = paste("Histograma de", var))
)
}
for (var in categoricas) {
print(
ggplot(df, aes_string(x = var)) +
geom_bar(fill = "tomato", color = "black") +
theme_minimal() +
labs(title = paste("Gráfico de barras de", var))
)
}
# Pairplot (hasta 10 variables continuas)
if (length(continuas) <= 10) {
print(ggpairs(df[, continuas]))
} else {
print(ggpairs(df[, continuas[1:10]]))
}
# Matriz de correlación
cor_matrix <- cor(df[, continuas], use = "pairwise.complete.obs")
corrplot(cor_matrix, method = "color", type = "upper",
tl.col = "black", tl.cex = 0.8, addCoef.col = "black", number.cex = 0.7)
Se crean dos subconjuntos según modo de transporte y se cuentan las observaciones de cambio de vestimenta y género.
MovilidadSostenible <- df %>%
filter(T05P17 %in% c("Transmilenio","SITP Zonal","Bicicleta (Propia o Tembici)","Caminata"))
TransportePrivado <- df %>%
filter(T05P17 %in% c("Automóvil particular (Como conductor, pasajero o en carro compartido)","Motocicleta"))
contar_vars <- function(data, nombre) {
cat("---", nombre, "---\n")
cat("Conteo T07P31:\n"); print(table(data$T07P31))
cat("Conteo C01P45:\n"); print(table(data$C01P45))
}
contar_vars(MovilidadSostenible, "Movilidad Sostenible")
## --- Movilidad Sostenible ---
## Conteo T07P31:
##
## 0 1
## 63 25
## Conteo C01P45:
##
## Femenino Masculino Prefiero no responder
## 42 45 1
contar_vars(TransportePrivado, "Transporte Privado")
## --- Transporte Privado ---
## Conteo T07P31:
##
## 0 1
## 39 29
## Conteo C01P45:
##
## Femenino Masculino Prefiero no responder
## 27 39 2
Se ajusta un modelo logístico con todas las variables candidatas.
formula_logit <- as.formula(
"T07P31 ~ B07P41 + C01P45 + P05P10 + P02P7 + P04P9 + P07P23 +
P01P6 + A02P5 + A04P12 + A03P11 + E01P34 + E02P35 +
E03P36 + E04P37 + E05P38 + E06P39 + T04P16 + T02P14 + T01P13"
)
modelo <- glm(formula_logit, data = df, family = binomial)
summary(modelo)
##
## Call:
## glm(formula = formula_logit, family = binomial, data = df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.53720 2.16385 -0.710 0.47745
## B07P413 1.04413 1.33489 0.782 0.43411
## B07P414 1.52084 1.30403 1.166 0.24351
## B07P415 0.61156 1.32902 0.460 0.64540
## B07P416 0.32106 1.35623 0.237 0.81287
## B07P41No sé 0.60154 1.48125 0.406 0.68467
## C01P45Masculino -1.61627 0.50228 -3.218 0.00129 **
## C01P45Prefiero no responder -16.97045 1177.64553 -0.014 0.98850
## P05P10 0.01174 0.26988 0.044 0.96530
## P02P7 0.13383 0.23941 0.559 0.57614
## P04P9 0.07168 0.16864 0.425 0.67082
## P07P23 -0.28825 0.28321 -1.018 0.30879
## P01P6 0.05816 0.17091 0.340 0.73364
## A02P5 0.19000 0.18337 1.036 0.30014
## A04P12 -0.12241 0.17067 -0.717 0.47321
## A03P11 0.01113 0.14100 0.079 0.93707
## E01P34 -0.33089 0.57779 -0.573 0.56686
## E02P35 0.26729 0.54064 0.494 0.62102
## E03P36 0.41679 0.55742 0.748 0.45464
## E04P37 -0.21260 0.46177 -0.460 0.64523
## E05P38 -0.44551 0.50759 -0.878 0.38012
## E06P39 -0.19038 0.52163 -0.365 0.71513
## T04P16 -0.20190 0.15549 -1.298 0.19413
## T02P14 0.31088 0.20662 1.505 0.13243
## T01P13 0.17663 0.16531 1.068 0.28533
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 211.01 on 162 degrees of freedom
## Residual deviance: 169.77 on 138 degrees of freedom
## AIC: 219.77
##
## Number of Fisher Scoring iterations: 15
exp(coef(modelo))
## (Intercept) B07P413
## 2.149815e-01 2.840932e+00
## B07P414 B07P415
## 4.576064e+00 1.843303e+00
## B07P416 B07P41No sé
## 1.378583e+00 1.824935e+00
## C01P45Masculino C01P45Prefiero no responder
## 1.986376e-01 4.264103e-08
## P05P10 P02P7
## 1.011811e+00 1.143203e+00
## P04P9 P07P23
## 1.074307e+00 7.495776e-01
## P01P6 A02P5
## 1.059885e+00 1.209251e+00
## A04P12 A03P11
## 8.847826e-01 1.011193e+00
## E01P34 E02P35
## 7.182809e-01 1.306424e+00
## E03P36 E04P37
## 1.517083e+00 8.084774e-01
## E05P38 E06P39
## 6.405005e-01 8.266460e-01
## T04P16 T02P14
## 8.171777e-01 1.364628e+00
## T01P13
## 1.193184e+00
Se repite el ajuste para Movilidad Sostenible y Transporte Privado.
modelo_ms <- glm(formula_logit, data = MovilidadSostenible, family = binomial)
modelo_tp <- glm(formula_logit, data = TransportePrivado, family = binomial)
summary(modelo_ms); exp(coef(modelo_ms))
##
## Call:
## glm(formula = formula_logit, family = binomial, data = MovilidadSostenible)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.17759 5.33547 0.783 0.43364
## B07P413 0.85105 2.08255 0.409 0.68279
## B07P414 3.12664 2.15493 1.451 0.14680
## B07P415 -1.61930 2.36074 -0.686 0.49276
## B07P416 -0.18053 2.25717 -0.080 0.93625
## B07P41No sé -0.56395 2.71027 -0.208 0.83517
## C01P45Masculino -6.45399 2.09490 -3.081 0.00206 **
## C01P45Prefiero no responder -19.84132 2399.54624 -0.008 0.99340
## P05P10 -0.59690 0.84975 -0.702 0.48240
## P02P7 1.04807 0.62604 1.674 0.09411 .
## P04P9 -0.87225 0.43915 -1.986 0.04701 *
## P07P23 -1.63375 0.72304 -2.260 0.02385 *
## P01P6 -0.02668 0.43457 -0.061 0.95105
## A02P5 0.76737 0.45456 1.688 0.09138 .
## A04P12 0.04357 0.49550 0.088 0.92994
## A03P11 -0.33063 0.33800 -0.978 0.32799
## E01P34 0.99443 1.26898 0.784 0.43325
## E02P35 0.57607 1.28420 0.449 0.65373
## E03P36 -1.05370 1.16350 -0.906 0.36513
## E04P37 -0.21507 1.09266 -0.197 0.84396
## E05P38 -0.64485 1.14258 -0.564 0.57250
## E06P39 0.28968 1.09215 0.265 0.79082
## T04P16 -0.30049 0.46460 -0.647 0.51778
## T02P14 0.21363 0.40568 0.527 0.59847
## T01P13 0.78373 0.45864 1.709 0.08749 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 105.033 on 87 degrees of freedom
## Residual deviance: 52.517 on 63 degrees of freedom
## AIC: 102.52
##
## Number of Fisher Scoring iterations: 15
## (Intercept) B07P413
## 6.520826e+01 2.342103e+00
## B07P414 B07P415
## 2.279733e+01 1.980367e-01
## B07P416 B07P41No sé
## 8.348304e-01 5.689544e-01
## C01P45Masculino C01P45Prefiero no responder
## 1.574226e-03 2.415603e-09
## P05P10 P02P7
## 5.505158e-01 2.852154e+00
## P04P9 P07P23
## 4.180082e-01 1.951971e-01
## P01P6 A02P5
## 9.736769e-01 2.154088e+00
## A04P12 A03P11
## 1.044530e+00 7.184745e-01
## E01P34 E02P35
## 2.703176e+00 1.779037e+00
## E03P36 E04P37
## 3.486456e-01 8.064875e-01
## E05P38 E06P39
## 5.247414e-01 1.336003e+00
## T04P16 T02P14
## 7.404573e-01 1.238163e+00
## T01P13
## 2.189621e+00
summary(modelo_tp); exp(coef(modelo_tp))
##
## Call:
## glm(formula = formula_logit, family = binomial, data = TransportePrivado)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.72942 4.21053 1.123 0.2613
## B07P414 0.15249 1.65244 0.092 0.9265
## B07P415 0.49228 1.45666 0.338 0.7354
## B07P416 -0.53188 1.51741 -0.351 0.7259
## B07P41No sé 0.72722 2.51544 0.289 0.7725
## C01P45Masculino -0.09537 0.81558 -0.117 0.9069
## C01P45Prefiero no responder -15.14229 1565.50251 -0.010 0.9923
## P05P10 -0.55928 0.46672 -1.198 0.2308
## P02P7 0.01271 0.55185 0.023 0.9816
## P04P9 0.50000 0.32916 1.519 0.1288
## P07P23 -0.41628 0.63848 -0.652 0.5144
## P01P6 -0.23878 0.30687 -0.778 0.4365
## A02P5 -0.25415 0.33981 -0.748 0.4545
## A04P12 -0.67898 0.32895 -2.064 0.0390 *
## A03P11 0.11504 0.25146 0.457 0.6473
## E01P34 -1.23423 1.12303 -1.099 0.2718
## E02P35 0.26703 1.09288 0.244 0.8070
## E03P36 3.31825 1.43117 2.319 0.0204 *
## E04P37 -0.96860 1.06978 -0.905 0.3652
## E05P38 0.11724 1.22195 0.096 0.9236
## E06P39 -0.78047 1.22490 -0.637 0.5240
## T04P16 -0.33222 0.25115 -1.323 0.1859
## T02P14 0.50853 0.44905 1.132 0.2574
## T01P13 0.08661 0.27818 0.311 0.7555
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 92.792 on 67 degrees of freedom
## Residual deviance: 64.053 on 44 degrees of freedom
## AIC: 112.05
##
## Number of Fisher Scoring iterations: 15
## (Intercept) B07P414
## 1.132304e+02 1.164728e+00
## B07P415 B07P416
## 1.636049e+00 5.874999e-01
## B07P41No sé C01P45Masculino
## 2.069323e+00 9.090374e-01
## C01P45Prefiero no responder P05P10
## 2.653306e-07 5.716208e-01
## P02P7 P04P9
## 1.012789e+00 1.648724e+00
## P07P23 P01P6
## 6.594924e-01 7.875850e-01
## A02P5 A04P12
## 7.755735e-01 5.071328e-01
## A03P11 E01P34
## 1.121915e+00 2.910601e-01
## E02P35 E03P36
## 1.306083e+00 2.761199e+01
## E04P37 E05P38
## 3.796130e-01 1.124391e+00
## E06P39 T04P16
## 4.581894e-01 7.173323e-01
## T02P14 T01P13
## 1.662847e+00 1.090469e+00
Función para calcular el R² y resultados para cada modelo.
pseudo_r2 <- function(m) {
full <- logLik(m)
null <- logLik(update(m, . ~ 1))
1 - as.numeric(full / null)
}
cat("R² General: ", pseudo_r2(modelo), "\n")
## R² General: 0.1954445
cat("R² Movilidad Sostenible: ", pseudo_r2(modelo_ms), "\n")
## R² Movilidad Sostenible: 0.4999911
cat("R² Transporte Privado: ", pseudo_r2(modelo_tp), "\n")
## R² Transporte Privado: 0.309712
A continuación se emplea la función
variables_significativas_seguras para extraer predictores
con p < 0.20 y reajustar los modelos reducidos correspondientes.
# Función para identificar variables significativas (p < 0.20)
variables_significativas_seguras <- function(modelo, datos, umbral = 0.2) {
coefs <- summary(modelo)$coefficients
filas <- rownames(coefs)[coefs[,4] < umbral & rownames(coefs) != "(Intercept)"]
# Limpiar nombres base (antes de puntos o interacciones)
base <- unique(sapply(strsplit(filas, "[\\.:]"), `[[`, 1))
# Filtrar sólo nombres presentes en datos
intersect(base, names(datos))
}
# Obtener predictores con p < 0.20 para cada modelo
gen_sig <- variables_significativas_seguras(modelo, df)
ms_sig <- variables_significativas_seguras(modelo_ms, MovilidadSostenible)
tp_sig <- variables_significativas_seguras(modelo_tp, TransportePrivado)
# Ajustar y mostrar modelos reducidos sólo si hay predictores
# General reducido
if (length(gen_sig) > 0) {
form_gen_sig <- as.formula(paste("T07P31 ~", paste(gen_sig, collapse = " + ")))
modelo_general_sig <- glm(form_gen_sig, data = df, family = binomial)
cat("## General reducido (p<0.20)
")
print(summary(modelo_general_sig))
cat("McFadden R²:", pseudo_r2(modelo_general_sig), "
")
} else {
cat("No hay predictores con p < 0.20 para el modelo general reducido.
")
}
## ## General reducido (p<0.20)
##
## Call:
## glm(formula = form_gen_sig, family = binomial, data = df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.852717 0.683922 -2.709 0.00675 **
## T04P16 0.004969 0.117803 0.042 0.96636
## T02P14 0.313169 0.139467 2.245 0.02474 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 211.01 on 162 degrees of freedom
## Residual deviance: 205.50 on 160 degrees of freedom
## AIC: 211.5
##
## Number of Fisher Scoring iterations: 4
##
## McFadden R²: 0.0260739
# Movilidad Sostenible reducido
if (length(ms_sig) > 0) {
form_ms_sig <- as.formula(paste("T07P31 ~", paste(ms_sig, collapse = " + ")))
modelo_ms_sig <- glm(form_ms_sig, data = MovilidadSostenible, family = binomial)
cat("## Movilidad Sostenible reducido (p<0.20)
")
print(summary(modelo_ms_sig))
cat("McFadden R²:", pseudo_r2(modelo_ms_sig), "
")
} else {
cat("No hay predictores con p < 0.20 para el modelo MS reducido.
")
}
## ## Movilidad Sostenible reducido (p<0.20)
##
## Call:
## glm(formula = form_ms_sig, family = binomial, data = MovilidadSostenible)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.4478 1.3803 -0.324 0.746
## P02P7 0.1956 0.3027 0.646 0.518
## P04P9 -0.3679 0.2368 -1.554 0.120
## P07P23 -0.5339 0.3714 -1.437 0.151
## A02P5 0.3211 0.2581 1.244 0.213
## T01P13 0.2860 0.2150 1.330 0.183
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 105.033 on 87 degrees of freedom
## Residual deviance: 88.869 on 82 degrees of freedom
## AIC: 100.87
##
## Number of Fisher Scoring iterations: 4
##
## McFadden R²: 0.1538866
# Transporte Privado reducido
if (length(tp_sig) > 0) {
form_tp_sig <- as.formula(paste("T07P31 ~", paste(tp_sig, collapse = " + ")))
modelo_tp_sig <- glm(form_tp_sig, data = TransportePrivado, family = binomial)
cat("## Transporte Privado reducido (p<0.20)
")
print(summary(modelo_tp_sig))
cat("McFadden R²:", pseudo_r2(modelo_tp_sig), "
")
} else {
cat("No hay predictores con p < 0.20 para el modelo TP reducido.
")
}
## ## Transporte Privado reducido (p<0.20)
##
## Call:
## glm(formula = form_tp_sig, family = binomial, data = TransportePrivado)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1717 1.0604 -0.162 0.8713
## P04P9 0.4082 0.2236 1.825 0.0679 .
## A04P12 -0.2971 0.2161 -1.375 0.1690
## E03P36 2.1261 0.8552 2.486 0.0129 *
## T04P16 -0.2115 0.1891 -1.119 0.2632
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 92.792 on 67 degrees of freedom
## Residual deviance: 78.149 on 63 degrees of freedom
## AIC: 88.149
##
## Number of Fisher Scoring iterations: 3
##
## McFadden R²: 0.1578038
Se grafican porcentajes de cambio de vestimenta para la muestra total, por modo y por género.
datos_vis <- datos %>%
mutate(
cambio = factor(T07P31, levels=c(0,1), labels=c("No cambia","Cambia")),
modo = case_when(
T05P17 %in% c("Transmilenio","SITP Zonal","Bicicleta (Propia o Tembici)","Caminata") ~ "Movilidad Sostenible",
T05P17 %in% c("Automóvil particular (Como conductor, pasajero o en carro compartido)","Motocicleta") ~ "Transporte Privado",
TRUE ~ NA_character_
)
)
# Total
datos_vis %>%
count(cambio) %>%
mutate(pct = n/sum(n)*100) %>%
ggplot(aes(cambio,pct,fill=cambio)) +
geom_col(show.legend=FALSE) +
geom_text(aes(label=paste0(sprintf("%.1f",pct),"%")), vjust=-0.5) +
labs(title="Cambio de vestimenta - Total", y="%") +
theme_minimal()
# Por modo
datos_vis %>%
filter(!is.na(modo)) %>%
count(modo,cambio) %>%
group_by(modo) %>%
mutate(pct=n/sum(n)*100) %>%
ggplot(aes(modo,pct,fill=cambio)) +
geom_col(position="dodge") +
geom_text(aes(label=paste0(sprintf("%.1f",pct),"%")),
position=position_dodge(0.9), vjust=-0.3) +
labs(title="Cambio de vestimenta por modo", x="Modo", y="%") +
theme_minimal() + theme(axis.text.x=element_text(angle=45,hjust=1))
# Por género
datos_vis %>%
count(C01P45,cambio) %>%
filter(C01P45!="Prefiero no responder") %>%
group_by(C01P45) %>%
mutate(pct=n/sum(n)*100) %>%
ggplot(aes(C01P45,pct,fill=cambio)) +
geom_col(position="dodge") +
geom_text(aes(label=paste0(sprintf("%.1f",pct),"%")),
position=position_dodge(0.9), vjust=-0.3) +
labs(title="Cambio de vestimenta por género", x="Género", y="%") +
theme_minimal()