1. Introducción

En este documento se desarrollan los pasos para cargar datos, realizar un análisis exploratorio (EDA), ajustar modelos de regresión logística para diferentes subgrupos y visualizar resultados clave sobre el cambio de vestimenta por percepción de inseguridad.

1.1 Carga de librerías y datos

A continuación, se cargan las librerías necesarias y se lee el archivo Excel con la base de datos limpia.

# Librerías principales
library(readxl)
library(dplyr)
library(ggplot2)
library(GGally)
library(corrplot)
library(DataExplorer)

# Leer datos desde Excel (ajustar ruta según entorno)
datos <- read_excel("BD_clean.xlsx")

# Selección de variables de interés
variables_interes <- c(
  "T07P31", "B07P41", "C01P45", "P05P10", "P02P7", "P04P9", 
  "P07P23", "P01P6", "A02P5", "A04P12", "A03P11", 
  paste0("E0", 1:6, "P3", 4:9), # E01P34 a E06P39
  "T08P32", "T04P16", "T02P14", "T01P13", "T05P17"
)

df <- datos %>% select(all_of(variables_interes))

# Inspección inicial
summary(df)

##      T07P31          B07P41             C01P45              P05P10     
##  Min.   :0.0000   Length:163         Length:163         Min.   :1.000  
##  1st Qu.:0.0000   Class :character   Class :character   1st Qu.:4.000  
##  Median :0.0000   Mode  :character   Mode  :character   Median :5.000  
##  Mean   :0.3497                                         Mean   :4.466  
##  3rd Qu.:1.0000                                         3rd Qu.:5.000  
##  Max.   :1.0000                                         Max.   :5.000  
##      P02P7           P04P9          P07P23          P01P6          A02P5      
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.00   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:2.00   1st Qu.:3.000   1st Qu.:2.00   1st Qu.:1.000  
##  Median :4.000   Median :3.00   Median :3.000   Median :3.00   Median :2.000  
##  Mean   :3.742   Mean   :2.73   Mean   :3.595   Mean   :3.11   Mean   :2.092  
##  3rd Qu.:5.000   3rd Qu.:4.00   3rd Qu.:4.000   3rd Qu.:4.50   3rd Qu.:3.000  
##  Max.   :5.000   Max.   :5.00   Max.   :5.000   Max.   :5.00   Max.   :5.000  
##      A04P12          A03P11          E01P34           E02P35      
##  Min.   :1.000   Min.   :1.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:3.000   1st Qu.:1.500   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :4.000   Median :3.000   Median :0.0000   Median :0.0000  
##  Mean   :3.583   Mean   :3.215   Mean   :0.1963   Mean   :0.2577  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :5.000   Max.   :5.000   Max.   :1.0000   Max.   :1.0000  
##      E03P36           E04P37           E05P38           E06P39      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.2331   Mean   :0.5215   Mean   :0.3681   Mean   :0.3926  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##      T08P32           T04P16          T02P14          T01P13     
##  Min.   :0.0000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:1.000   1st Qu.:3.000   1st Qu.:1.000  
##  Median :0.0000   Median :3.000   Median :4.000   Median :3.000  
##  Mean   :0.3926   Mean   :2.706   Mean   :3.804   Mean   :2.822  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##     T05P17         
##  Length:163        
##  Class :character  
##  Mode  :character  
##                    
##                    
##

str(df)

## tibble [163 × 22] (S3: tbl_df/tbl/data.frame)
##  $ T07P31: num [1:163] 0 0 0 1 1 1 0 0 1 1 ...
##  $ B07P41: chr [1:163] "4" "4" "4" "5" ...
##  $ C01P45: chr [1:163] "Masculino" "Masculino" "Masculino" "Femenino" ...
##  $ P05P10: num [1:163] 4 2 4 4 5 5 5 4 4 5 ...
##  $ P02P7 : num [1:163] 5 3 5 4 5 4 3 5 5 5 ...
##  $ P04P9 : num [1:163] 4 4 5 3 2 3 1 3 2 5 ...
##  $ P07P23: num [1:163] 4 3 4 4 4 3 5 5 4 5 ...
##  $ P01P6 : num [1:163] 2 3 1 3 4 3 3 4 1 4 ...
##  $ A02P5 : num [1:163] 1 1 1 1 4 2 1 4 1 3 ...
##  $ A04P12: num [1:163] 4 3 5 5 3 5 5 5 5 2 ...
##  $ A03P11: num [1:163] 4 1 1 1 5 1 5 1 2 2 ...
##  $ E01P34: num [1:163] 0 0 0 0 0 0 0 0 0 0 ...
##  $ E02P35: num [1:163] 0 0 1 1 0 0 0 0 0 0 ...
##  $ E03P36: num [1:163] 0 0 0 0 1 0 0 0 0 0 ...
##  $ E04P37: num [1:163] 0 0 0 0 0 0 0 0 0 1 ...
##  $ E05P38: num [1:163] 0 0 1 0 0 1 0 0 0 0 ...
##  $ E06P39: num [1:163] 0 0 0 0 1 1 1 0 0 0 ...
##  $ T08P32: num [1:163] 0 1 0 0 1 1 1 0 1 1 ...
##  $ T04P16: num [1:163] 5 3 3 4 2 2 2 1 4 1 ...
##  $ T02P14: num [1:163] 2 1 2 5 4 5 3 4 4 5 ...
##  $ T01P13: num [1:163] 5 1 1 3 4 3 2 3 3 3 ...
##  $ T05P17: chr [1:163] "Transmilenio" "SITP Zonal" "Bicicleta (Propia o Tembici)" "Automóvil particular (Como conductor, pasajero o en carro compartido)" ...

2. Análisis Exploratorio de Datos (EDA)

2.1 Identificación de tipos de variables

Se clasifican las variables en continuas y categóricas para aplicar gráficos adecuados.

tipos <- sapply(df, class)
continuas <- names(df)[tipos %in% c("numeric","integer")]
categoricas <- names(df)[tipos %in% c("factor","character")]

2.2 Distribuciones univariadas

Histogramas de variables numéricas

for (var in continuas) {
  print(
    ggplot(df, aes_string(x = var)) +
      geom_histogram(bins = 30, fill = "skyblue", color = "black") +
      theme_minimal() +
      labs(title = paste("Histograma de", var))
  )
}

Barras de variables categóricas

for (var in categoricas) {
  print(
    ggplot(df, aes_string(x = var)) +
      geom_bar(fill = "tomato", color = "black") +
      theme_minimal() +
      labs(title = paste("Gráfico de barras de", var))
  )
}

2.3 Matriz de correlación y pairplot

# Pairplot (hasta 10 variables continuas)
if (length(continuas) <= 10) {
  print(ggpairs(df[, continuas]))
} else {
  print(ggpairs(df[, continuas[1:10]]))
}

# Matriz de correlación
cor_matrix <- cor(df[, continuas], use = "pairwise.complete.obs")
corrplot(cor_matrix, method = "color", type = "upper",
         tl.col = "black", tl.cex = 0.8, addCoef.col = "black", number.cex = 0.7)

3. Conteo por subgrupos

Se crean dos subconjuntos según modo de transporte y se cuentan las observaciones de cambio de vestimenta y género.

MovilidadSostenible <- df %>%
  filter(T05P17 %in% c("Transmilenio","SITP Zonal","Bicicleta (Propia o Tembici)","Caminata"))

TransportePrivado <- df %>%
  filter(T05P17 %in% c("Automóvil particular (Como conductor, pasajero o en carro compartido)","Motocicleta"))

contar_vars <- function(data, nombre) {
  cat("---", nombre, "---\n")
  cat("Conteo T07P31:\n"); print(table(data$T07P31))
  cat("Conteo C01P45:\n"); print(table(data$C01P45))
}

contar_vars(MovilidadSostenible, "Movilidad Sostenible")

## --- Movilidad Sostenible ---
## Conteo T07P31:
## 
##  0  1 
## 63 25 
## Conteo C01P45:
## 
##              Femenino             Masculino Prefiero no responder 
##                    42                    45                     1

contar_vars(TransportePrivado, "Transporte Privado")

## --- Transporte Privado ---
## Conteo T07P31:
## 
##  0  1 
## 39 29 
## Conteo C01P45:
## 
##              Femenino             Masculino Prefiero no responder 
##                    27                    39                     2

4. Modelos de regresión logística

4.1 Definición de fórmula y modelo general

Se ajusta un modelo logístico con todas las variables candidatas.

formula_logit <- as.formula(
  "T07P31 ~ B07P41 + C01P45 + P05P10 + P02P7 + P04P9 + P07P23 +
   P01P6 + A02P5 + A04P12 + A03P11 + E01P34 + E02P35 +
   E03P36 + E04P37 + E05P38 + E06P39 + T04P16 + T02P14 + T01P13"
)
modelo <- glm(formula_logit, data = df, family = binomial)
summary(modelo)

## 
## Call:
## glm(formula = formula_logit, family = binomial, data = df)
## 
## Coefficients:
##                               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                   -1.53720    2.16385  -0.710  0.47745   
## B07P413                        1.04413    1.33489   0.782  0.43411   
## B07P414                        1.52084    1.30403   1.166  0.24351   
## B07P415                        0.61156    1.32902   0.460  0.64540   
## B07P416                        0.32106    1.35623   0.237  0.81287   
## B07P41No sé                    0.60154    1.48125   0.406  0.68467   
## C01P45Masculino               -1.61627    0.50228  -3.218  0.00129 **
## C01P45Prefiero no responder  -16.97045 1177.64553  -0.014  0.98850   
## P05P10                         0.01174    0.26988   0.044  0.96530   
## P02P7                          0.13383    0.23941   0.559  0.57614   
## P04P9                          0.07168    0.16864   0.425  0.67082   
## P07P23                        -0.28825    0.28321  -1.018  0.30879   
## P01P6                          0.05816    0.17091   0.340  0.73364   
## A02P5                          0.19000    0.18337   1.036  0.30014   
## A04P12                        -0.12241    0.17067  -0.717  0.47321   
## A03P11                         0.01113    0.14100   0.079  0.93707   
## E01P34                        -0.33089    0.57779  -0.573  0.56686   
## E02P35                         0.26729    0.54064   0.494  0.62102   
## E03P36                         0.41679    0.55742   0.748  0.45464   
## E04P37                        -0.21260    0.46177  -0.460  0.64523   
## E05P38                        -0.44551    0.50759  -0.878  0.38012   
## E06P39                        -0.19038    0.52163  -0.365  0.71513   
## T04P16                        -0.20190    0.15549  -1.298  0.19413   
## T02P14                         0.31088    0.20662   1.505  0.13243   
## T01P13                         0.17663    0.16531   1.068  0.28533   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 211.01  on 162  degrees of freedom
## Residual deviance: 169.77  on 138  degrees of freedom
## AIC: 219.77
## 
## Number of Fisher Scoring iterations: 15

exp(coef(modelo))

##                 (Intercept)                     B07P413 
##                2.149815e-01                2.840932e+00 
##                     B07P414                     B07P415 
##                4.576064e+00                1.843303e+00 
##                     B07P416                 B07P41No sé 
##                1.378583e+00                1.824935e+00 
##             C01P45Masculino C01P45Prefiero no responder 
##                1.986376e-01                4.264103e-08 
##                      P05P10                       P02P7 
##                1.011811e+00                1.143203e+00 
##                       P04P9                      P07P23 
##                1.074307e+00                7.495776e-01 
##                       P01P6                       A02P5 
##                1.059885e+00                1.209251e+00 
##                      A04P12                      A03P11 
##                8.847826e-01                1.011193e+00 
##                      E01P34                      E02P35 
##                7.182809e-01                1.306424e+00 
##                      E03P36                      E04P37 
##                1.517083e+00                8.084774e-01 
##                      E05P38                      E06P39 
##                6.405005e-01                8.266460e-01 
##                      T04P16                      T02P14 
##                8.171777e-01                1.364628e+00 
##                      T01P13 
##                1.193184e+00

4.2 Modelos por subgrupo modal

Se repite el ajuste para Movilidad Sostenible y Transporte Privado.

modelo_ms  <- glm(formula_logit, data = MovilidadSostenible, family = binomial)
modelo_tp  <- glm(formula_logit, data = TransportePrivado,   family = binomial)
summary(modelo_ms);  exp(coef(modelo_ms))

## 
## Call:
## glm(formula = formula_logit, family = binomial, data = MovilidadSostenible)
## 
## Coefficients:
##                               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                    4.17759    5.33547   0.783  0.43364   
## B07P413                        0.85105    2.08255   0.409  0.68279   
## B07P414                        3.12664    2.15493   1.451  0.14680   
## B07P415                       -1.61930    2.36074  -0.686  0.49276   
## B07P416                       -0.18053    2.25717  -0.080  0.93625   
## B07P41No sé                   -0.56395    2.71027  -0.208  0.83517   
## C01P45Masculino               -6.45399    2.09490  -3.081  0.00206 **
## C01P45Prefiero no responder  -19.84132 2399.54624  -0.008  0.99340   
## P05P10                        -0.59690    0.84975  -0.702  0.48240   
## P02P7                          1.04807    0.62604   1.674  0.09411 . 
## P04P9                         -0.87225    0.43915  -1.986  0.04701 * 
## P07P23                        -1.63375    0.72304  -2.260  0.02385 * 
## P01P6                         -0.02668    0.43457  -0.061  0.95105   
## A02P5                          0.76737    0.45456   1.688  0.09138 . 
## A04P12                         0.04357    0.49550   0.088  0.92994   
## A03P11                        -0.33063    0.33800  -0.978  0.32799   
## E01P34                         0.99443    1.26898   0.784  0.43325   
## E02P35                         0.57607    1.28420   0.449  0.65373   
## E03P36                        -1.05370    1.16350  -0.906  0.36513   
## E04P37                        -0.21507    1.09266  -0.197  0.84396   
## E05P38                        -0.64485    1.14258  -0.564  0.57250   
## E06P39                         0.28968    1.09215   0.265  0.79082   
## T04P16                        -0.30049    0.46460  -0.647  0.51778   
## T02P14                         0.21363    0.40568   0.527  0.59847   
## T01P13                         0.78373    0.45864   1.709  0.08749 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 105.033  on 87  degrees of freedom
## Residual deviance:  52.517  on 63  degrees of freedom
## AIC: 102.52
## 
## Number of Fisher Scoring iterations: 15

##                 (Intercept)                     B07P413 
##                6.520826e+01                2.342103e+00 
##                     B07P414                     B07P415 
##                2.279733e+01                1.980367e-01 
##                     B07P416                 B07P41No sé 
##                8.348304e-01                5.689544e-01 
##             C01P45Masculino C01P45Prefiero no responder 
##                1.574226e-03                2.415603e-09 
##                      P05P10                       P02P7 
##                5.505158e-01                2.852154e+00 
##                       P04P9                      P07P23 
##                4.180082e-01                1.951971e-01 
##                       P01P6                       A02P5 
##                9.736769e-01                2.154088e+00 
##                      A04P12                      A03P11 
##                1.044530e+00                7.184745e-01 
##                      E01P34                      E02P35 
##                2.703176e+00                1.779037e+00 
##                      E03P36                      E04P37 
##                3.486456e-01                8.064875e-01 
##                      E05P38                      E06P39 
##                5.247414e-01                1.336003e+00 
##                      T04P16                      T02P14 
##                7.404573e-01                1.238163e+00 
##                      T01P13 
##                2.189621e+00

summary(modelo_tp);  exp(coef(modelo_tp))

## 
## Call:
## glm(formula = formula_logit, family = binomial, data = TransportePrivado)
## 
## Coefficients:
##                               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                    4.72942    4.21053   1.123   0.2613  
## B07P414                        0.15249    1.65244   0.092   0.9265  
## B07P415                        0.49228    1.45666   0.338   0.7354  
## B07P416                       -0.53188    1.51741  -0.351   0.7259  
## B07P41No sé                    0.72722    2.51544   0.289   0.7725  
## C01P45Masculino               -0.09537    0.81558  -0.117   0.9069  
## C01P45Prefiero no responder  -15.14229 1565.50251  -0.010   0.9923  
## P05P10                        -0.55928    0.46672  -1.198   0.2308  
## P02P7                          0.01271    0.55185   0.023   0.9816  
## P04P9                          0.50000    0.32916   1.519   0.1288  
## P07P23                        -0.41628    0.63848  -0.652   0.5144  
## P01P6                         -0.23878    0.30687  -0.778   0.4365  
## A02P5                         -0.25415    0.33981  -0.748   0.4545  
## A04P12                        -0.67898    0.32895  -2.064   0.0390 *
## A03P11                         0.11504    0.25146   0.457   0.6473  
## E01P34                        -1.23423    1.12303  -1.099   0.2718  
## E02P35                         0.26703    1.09288   0.244   0.8070  
## E03P36                         3.31825    1.43117   2.319   0.0204 *
## E04P37                        -0.96860    1.06978  -0.905   0.3652  
## E05P38                         0.11724    1.22195   0.096   0.9236  
## E06P39                        -0.78047    1.22490  -0.637   0.5240  
## T04P16                        -0.33222    0.25115  -1.323   0.1859  
## T02P14                         0.50853    0.44905   1.132   0.2574  
## T01P13                         0.08661    0.27818   0.311   0.7555  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 92.792  on 67  degrees of freedom
## Residual deviance: 64.053  on 44  degrees of freedom
## AIC: 112.05
## 
## Number of Fisher Scoring iterations: 15

##                 (Intercept)                     B07P414 
##                1.132304e+02                1.164728e+00 
##                     B07P415                     B07P416 
##                1.636049e+00                5.874999e-01 
##                 B07P41No sé             C01P45Masculino 
##                2.069323e+00                9.090374e-01 
## C01P45Prefiero no responder                      P05P10 
##                2.653306e-07                5.716208e-01 
##                       P02P7                       P04P9 
##                1.012789e+00                1.648724e+00 
##                      P07P23                       P01P6 
##                6.594924e-01                7.875850e-01 
##                       A02P5                      A04P12 
##                7.755735e-01                5.071328e-01 
##                      A03P11                      E01P34 
##                1.121915e+00                2.910601e-01 
##                      E02P35                      E03P36 
##                1.306083e+00                2.761199e+01 
##                      E04P37                      E05P38 
##                3.796130e-01                1.124391e+00 
##                      E06P39                      T04P16 
##                4.581894e-01                7.173323e-01 
##                      T02P14                      T01P13 
##                1.662847e+00                1.090469e+00

4.3 Pseudo R² de McFadden

Función para calcular el R² y resultados para cada modelo.

pseudo_r2 <- function(m) {
  full <- logLik(m)
  null <- logLik(update(m, . ~ 1))
  1 - as.numeric(full / null)
}
cat("R² General: ", pseudo_r2(modelo), "\n")

## R² General:  0.1954445

cat("R² Movilidad Sostenible: ", pseudo_r2(modelo_ms), "\n")

## R² Movilidad Sostenible:  0.4999911

cat("R² Transporte Privado: ", pseudo_r2(modelo_tp), "\n")

## R² Transporte Privado:  0.309712

5. Modelos reducidos y variables significativas

A continuación se emplea la función variables_significativas_seguras para extraer predictores con p < 0.20 y reajustar los modelos reducidos correspondientes.

# Función para identificar variables significativas (p < 0.20)
variables_significativas_seguras <- function(modelo, datos, umbral = 0.2) {
  coefs <- summary(modelo)$coefficients
  filas <- rownames(coefs)[coefs[,4] < umbral & rownames(coefs) != "(Intercept)"]
  # Limpiar nombres base (antes de puntos o interacciones)
  base <- unique(sapply(strsplit(filas, "[\\.:]"), `[[`, 1))
  # Filtrar sólo nombres presentes en datos
  intersect(base, names(datos))
}

# Obtener predictores con p < 0.20 para cada modelo
gen_sig <- variables_significativas_seguras(modelo, df)
ms_sig  <- variables_significativas_seguras(modelo_ms, MovilidadSostenible)
tp_sig  <- variables_significativas_seguras(modelo_tp, TransportePrivado)

# Ajustar y mostrar modelos reducidos sólo si hay predictores
# General reducido
if (length(gen_sig) > 0) {
  form_gen_sig <- as.formula(paste("T07P31 ~", paste(gen_sig, collapse = " + ")))
  modelo_general_sig <- glm(form_gen_sig, data = df, family = binomial)
  cat("## General reducido (p<0.20)
")
  print(summary(modelo_general_sig))
  cat("McFadden R²:", pseudo_r2(modelo_general_sig), "
")
} else {
  cat("No hay predictores con p < 0.20 para el modelo general reducido.
")
}

## ## General reducido (p<0.20)
## 
## Call:
## glm(formula = form_gen_sig, family = binomial, data = df)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -1.852717   0.683922  -2.709  0.00675 **
## T04P16       0.004969   0.117803   0.042  0.96636   
## T02P14       0.313169   0.139467   2.245  0.02474 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 211.01  on 162  degrees of freedom
## Residual deviance: 205.50  on 160  degrees of freedom
## AIC: 211.5
## 
## Number of Fisher Scoring iterations: 4
## 
## McFadden R²: 0.0260739

# Movilidad Sostenible reducido
if (length(ms_sig) > 0) {
  form_ms_sig <- as.formula(paste("T07P31 ~", paste(ms_sig, collapse = " + ")))
  modelo_ms_sig <- glm(form_ms_sig, data = MovilidadSostenible, family = binomial)
  cat("## Movilidad Sostenible reducido (p<0.20)
")
  print(summary(modelo_ms_sig))
  cat("McFadden R²:", pseudo_r2(modelo_ms_sig), "
")
} else {
  cat("No hay predictores con p < 0.20 para el modelo MS reducido.
")
}

## ## Movilidad Sostenible reducido (p<0.20)
## 
## Call:
## glm(formula = form_ms_sig, family = binomial, data = MovilidadSostenible)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -0.4478     1.3803  -0.324    0.746
## P02P7         0.1956     0.3027   0.646    0.518
## P04P9        -0.3679     0.2368  -1.554    0.120
## P07P23       -0.5339     0.3714  -1.437    0.151
## A02P5         0.3211     0.2581   1.244    0.213
## T01P13        0.2860     0.2150   1.330    0.183
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 105.033  on 87  degrees of freedom
## Residual deviance:  88.869  on 82  degrees of freedom
## AIC: 100.87
## 
## Number of Fisher Scoring iterations: 4
## 
## McFadden R²: 0.1538866

# Transporte Privado reducido
if (length(tp_sig) > 0) {
  form_tp_sig <- as.formula(paste("T07P31 ~", paste(tp_sig, collapse = " + ")))
  modelo_tp_sig <- glm(form_tp_sig, data = TransportePrivado, family = binomial)
  cat("## Transporte Privado reducido (p<0.20)
")
  print(summary(modelo_tp_sig))
  cat("McFadden R²:", pseudo_r2(modelo_tp_sig), "
")
} else {
  cat("No hay predictores con p < 0.20 para el modelo TP reducido.
")
}

## ## Transporte Privado reducido (p<0.20)
## 
## Call:
## glm(formula = form_tp_sig, family = binomial, data = TransportePrivado)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -0.1717     1.0604  -0.162   0.8713  
## P04P9         0.4082     0.2236   1.825   0.0679 .
## A04P12       -0.2971     0.2161  -1.375   0.1690  
## E03P36        2.1261     0.8552   2.486   0.0129 *
## T04P16       -0.2115     0.1891  -1.119   0.2632  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 92.792  on 67  degrees of freedom
## Residual deviance: 78.149  on 63  degrees of freedom
## AIC: 88.149
## 
## Number of Fisher Scoring iterations: 3
## 
## McFadden R²: 0.1578038

6. Visualización de resultados clave

Se grafican porcentajes de cambio de vestimenta para la muestra total, por modo y por género.

datos_vis <- datos %>%
  mutate(
    cambio = factor(T07P31, levels=c(0,1), labels=c("No cambia","Cambia")),
    modo   = case_when(
      T05P17 %in% c("Transmilenio","SITP Zonal","Bicicleta (Propia o Tembici)","Caminata") ~ "Movilidad Sostenible",
      T05P17 %in% c("Automóvil particular (Como conductor, pasajero o en carro compartido)","Motocicleta") ~ "Transporte Privado",
      TRUE ~ NA_character_
    )
  )

# Total
datos_vis %>%
  count(cambio) %>%
  mutate(pct = n/sum(n)*100) %>%
  ggplot(aes(cambio,pct,fill=cambio)) +
    geom_col(show.legend=FALSE) +
    geom_text(aes(label=paste0(sprintf("%.1f",pct),"%")), vjust=-0.5) +
    labs(title="Cambio de vestimenta - Total", y="%") +
    theme_minimal()

# Por modo
datos_vis %>%
  filter(!is.na(modo)) %>%
  count(modo,cambio) %>%
  group_by(modo) %>%
  mutate(pct=n/sum(n)*100) %>%
  ggplot(aes(modo,pct,fill=cambio)) +
    geom_col(position="dodge") +
    geom_text(aes(label=paste0(sprintf("%.1f",pct),"%")),
              position=position_dodge(0.9), vjust=-0.3) +
    labs(title="Cambio de vestimenta por modo", x="Modo", y="%") +
    theme_minimal() + theme(axis.text.x=element_text(angle=45,hjust=1))

# Por género
datos_vis %>%
  count(C01P45,cambio) %>%
  filter(C01P45!="Prefiero no responder") %>%
  group_by(C01P45) %>%
  mutate(pct=n/sum(n)*100) %>%
  ggplot(aes(C01P45,pct,fill=cambio)) +
    geom_col(position="dodge") +
    geom_text(aes(label=paste0(sprintf("%.1f",pct),"%")),
              position=position_dodge(0.9), vjust=-0.3) +
    labs(title="Cambio de vestimenta por género", x="Género", y="%") +
    theme_minimal()

Modelo Tarea 2

Tomás Villescas

2025-05-20