##CARGA Y LIMPIEZA
# --- CARGA Y ESTRUCTURA ---
library(tidyr)
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Leer sin encabezados
df_raw <- read_excel("EncuestaMovilidad.xlsx", col_names = FALSE)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
# Nombres técnicos desde fila 2
colnames <- df_raw[2, ] |> unlist() |> as.character()
# Diccionario de preguntas (fila 3)
questions <- df_raw[3, ] |> unlist() |> as.character()
names(questions) <- colnames # Mapea Q1 → ¿texto?
# Datos reales desde fila 4
df <- df_raw[-c(1, 2, 3), ]
n
## function ()
## {
## peek_mask()$get_current_group_size()
## }
## <bytecode: 0x63f30c3b9008>
## <environment: namespace:dplyr>
str(df) # Fechas y números deben estar convertidos correctamente
## tibble [101 × 29] (S3: tbl_df/tbl/data.frame)
## $ ...1 : chr [1:101] "45818.735185185185" "45823.57929398148" "45823.57996527778" "45823.58033564815" ...
## $ ...2 : chr [1:101] "45818.735300925924" "45823.57979166666" "45823.58027777778" "45823.72761574074" ...
## $ ...3 : chr [1:101] "Survey Preview" "IP Address" "IP Address" "IP Address" ...
## $ ...4 : chr [1:101] NA "189.175.0.159" "189.175.0.159" "131.178.102.176" ...
## $ ...5 : chr [1:101] "100.0" "100.0" "100.0" "100.0" ...
## $ ...6 : chr [1:101] "9.0" "43.0" "27.0" "12725.0" ...
## $ ...7 : chr [1:101] "True" "True" "True" "True" ...
## $ ...8 : chr [1:101] "45818.73530623843" "45823.57980677083" "45823.5802912963" "45823.72763070602" ...
## $ ...9 : chr [1:101] "R_3loOQFR84p0F4Kx" "R_3OOyFXTbPzV4l3Z" "R_3zAvdXywOfrOudj" "R_1Ujip62zJkaNWSZ" ...
## $ ...10: chr [1:101] NA NA NA NA ...
## $ ...11: chr [1:101] NA NA NA NA ...
## $ ...12: chr [1:101] NA NA NA NA ...
## $ ...13: chr [1:101] NA NA NA NA ...
## $ ...14: chr [1:101] "25.6449" "25.6818" "25.6818" "25.6818" ...
## $ ...15: chr [1:101] "-100.311" "-100.2627" "-100.2627" "-100.2627" ...
## $ ...16: chr [1:101] "preview" "anonymous" "anonymous" "anonymous" ...
## $ ...17: chr [1:101] "ES-ES" "ES-ES" "ES-ES" "ES-ES" ...
## $ ...18: chr [1:101] "Si, acepto" "Si, acepto" "Si, acepto" "Si, acepto" ...
## $ ...19: chr [1:101] NA "Femenino" "Femenino" "Masculino" ...
## $ ...20: chr [1:101] NA "18 años" "22 años" "20 años" ...
## $ ...21: chr [1:101] NA NA NA NA ...
## $ ...22: chr [1:101] NA NA NA NA ...
## $ ...23: chr [1:101] NA NA NA NA ...
## $ ...24: chr [1:101] NA "Algo" "Bastante" "Bastante" ...
## $ ...25: chr [1:101] NA "Neutral" "De acuerdo" "Neutral" ...
## $ ...26: chr [1:101] NA "En desacuerdo" "Neutral" "Neutral" ...
## $ ...27: chr [1:101] NA "Neutral" "De acuerdo" "Neutral" ...
## $ ...28: chr [1:101] NA "De acuerdo" "De acuerdo" "Neutral" ...
## $ ...29: chr [1:101] "control" "luis" "luis_injusticia" "luis" ...
# 1. Usar fila 1 como nombres
colnames <- df_raw[1, ] |> unlist() |> as.character()
# 2. Extraer datos desde fila 2 en adelante
df <- df_raw[-1, ]
names(df) <- colnames
# 3. Limpiar estructura
df <- as.data.frame(df)
rownames(df) <- NULL
# Conversión de columnas clave
df <- df %>%
filter(Finished == "True") %>%
mutate(
Progress = as.numeric(Progress),
`Duration (in seconds)` = as.numeric(`Duration (in seconds)`),
StartDate = as.POSIXct(as.numeric(StartDate), origin = "1899-12-30"),
EndDate = as.POSIXct(as.numeric(EndDate), origin = "1899-12-30"),
RecordedDate = as.POSIXct(as.numeric(RecordedDate), origin = "1899-12-30")
)
colnames <- df_raw[1, ] |> unlist() |> as.character()
df <- df_raw[-1, ]
names(df) <- colnames
names(df) # ¿Ves nombres como "StartDate", "Progress", "Q1", etc.?
## [1] "StartDate" "EndDate" "Status"
## [4] "IPAddress" "Progress" "Duration (in seconds)"
## [7] "Finished" "RecordedDate" "ResponseId"
## [10] "RecipientLastName" "RecipientFirstName" "RecipientEmail"
## [13] "ExternalReference" "LocationLatitude" "LocationLongitude"
## [16] "DistributionChannel" "UserLanguage" "Q8"
## [19] "Q1" "Q4" "Q23"
## [22] "Q6" "Q24" "Q14"
## [25] "Q16" "Q17" "Q19"
## [28] "Q20" "tx"
str(df) # ¿Ves Progress como numérico? ¿Fechas como POSIXct?
## tibble [103 × 29] (S3: tbl_df/tbl/data.frame)
## $ StartDate : chr [1:103] "Fecha de inicio" "45818.73303240741" "45818.735185185185" "45823.57929398148" ...
## $ EndDate : chr [1:103] "Fecha final" "45818.73327546296" "45818.735300925924" "45823.57979166666" ...
## $ Status : chr [1:103] "Tipo de respuesta" "Survey Preview" "Survey Preview" "IP Address" ...
## $ IPAddress : chr [1:103] "Dirección IP" NA NA "189.175.0.159" ...
## $ Progress : chr [1:103] "Progreso" "100.0" "100.0" "100.0" ...
## $ Duration (in seconds): chr [1:103] "Duración (en segundos)" "20.0" "9.0" "43.0" ...
## $ Finished : chr [1:103] "Finalizado" "True" "True" "True" ...
## $ RecordedDate : chr [1:103] "Fecha registrada" "45818.73327994213" "45818.73530623843" "45823.57980677083" ...
## $ ResponseId : chr [1:103] "ID de respuesta" "R_31Xxwh2Evy2a5dO" "R_3loOQFR84p0F4Kx" "R_3OOyFXTbPzV4l3Z" ...
## $ RecipientLastName : chr [1:103] "Apellido del destinatario" NA NA NA ...
## $ RecipientFirstName : chr [1:103] "Nombre del destinatario" NA NA NA ...
## $ RecipientEmail : chr [1:103] "Correo electrónico del destinatario" NA NA NA ...
## $ ExternalReference : chr [1:103] "Datos de referencia externos" NA NA NA ...
## $ LocationLatitude : chr [1:103] "Latitud de ubicación" "25.6449" "25.6449" "25.6818" ...
## $ LocationLongitude : chr [1:103] "Longitud de ubicación" "-100.311" "-100.311" "-100.2627" ...
## $ DistributionChannel : chr [1:103] "Canal de la distribución" "preview" "preview" "anonymous" ...
## $ UserLanguage : chr [1:103] "Idioma del usuario" "ES-ES" "ES-ES" "ES-ES" ...
## $ Q8 : chr [1:103] "Estimadx participante: Te invitamos a responder esta encuesta como parte de un proyecto académico realizado por"| __truncated__ "Si, acepto" "Si, acepto" "Si, acepto" ...
## $ Q1 : chr [1:103] "Género" "Masculino" NA "Femenino" ...
## $ Q4 : chr [1:103] "¿Cuántos años tienes?" "18 años" NA "18 años" ...
## $ Q23 : chr [1:103] "Por favor siga con la encuesta, ¡gracias!" NA NA NA ...
## $ Q6 : chr [1:103] "La siguiente sección tiene como proposito que a partir de situación presentada, usted pueda escog..." NA NA NA ...
## $ Q24 : chr [1:103] "La siguiente sección tiene como proposito que a partir de situación presentada, usted pueda escoger la opción c"| __truncated__ NA NA NA ...
## $ Q14 : chr [1:103] "¿Qué tan importantes son las políticas de transporte?" NA NA "Algo" ...
## $ Q16 : chr [1:103] "Me siento motivadx a involucrarme en acciones colectivas relacionadas con la mejora del transporte público." NA NA "Neutral" ...
## $ Q17 : chr [1:103] "Creo que mi participación en temas públicos puede generar un cambio" NA NA "En desacuerdo" ...
## $ Q19 : chr [1:103] "Estoy dispuestx a firmar o compartir una petición para exigir mejoras en la movilidad urbana" NA NA "Neutral" ...
## $ Q20 : chr [1:103] "Creo que la movilidad urbana debería ser una prioridad en la agenda política local" NA NA "De acuerdo" ...
## $ tx : chr [1:103] "tx" "luis_injusticia" "control" "luis" ...
summary(df) # ¿Valores correctos, sin NA masivos?
## StartDate EndDate Status IPAddress
## Length:103 Length:103 Length:103 Length:103
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Progress Duration (in seconds) Finished RecordedDate
## Length:103 Length:103 Length:103 Length:103
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## ResponseId RecipientLastName RecipientFirstName RecipientEmail
## Length:103 Length:103 Length:103 Length:103
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## ExternalReference LocationLatitude LocationLongitude DistributionChannel
## Length:103 Length:103 Length:103 Length:103
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## UserLanguage Q8 Q1 Q4
## Length:103 Length:103 Length:103 Length:103
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Q23 Q6 Q24 Q14
## Length:103 Length:103 Length:103 Length:103
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Q16 Q17 Q19 Q20
## Length:103 Length:103 Length:103 Length:103
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## tx
## Length:103
## Class :character
## Mode :character
library(dplyr)
# 1. Eliminar la primera fila de etiquetas (actualmente en fila 1)
df <- df[-1, ]
# 2. Convertir columnas clave
df <- df %>%
mutate(
Progress = as.numeric(Progress),
`Duration (in seconds)` = as.numeric(`Duration (in seconds)`),
StartDate = as.POSIXct(as.numeric(StartDate), origin = "1899-12-30"),
EndDate = as.POSIXct(as.numeric(EndDate), origin = "1899-12-30"),
RecordedDate = as.POSIXct(as.numeric(RecordedDate), origin = "1899-12-30")
) %>%
filter(Finished == "True")
df <- df %>%
rename(
genero = Q1,
edad = Q4,
consentimiento = Q8,
politica_transporte = Q14,
motivacion = Q16,
cambio_publico = Q17,
peticion = Q19,
prioridad_local = Q20
)
write.csv(df, "encuesta_limpia_final.csv", row.names = FALSE)
head(df, 3)
## # A tibble: 3 × 29
## StartDate EndDate Status IPAddress Progress
## <dttm> <dttm> <chr> <chr> <dbl>
## 1 1899-12-30 12:43:38 1899-12-30 12:43:38 Survey Preview <NA> 100
## 2 1899-12-30 12:43:38 1899-12-30 12:43:38 Survey Preview <NA> 100
## 3 1899-12-30 12:43:43 1899-12-30 12:43:43 IP Address 189.175.0.159 100
## # ℹ 24 more variables: `Duration (in seconds)` <dbl>, Finished <chr>,
## # RecordedDate <dttm>, ResponseId <chr>, RecipientLastName <chr>,
## # RecipientFirstName <chr>, RecipientEmail <chr>, ExternalReference <chr>,
## # LocationLatitude <chr>, LocationLongitude <chr>, DistributionChannel <chr>,
## # UserLanguage <chr>, consentimiento <chr>, genero <chr>, edad <chr>,
## # Q23 <chr>, Q6 <chr>, Q24 <chr>, politica_transporte <chr>,
## # motivacion <chr>, cambio_publico <chr>, peticion <chr>, …
##ANÁLISIS
# Crear variable de grupo con nombres claros
df <- df %>%
mutate(grupo = case_when(
tx == "control" ~ "Control",
tx == "luis" ~ "Anecdote",
tx == "luis_injusticia" ~ "Anecdote + Injustice",
TRUE ~ NA_character_
))
# Escala Likert
escala_likert <- c(
"Muy en desacuerdo" = 1,
"En desacuerdo" = 2,
"Neutral" = 3,
"De acuerdo" = 4,
"Muy de acuerdo" = 5
)
# Convertir motivación y petición a escala numérica
df <- df %>%
mutate(
motivacion_num = escala_likert[motivacion],
peticion_num = escala_likert[peticion]
)
ANOVA Q16 motivación
modelo_motiv <- aov(motivacion_num ~ grupo, data = df)
summary(modelo_motiv)
## Df Sum Sq Mean Sq F value Pr(>F)
## grupo 2 6.7 3.349 4.975 0.00878 **
## Residuals 97 65.3 0.673
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
TukeyHSD(modelo_motiv)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = motivacion_num ~ grupo, data = df)
##
## $grupo
## diff lwr upr p adj
## Anecdote + Injustice-Anecdote 0.3449198 -0.1323194 0.8221590 0.2028361
## Control-Anecdote -0.2914439 -0.7686831 0.1857954 0.3178458
## Control-Anecdote + Injustice -0.6363636 -1.1171511 -0.1555761 0.0060891
ANOVA Q19 firmar petición
modelo_peticion <- aov(peticion_num ~ grupo, data = df)
summary(modelo_peticion)
## Df Sum Sq Mean Sq F value Pr(>F)
## grupo 2 8.73 4.364 8.918 0.000278 ***
## Residuals 97 47.46 0.489
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
TukeyHSD(modelo_peticion)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = peticion_num ~ grupo, data = df)
##
## $grupo
## diff lwr upr p adj
## Anecdote + Injustice-Anecdote 0.3609626 -0.04590027 0.76782541 0.0927351
## Control-Anecdote -0.3663102 -0.77317300 0.04055268 0.0865811
## Control-Anecdote + Injustice -0.7272727 -1.13716061 -0.31738484 0.0001597
tabla con medias y errores estándar por grupo
library(tidyr)
df_summary <- df %>%
select(grupo, motivacion_num, peticion_num) %>%
pivot_longer(cols = c(motivacion_num, peticion_num),
names_to = "variable", values_to = "valor") %>%
group_by(grupo, variable) %>%
summarise(
media = mean(as.numeric(valor), na.rm = TRUE),
se = sd(as.numeric(valor), na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
library(ggplot2)
ggplot(df_summary, aes(x = grupo, y = media, fill = grupo)) +
geom_col(position = "dodge", width = 0.6) +
geom_errorbar(aes(ymin = media - se, ymax = media + se), width = 0.2) +
facet_wrap(~variable, labeller = as_labeller(c(
motivacion_num = "Motivación colectiva (Q16)",
peticion_num = "Petición (Q19)"
))) +
labs(
title = "Media de respuestas por grupo (con error estándar)",
x = "Grupo experimental",
y = "Puntaje (1 a 5)"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
ggsave("grafico_grupos.png", width = 10, height = 6, dpi = 300)
library(ggplot2)
# Plot
ggplot(df_summary, aes(x = grupo, y = media, fill = grupo)) +
geom_col(position = "dodge", width = 0.6) +
geom_errorbar(aes(ymin = media - se, ymax = media + se), width = 0.2) +
facet_wrap(~variable, labeller = as_labeller(c(
motivacion_num = "Collective Motivation (Q16)",
peticion_num = "Petition (Q19)"
))) +
labs(
title = "Mean Responses by Group (with Standard Error)",
x = "Experimental Group",
y = "Score (1 to 5)"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "none")
# Save the plot
ggsave("group_means_plot.png", width = 10, height = 6, dpi = 300)