##CARGA Y LIMPIEZA

# --- CARGA Y ESTRUCTURA ---
library(tidyr)
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Leer sin encabezados
df_raw <- read_excel("EncuestaMovilidad.xlsx", col_names = FALSE)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
# Nombres técnicos desde fila 2
colnames <- df_raw[2, ] |> unlist() |> as.character()

# Diccionario de preguntas (fila 3)
questions <- df_raw[3, ] |> unlist() |> as.character()
names(questions) <- colnames  # Mapea Q1 → ¿texto?

# Datos reales desde fila 4
df <- df_raw[-c(1, 2, 3), ]
n
## function () 
## {
##     peek_mask()$get_current_group_size()
## }
## <bytecode: 0x63f30c3b9008>
## <environment: namespace:dplyr>
str(df)                # Fechas y números deben estar convertidos correctamente
## tibble [101 × 29] (S3: tbl_df/tbl/data.frame)
##  $ ...1 : chr [1:101] "45818.735185185185" "45823.57929398148" "45823.57996527778" "45823.58033564815" ...
##  $ ...2 : chr [1:101] "45818.735300925924" "45823.57979166666" "45823.58027777778" "45823.72761574074" ...
##  $ ...3 : chr [1:101] "Survey Preview" "IP Address" "IP Address" "IP Address" ...
##  $ ...4 : chr [1:101] NA "189.175.0.159" "189.175.0.159" "131.178.102.176" ...
##  $ ...5 : chr [1:101] "100.0" "100.0" "100.0" "100.0" ...
##  $ ...6 : chr [1:101] "9.0" "43.0" "27.0" "12725.0" ...
##  $ ...7 : chr [1:101] "True" "True" "True" "True" ...
##  $ ...8 : chr [1:101] "45818.73530623843" "45823.57980677083" "45823.5802912963" "45823.72763070602" ...
##  $ ...9 : chr [1:101] "R_3loOQFR84p0F4Kx" "R_3OOyFXTbPzV4l3Z" "R_3zAvdXywOfrOudj" "R_1Ujip62zJkaNWSZ" ...
##  $ ...10: chr [1:101] NA NA NA NA ...
##  $ ...11: chr [1:101] NA NA NA NA ...
##  $ ...12: chr [1:101] NA NA NA NA ...
##  $ ...13: chr [1:101] NA NA NA NA ...
##  $ ...14: chr [1:101] "25.6449" "25.6818" "25.6818" "25.6818" ...
##  $ ...15: chr [1:101] "-100.311" "-100.2627" "-100.2627" "-100.2627" ...
##  $ ...16: chr [1:101] "preview" "anonymous" "anonymous" "anonymous" ...
##  $ ...17: chr [1:101] "ES-ES" "ES-ES" "ES-ES" "ES-ES" ...
##  $ ...18: chr [1:101] "Si, acepto" "Si, acepto" "Si, acepto" "Si, acepto" ...
##  $ ...19: chr [1:101] NA "Femenino" "Femenino" "Masculino" ...
##  $ ...20: chr [1:101] NA "18 años" "22 años" "20 años" ...
##  $ ...21: chr [1:101] NA NA NA NA ...
##  $ ...22: chr [1:101] NA NA NA NA ...
##  $ ...23: chr [1:101] NA NA NA NA ...
##  $ ...24: chr [1:101] NA "Algo" "Bastante" "Bastante" ...
##  $ ...25: chr [1:101] NA "Neutral" "De acuerdo" "Neutral" ...
##  $ ...26: chr [1:101] NA "En desacuerdo" "Neutral" "Neutral" ...
##  $ ...27: chr [1:101] NA "Neutral" "De acuerdo" "Neutral" ...
##  $ ...28: chr [1:101] NA "De acuerdo" "De acuerdo" "Neutral" ...
##  $ ...29: chr [1:101] "control" "luis" "luis_injusticia" "luis" ...
# 1. Usar fila 1 como nombres
colnames <- df_raw[1, ] |> unlist() |> as.character()

# 2. Extraer datos desde fila 2 en adelante
df <- df_raw[-1, ]
names(df) <- colnames

# 3. Limpiar estructura
df <- as.data.frame(df)
rownames(df) <- NULL
# Conversión de columnas clave
df <- df %>%
  filter(Finished == "True") %>%
  mutate(
    Progress = as.numeric(Progress),
    `Duration (in seconds)` = as.numeric(`Duration (in seconds)`),
    StartDate = as.POSIXct(as.numeric(StartDate), origin = "1899-12-30"),
    EndDate = as.POSIXct(as.numeric(EndDate), origin = "1899-12-30"),
    RecordedDate = as.POSIXct(as.numeric(RecordedDate), origin = "1899-12-30")
  )
colnames <- df_raw[1, ] |> unlist() |> as.character()
df <- df_raw[-1, ]
names(df) <- colnames
names(df)             # ¿Ves nombres como "StartDate", "Progress", "Q1", etc.?
##  [1] "StartDate"             "EndDate"               "Status"               
##  [4] "IPAddress"             "Progress"              "Duration (in seconds)"
##  [7] "Finished"              "RecordedDate"          "ResponseId"           
## [10] "RecipientLastName"     "RecipientFirstName"    "RecipientEmail"       
## [13] "ExternalReference"     "LocationLatitude"      "LocationLongitude"    
## [16] "DistributionChannel"   "UserLanguage"          "Q8"                   
## [19] "Q1"                    "Q4"                    "Q23"                  
## [22] "Q6"                    "Q24"                   "Q14"                  
## [25] "Q16"                   "Q17"                   "Q19"                  
## [28] "Q20"                   "tx"
str(df)               # ¿Ves Progress como numérico? ¿Fechas como POSIXct?
## tibble [103 × 29] (S3: tbl_df/tbl/data.frame)
##  $ StartDate            : chr [1:103] "Fecha de inicio" "45818.73303240741" "45818.735185185185" "45823.57929398148" ...
##  $ EndDate              : chr [1:103] "Fecha final" "45818.73327546296" "45818.735300925924" "45823.57979166666" ...
##  $ Status               : chr [1:103] "Tipo de respuesta" "Survey Preview" "Survey Preview" "IP Address" ...
##  $ IPAddress            : chr [1:103] "Dirección IP" NA NA "189.175.0.159" ...
##  $ Progress             : chr [1:103] "Progreso" "100.0" "100.0" "100.0" ...
##  $ Duration (in seconds): chr [1:103] "Duración (en segundos)" "20.0" "9.0" "43.0" ...
##  $ Finished             : chr [1:103] "Finalizado" "True" "True" "True" ...
##  $ RecordedDate         : chr [1:103] "Fecha registrada" "45818.73327994213" "45818.73530623843" "45823.57980677083" ...
##  $ ResponseId           : chr [1:103] "ID de respuesta" "R_31Xxwh2Evy2a5dO" "R_3loOQFR84p0F4Kx" "R_3OOyFXTbPzV4l3Z" ...
##  $ RecipientLastName    : chr [1:103] "Apellido del destinatario" NA NA NA ...
##  $ RecipientFirstName   : chr [1:103] "Nombre del destinatario" NA NA NA ...
##  $ RecipientEmail       : chr [1:103] "Correo electrónico del destinatario" NA NA NA ...
##  $ ExternalReference    : chr [1:103] "Datos de referencia externos" NA NA NA ...
##  $ LocationLatitude     : chr [1:103] "Latitud de ubicación" "25.6449" "25.6449" "25.6818" ...
##  $ LocationLongitude    : chr [1:103] "Longitud de ubicación" "-100.311" "-100.311" "-100.2627" ...
##  $ DistributionChannel  : chr [1:103] "Canal de la distribución" "preview" "preview" "anonymous" ...
##  $ UserLanguage         : chr [1:103] "Idioma del usuario" "ES-ES" "ES-ES" "ES-ES" ...
##  $ Q8                   : chr [1:103] "Estimadx participante: Te invitamos a responder esta encuesta como parte de un proyecto académico realizado por"| __truncated__ "Si, acepto" "Si, acepto" "Si, acepto" ...
##  $ Q1                   : chr [1:103] "Género" "Masculino" NA "Femenino" ...
##  $ Q4                   : chr [1:103] "¿Cuántos años tienes?" "18 años" NA "18 años" ...
##  $ Q23                  : chr [1:103] "Por favor siga con la encuesta, ¡gracias!" NA NA NA ...
##  $ Q6                   : chr [1:103] "La siguiente sección tiene como proposito que a partir de situación presentada, usted pueda escog..." NA NA NA ...
##  $ Q24                  : chr [1:103] "La siguiente sección tiene como proposito que a partir de situación presentada, usted pueda escoger la opción c"| __truncated__ NA NA NA ...
##  $ Q14                  : chr [1:103] "¿Qué tan importantes son las políticas de transporte?" NA NA "Algo" ...
##  $ Q16                  : chr [1:103] "Me siento motivadx a involucrarme en acciones colectivas relacionadas con la mejora del transporte público." NA NA "Neutral" ...
##  $ Q17                  : chr [1:103] "Creo que mi participación en temas públicos puede generar un cambio" NA NA "En desacuerdo" ...
##  $ Q19                  : chr [1:103] "Estoy dispuestx a firmar o compartir una petición para exigir mejoras en la movilidad urbana" NA NA "Neutral" ...
##  $ Q20                  : chr [1:103] "Creo que la movilidad urbana debería ser una prioridad en la agenda política local" NA NA "De acuerdo" ...
##  $ tx                   : chr [1:103] "tx" "luis_injusticia" "control" "luis" ...
summary(df)           # ¿Valores correctos, sin NA masivos?
##   StartDate           EndDate             Status           IPAddress        
##  Length:103         Length:103         Length:103         Length:103        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##    Progress         Duration (in seconds)   Finished         RecordedDate      
##  Length:103         Length:103            Length:103         Length:103        
##  Class :character   Class :character      Class :character   Class :character  
##  Mode  :character   Mode  :character      Mode  :character   Mode  :character  
##   ResponseId        RecipientLastName  RecipientFirstName RecipientEmail    
##  Length:103         Length:103         Length:103         Length:103        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  ExternalReference  LocationLatitude   LocationLongitude  DistributionChannel
##  Length:103         Length:103         Length:103         Length:103         
##  Class :character   Class :character   Class :character   Class :character   
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character   
##  UserLanguage            Q8                 Q1                 Q4           
##  Length:103         Length:103         Length:103         Length:103        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Q23                 Q6                Q24                Q14           
##  Length:103         Length:103         Length:103         Length:103        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Q16                Q17                Q19                Q20           
##  Length:103         Length:103         Length:103         Length:103        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##       tx           
##  Length:103        
##  Class :character  
##  Mode  :character
library(dplyr)

# 1. Eliminar la primera fila de etiquetas (actualmente en fila 1)
df <- df[-1, ]

# 2. Convertir columnas clave
df <- df %>%
  mutate(
    Progress = as.numeric(Progress),
    `Duration (in seconds)` = as.numeric(`Duration (in seconds)`),
    StartDate = as.POSIXct(as.numeric(StartDate), origin = "1899-12-30"),
    EndDate = as.POSIXct(as.numeric(EndDate), origin = "1899-12-30"),
    RecordedDate = as.POSIXct(as.numeric(RecordedDate), origin = "1899-12-30")
  ) %>%
  filter(Finished == "True")
df <- df %>%
  rename(
    genero = Q1,
    edad = Q4,
    consentimiento = Q8,
    politica_transporte = Q14,
    motivacion = Q16,
    cambio_publico = Q17,
    peticion = Q19,
    prioridad_local = Q20
  )
write.csv(df, "encuesta_limpia_final.csv", row.names = FALSE)
head(df, 3)
## # A tibble: 3 × 29
##   StartDate           EndDate             Status         IPAddress     Progress
##   <dttm>              <dttm>              <chr>          <chr>            <dbl>
## 1 1899-12-30 12:43:38 1899-12-30 12:43:38 Survey Preview <NA>               100
## 2 1899-12-30 12:43:38 1899-12-30 12:43:38 Survey Preview <NA>               100
## 3 1899-12-30 12:43:43 1899-12-30 12:43:43 IP Address     189.175.0.159      100
## # ℹ 24 more variables: `Duration (in seconds)` <dbl>, Finished <chr>,
## #   RecordedDate <dttm>, ResponseId <chr>, RecipientLastName <chr>,
## #   RecipientFirstName <chr>, RecipientEmail <chr>, ExternalReference <chr>,
## #   LocationLatitude <chr>, LocationLongitude <chr>, DistributionChannel <chr>,
## #   UserLanguage <chr>, consentimiento <chr>, genero <chr>, edad <chr>,
## #   Q23 <chr>, Q6 <chr>, Q24 <chr>, politica_transporte <chr>,
## #   motivacion <chr>, cambio_publico <chr>, peticion <chr>, …

##ANÁLISIS

# Crear variable de grupo con nombres claros
df <- df %>%
  mutate(grupo = case_when(
    tx == "control" ~ "Control",
    tx == "luis" ~ "Anecdote",
    tx == "luis_injusticia" ~ "Anecdote + Injustice",
    TRUE ~ NA_character_
  ))

# Escala Likert
escala_likert <- c(
  "Muy en desacuerdo" = 1,
  "En desacuerdo" = 2,
  "Neutral" = 3,
  "De acuerdo" = 4,
  "Muy de acuerdo" = 5
)

# Convertir motivación y petición a escala numérica
df <- df %>%
  mutate(
    motivacion_num = escala_likert[motivacion],
    peticion_num = escala_likert[peticion]
  )

ANOVA Q16 motivación

modelo_motiv <- aov(motivacion_num ~ grupo, data = df)
summary(modelo_motiv)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## grupo        2    6.7   3.349   4.975 0.00878 **
## Residuals   97   65.3   0.673                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
TukeyHSD(modelo_motiv)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = motivacion_num ~ grupo, data = df)
## 
## $grupo
##                                     diff        lwr        upr     p adj
## Anecdote + Injustice-Anecdote  0.3449198 -0.1323194  0.8221590 0.2028361
## Control-Anecdote              -0.2914439 -0.7686831  0.1857954 0.3178458
## Control-Anecdote + Injustice  -0.6363636 -1.1171511 -0.1555761 0.0060891

ANOVA Q19 firmar petición

modelo_peticion <- aov(peticion_num ~ grupo, data = df)
summary(modelo_peticion)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## grupo        2   8.73   4.364   8.918 0.000278 ***
## Residuals   97  47.46   0.489                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
TukeyHSD(modelo_peticion)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = peticion_num ~ grupo, data = df)
## 
## $grupo
##                                     diff         lwr         upr     p adj
## Anecdote + Injustice-Anecdote  0.3609626 -0.04590027  0.76782541 0.0927351
## Control-Anecdote              -0.3663102 -0.77317300  0.04055268 0.0865811
## Control-Anecdote + Injustice  -0.7272727 -1.13716061 -0.31738484 0.0001597

tabla con medias y errores estándar por grupo

library(tidyr)

df_summary <- df %>%
  select(grupo, motivacion_num, peticion_num) %>%
  pivot_longer(cols = c(motivacion_num, peticion_num),
               names_to = "variable", values_to = "valor") %>%
  group_by(grupo, variable) %>%
  summarise(
    media = mean(as.numeric(valor), na.rm = TRUE),
    se = sd(as.numeric(valor), na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  )
library(ggplot2)

ggplot(df_summary, aes(x = grupo, y = media, fill = grupo)) +
  geom_col(position = "dodge", width = 0.6) +
  geom_errorbar(aes(ymin = media - se, ymax = media + se), width = 0.2) +
  facet_wrap(~variable, labeller = as_labeller(c(
    motivacion_num = "Motivación colectiva (Q16)",
    peticion_num = "Petición (Q19)"
  ))) +
  labs(
    title = "Media de respuestas por grupo (con error estándar)",
    x = "Grupo experimental",
    y = "Puntaje (1 a 5)"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

ggsave("grafico_grupos.png", width = 10, height = 6, dpi = 300)
library(ggplot2)

# Plot
ggplot(df_summary, aes(x = grupo, y = media, fill = grupo)) +
  geom_col(position = "dodge", width = 0.6) +
  geom_errorbar(aes(ymin = media - se, ymax = media + se), width = 0.2) +
  facet_wrap(~variable, labeller = as_labeller(c(
    motivacion_num = "Collective Motivation (Q16)",
    peticion_num = "Petition (Q19)"
  ))) +
  labs(
    title = "Mean Responses by Group (with Standard Error)",
    x = "Experimental Group",
    y = "Score (1 to 5)"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none")

# Save the plot
ggsave("group_means_plot.png", width = 10, height = 6, dpi = 300)