library(ggplot2)
library(lubridate)
##
## Adjuntando el paquete: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.4.3
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(textdata)
## Warning: package 'textdata' was built under R version 4.4.3
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ purrr 1.0.4 ✔ tibble 3.2.1
## ✔ readr 2.1.5 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.4.3
## Cargando paquete requerido: RColorBrewer
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(stopwords)
## Warning: package 'stopwords' was built under R version 4.4.3
library(RColorBrewer)
library(stringr)
Cargar datos
PS <- read_excel("C:\\Users\\Luis Mendoza\\Downloads\\Encuesta Perfil y SatisfacciĂ³n 2024.xlsx")
## Warning: Expecting numeric in T1172 / R1172C20: got 'Cada semana por trabajo'
colnames(PS)
## [1] "Mes" "Procedencia PaĂs"
## [3] "Procedencia Ciudad" "Edad"
## [5] "Motivo Visita" "Motivo Visita Otro"
## [7] "Noches Hospedadas" "Hospedaje en Hotel"
## [9] "Hospedaje en Extra Hotelero" "Hospedaje en Casa Familia/amigos"
## [11] "No se hospedĂ³" "Hospedaje en Otro"
## [13] "Transporte AviĂ³n" "Transporte AutobĂºs"
## [15] "Transporte AutomĂ³vil" "Transporte Motocicleta"
## [17] "Transporte Otro" "Gasto total en pesos"
## [19] "Primera Visita" "Visitas Previas"
## [21] "EV Experiencia" "EV Sostenibilidad"
## [23] "Experiencia" "Si Recomienda"
## [25] "No Recomienda" "Comentario RecomendaciĂ³n"
I. Transporte mĂ¡s
utilizado
PS_transporte <- PS %>%
pivot_longer(cols = starts_with("Transporte "),
names_to = "TipoTransporte",
values_to = "valor") %>%
filter(!is.na(valor) & valor > 0)
transporte_total <- PS_transporte %>%
group_by(TipoTransporte) %>%
summarise(Cantidad = n()) %>%
mutate(TipoTransporte = str_remove(TipoTransporte, "Transporte "))
ggplot(transporte_total, aes(x = reorder(TipoTransporte, -Cantidad), y = Cantidad)) +
geom_col(fill = "steelblue") +
labs(title = "Transporte mĂ¡s utilizado (Todos los visitantes)", x = "Transporte", y = "Cantidad de personas") +
theme_minimal()

PS_extranjeros <- PS %>% filter(`Procedencia PaĂs` != "MĂ©xico" & !is.na(`Procedencia PaĂs`))
transporte_extranjeros <- PS_extranjeros %>%
pivot_longer(cols = starts_with("Transporte "),
names_to = "TipoTransporte",
values_to = "valor") %>%
filter(!is.na(valor) & valor > 0) %>%
group_by(TipoTransporte) %>%
summarise(Cantidad = n()) %>%
mutate(TipoTransporte = str_remove(TipoTransporte, "Transporte "))
ggplot(transporte_extranjeros, aes(x = reorder(TipoTransporte, -Cantidad), y = Cantidad)) +
geom_col(fill = "darkorange") +
labs(title = "Transporte mĂ¡s utilizado (Visitantes extranjeros)", x = "Transporte", y = "Cantidad de personas") +
theme_minimal()

II. AnĂ¡lisis
Exploratorio de Datos
# 1. Definir nombres de meses vĂ¡lidos
meses_es <- c("enero", "febrero", "marzo", "abril", "mayo", "junio",
"julio", "agosto", "septiembre", "octubre", "noviembre", "diciembre")
# 2. Normalizar y convertir "Mes" a fecha
PS$Mes <- tolower(trimws(PS$Mes)) # limpiar texto
PS$MesNum <- match(PS$Mes, meses_es) # convertir a nĂºmero de mes
PS$MesFecha <- as.Date(ISOdate(2024, PS$MesNum, 1)) # crear fecha segura
# 3. Filtrar registros vĂ¡lidos
PS <- PS[!is.na(PS$MesFecha), ]
# 4. Clasificar visitantes como "Normal" u "Outlier" segĂºn gasto
q3 <- quantile(PS$`Gasto total en pesos`, 0.75, na.rm = TRUE)
iqr <- IQR(PS$`Gasto total en pesos`, na.rm = TRUE)
limite_superior <- q3 + 1.5 * iqr
PS <- PS %>%
mutate(GrupoGasto = ifelse(`Gasto total en pesos` > limite_superior, "Outlier", "Normal"))
# 5. Calcular gasto total mensual por grupo
gasto_mensual_segmentado <- PS %>%
group_by(MesFecha, GrupoGasto) %>%
summarise(GastoTotal = sum(`Gasto total en pesos`, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'MesFecha'. You can override using the
## `.groups` argument.
# 6. Graficar tendencia mensual por grupo
ggplot(gasto_mensual_segmentado, aes(x = MesFecha, y = GastoTotal, color = GrupoGasto)) +
geom_line(size = 1.2) +
geom_smooth(se = FALSE, linetype = "dashed") +
scale_x_date(date_labels = "%b", date_breaks = "1 month") +
labs(title = "Tendencia mensual del gasto total por grupo de gasto",
x = "Mes", y = "Gasto total (MXN)", color = "Grupo de gasto") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Agregar una etiqueta para distinguir grupos
PS <- PS %>%
mutate(
GrupoGasto = ifelse(`Gasto total en pesos` > limite_superior, "Outliers", "Normales")
)
# Eliminar valores NA en gasto
PS_filtrado <- PS %>% filter(!is.na(`Gasto total en pesos`))
# Boxplot comparativo
library(ggplot2)
ggplot(PS_filtrado, aes(x = GrupoGasto, y = `Gasto total en pesos`, fill = GrupoGasto)) +
geom_boxplot(outlier.color = "red", outlier.shape = 16) +
scale_y_continuous(labels = scales::comma) +
labs(
title = "ComparaciĂ³n del Gasto Total",
x = "Grupo",
y = "Gasto total en pesos (MXN)"
) +
theme_minimal()

# ------------------------
# Definir nombres legibles para las variables
# ------------------------
nombre_vars <- c("Gasto", "Edad", "Noches", "Visitas")
# ------------------------
# 1. Matriz general
# ------------------------
vars_cor <- PS[, c("Gasto total en pesos", "Edad", "Noches Hospedadas", "Visitas Previas")] %>%
mutate_all(as.numeric)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Edad = .Primitive("as.double")(Edad)`.
## Caused by warning:
## ! NAs introducidos por coerciĂ³n
colnames(vars_cor) <- nombre_vars
cor_mat <- cor(vars_cor, use = "complete.obs")
corrplot(cor_mat,
method = "circle",
type = "lower",
tl.col = "black",
tl.cex = 1,
tl.srt = 45,
addCoef.col = "black",
number.cex = 0.7,
col = colorRampPalette(c("firebrick", "white", "steelblue"))(200),
title = "Matriz de CorrelaciĂ³n General",
mar = c(0, 0, 2, 0))

# ------------------------
# 2. Matriz para visitantes normales
# ------------------------
normales <- PS %>%
filter(GrupoGasto == "Normales") %>%
select(`Gasto total en pesos`, Edad, `Noches Hospedadas`, `Visitas Previas`) %>%
mutate_all(as.numeric)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Edad = .Primitive("as.double")(Edad)`.
## Caused by warning:
## ! NAs introducidos por coerciĂ³n
colnames(normales) <- nombre_vars
cor_normales <- cor(normales, use = "complete.obs")
corrplot(cor_normales,
method = "circle",
type = "lower",
tl.col = "black",
tl.cex = 1,
tl.srt = 45,
addCoef.col = "black",
number.cex = 0.7,
col = colorRampPalette(c("firebrick", "white", "steelblue"))(200),
title = "CorrelaciĂ³n - Visitantes Normales",
mar = c(0, 0, 2, 0))

# ------------------------
# 3. Matriz para outliers
# ------------------------
outliers <- PS %>%
filter(GrupoGasto == "Outliers") %>%
select(`Gasto total en pesos`, Edad, `Noches Hospedadas`, `Visitas Previas`) %>%
mutate_all(as.numeric)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Edad = .Primitive("as.double")(Edad)`.
## Caused by warning:
## ! NAs introducidos por coerciĂ³n
colnames(outliers) <- nombre_vars
cor_outliers <- cor(outliers, use = "complete.obs")
corrplot(cor_outliers,
method = "circle",
type = "lower",
tl.col = "black",
tl.cex = 1,
tl.srt = 45,
addCoef.col = "black",
number.cex = 0.7,
col = colorRampPalette(c("firebrick", "white", "steelblue"))(200),
title = "CorrelaciĂ³n - Outliers",
mar = c(0, 0, 2, 0))

V. AnĂ¡lisis de
Sentimiento
comentarios <- PS %>%
filter(!is.na(`Comentario RecomendaciĂ³n`)) %>%
tidytext::unnest_tokens(input = `Comentario RecomendaciĂ³n`, output = word) %>%
anti_join(get_stopwords(language = "es"))
## Joining with `by = join_by(word)`
lexico_es <- tibble(
word = c("bueno", "excelente", "agradable", "recomendado", "feliz", "malo", "terrible", "sucio", "caro", "pésimo"),
sentiment = c("positivo", "positivo", "positivo", "positivo", "positivo", "negativo", "negativo", "negativo", "negativo", "negativo")
)
sentimientos <- comentarios %>%
inner_join(lexico_es, by = "word") %>%
count(sentiment, sort = TRUE)
ggplot(sentimientos, aes(x = reorder(sentiment, n), y = n, fill = sentiment)) +
geom_col() +
coord_flip() +
labs(title = "AnĂ¡lisis de sentimiento de los comentarios (Español)", x = "Sentimiento", y = "Frecuencia") +
theme_minimal()

VI. Nube de Palabras
por RecomendaciĂ³n
# -----------------------------
# Limpieza y procesamiento - SĂ Recomienda
# -----------------------------
recomienda <- PS %>%
filter(!is.na(`Si Recomienda`)) %>%
select(`Si Recomienda`) %>%
rename(texto = `Si Recomienda`) %>%
unnest_tokens(word, texto) %>%
filter(!word %in% stopwords::stopwords("es"),
!str_detect(word, "\\d"), # elimina nĂºmeros
str_length(word) > 2, # evita palabras cortas irrelevantes
!is.na(word)) %>%
mutate(word = case_when( # corregir sinĂ³nimos o variantes
word == "banio" ~ "baño",
word == "limpia" ~ "limpio",
word == "recomiendo" ~ "recomendar",
TRUE ~ word
)) %>%
count(word, sort = TRUE)
# -----------------------------
# Limpieza y procesamiento - No Recomienda
# -----------------------------
no_recomienda <- PS %>%
filter(!is.na(`No Recomienda`)) %>%
select(`No Recomienda`) %>%
rename(texto = `No Recomienda`) %>%
unnest_tokens(word, texto) %>%
filter(!word %in% stopwords::stopwords("es"),
!str_detect(word, "\\d"),
str_length(word) > 2,
!is.na(word)) %>%
mutate(word = case_when(
word == "banio" ~ "baño",
word == "sucio" ~ "suciedad",
TRUE ~ word
)) %>%
count(word, sort = TRUE)
# -----------------------------
# Graficar nubes de palabras mejoradas
# -----------------------------
par(mfrow = c(1, 2)) # panel de 2 grĂ¡ficos
# Wordcloud para SĂ Recomienda
if (nrow(recomienda) > 0) {
wordcloud(words = recomienda$word,
freq = recomienda$n,
scale = c(4, 0.8),
max.words = 100,
colors = brewer.pal(8, "Dark2"),
random.order = FALSE)
title("Lugares que SĂ Recomiendan")
} else {
plot.new(); title("Sin datos: SĂ Recomienda")
}
# Wordcloud para No Recomienda
if (nrow(no_recomienda) > 0) {
wordcloud(words = no_recomienda$word,
freq = no_recomienda$n,
scale = c(4, 0.8),
max.words = 100,
colors = brewer.pal(8, "Reds"),
random.order = FALSE)
title("Lugares que No Recomiendan")
} else {
plot.new(); title("Sin datos: No Recomienda")
}

par(mfrow = c(1, 1)) # restaurar panel normal