library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
#file.choose()
# Read the survey CSV into `turismo` and print a per-column summary.
# NOTE(review): the path is an absolute location on one machine; the script
# cannot run elsewhere without editing it.
turismo <- read.csv(
  file = "C:\\Users\\Sai\\Desktop\\Base de Datos SP Turismo Responsable.csv"
)
turismo %>%
  summary()
## StartDate EndDate Status IPAddress
## Length:935 Length:935 Min. :0 Length:935
## Class :character Class :character 1st Qu.:0 Class :character
## Mode :character Mode :character Median :0 Mode :character
## Mean :0
## 3rd Qu.:0
## Max. :0
##
## Progress Duration..in.seconds. Finished RecordedDate
## Min. : 50.00 Min. : 59.0 Min. :0.0000 Length:935
## 1st Qu.:100.00 1st Qu.: 183.0 1st Qu.:1.0000 Class :character
## Median :100.00 Median : 309.0 Median :1.0000 Mode :character
## Mean : 94.17 Mean : 2920.7 Mean :0.8834
## 3rd Qu.:100.00 3rd Qu.: 507.5 3rd Qu.:1.0000
## Max. :100.00 Max. :500660.0 Max. :1.0000
##
## ResponseId RecipientLastName RecipientFirstName RecipientEmail
## Length:935 Mode:logical Mode:logical Mode:logical
## Class :character NA's:935 NA's:935 NA's:935
## Mode :character
##
##
##
##
## ExternalReference LocationLatitude LocationLongitude DistributionChannel
## Mode:logical Min. :17.99 Min. :-122.399 Length:935
## NA's:935 1st Qu.:25.64 1st Qu.:-100.311 Class :character
## Median :25.74 Median :-100.260 Mode :character
## Mean :31.07 Mean : -61.751
## 3rd Qu.:41.66 3rd Qu.: -2.253
## Max. :47.21 Max. : 15.560
## NA's :109 NA's :109
## UserLanguage Q2 Q3_1 Q3_2
## Length:935 Min. :1.00 Min. :1.000 Min. :1.000
## Class :character 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:1.000
## Mode :character Median :1.00 Median :1.000 Median :2.000
## Mean :1.38 Mean :1.645 Mean :2.046
## 3rd Qu.:2.00 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :2.00 Max. :3.000 Max. :3.000
## NA's :79 NA's :79
## Q3_3 Q4_1 Q4_2 Q4_3 Q4_4
## Min. :1.00 Min. :1 Min. :1 Min. :1 Min. :1
## 1st Qu.:2.00 1st Qu.:1 1st Qu.:1 1st Qu.:1 1st Qu.:1
## Median :2.00 Median :1 Median :1 Median :1 Median :1
## Mean :2.31 Mean :1 Mean :1 Mean :1 Mean :1
## 3rd Qu.:3.00 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1
## Max. :3.00 Max. :1 Max. :1 Max. :1 Max. :1
## NA's :79 NA's :115 NA's :233 NA's :398 NA's :561
## Q4_5 Q4_6 Q4_7 Q4_8 Q4_9
## Min. :1 Min. :1 Min. :1 Min. :1 Min. :1
## 1st Qu.:1 1st Qu.:1 1st Qu.:1 1st Qu.:1 1st Qu.:1
## Median :1 Median :1 Median :1 Median :1 Median :1
## Mean :1 Mean :1 Mean :1 Mean :1 Mean :1
## 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1
## Max. :1 Max. :1 Max. :1 Max. :1 Max. :1
## NA's :562 NA's :433 NA's :328 NA's :454 NA's :501
## Q4_10 Q4_11 Q4_12 Q6_1 Q6_2
## Min. :1 Min. :1 Min. :1 Min. :1.000 Min. :1.000
## 1st Qu.:1 1st Qu.:1 1st Qu.:1 1st Qu.:4.000 1st Qu.:4.000
## Median :1 Median :1 Median :1 Median :5.000 Median :6.000
## Mean :1 Mean :1 Mean :1 Mean :5.039 Mean :5.352
## 3rd Qu.:1 3rd Qu.:1 3rd Qu.:1 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :1 Max. :1 Max. :1 Max. :7.000 Max. :7.000
## NA's :445 NA's :697 NA's :393
## Q6_3 Q6_4 Q6_5 Q6_6
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:4.000 1st Qu.:5.000
## Median :6.000 Median :5.000 Median :6.000 Median :6.000
## Mean :5.361 Mean :4.994 Mean :5.471 Mean :5.548
## 3rd Qu.:7.000 3rd Qu.:6.000 3rd Qu.:7.000 3rd Qu.:7.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
##
## Q6_7 Q6_8 Q6_9 Q6_10 Q6_11
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:5.00 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:1.000
## Median :5.000 Median :7.00 Median :5.000 Median :5.000 Median :2.000
## Mean :4.748 Mean :5.96 Mean :4.551 Mean :4.811 Mean :2.718
## 3rd Qu.:6.000 3rd Qu.:7.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:4.000
## Max. :7.000 Max. :7.00 Max. :7.000 Max. :7.000 Max. :7.000
##
## Q6_12 Q6_13 Q6_14 Q6_15
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:1.000
## Median :5.000 Median :4.000 Median :4.000 Median :2.000
## Mean :4.943 Mean :4.321 Mean :3.953 Mean :3.098
## 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :7.000
##
## Q6_16 Q6_17 Q6_18 Q7
## Min. :1.000 Min. :1.000 Min. :1.000 Min. : 0.000
## 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:3.000 1st Qu.: 2.000
## Median :6.000 Median :5.000 Median :4.000 Median : 3.000
## Mean :5.332 Mean :4.645 Mean :4.102 Mean : 3.723
## 3rd Qu.:7.000 3rd Qu.:6.000 3rd Qu.:5.000 3rd Qu.: 4.000
## Max. :7.000 Max. :7.000 Max. :7.000 Max. :70.000
## NA's :12
## Q10 Q9 Q12 Q12_1 Q11
## Min. :1.000 Min. :1.000 Min. :16.0 Min. :1.00 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:21.0 1st Qu.:1.00 1st Qu.:1.000
## Median :1.000 Median :2.000 Median :35.0 Median :2.00 Median :2.000
## Mean :1.535 Mean :1.929 Mean :39.5 Mean :2.45 Mean :1.569
## 3rd Qu.:1.000 3rd Qu.:3.000 3rd Qu.:55.0 3rd Qu.:3.00 3rd Qu.:2.000
## Max. :4.000 Max. :3.000 Max. :92.0 Max. :5.00 Max. :2.000
## NA's :13 NA's :11 NA's :2
## Q13 X
## Min. :1.00 Mode:logical
## 1st Qu.:1.00 NA's:935
## Median :1.00
## Mean :1.43
## 3rd Qu.:2.00
## Max. :2.00
## NA's :2
# Relabel the numeric Q2 codes (1 -> "Si", 2 -> "No"); any other value is kept
# as its character form.
turismo <- turismo %>%
  mutate(
    Q2 = case_when(
      Q2 == 1 ~ "Si",
      Q2 == 2 ~ "No",
      TRUE ~ as.character(Q2)
    )
  )

# Bar chart counting how many respondents gave each Q2 answer.
turismo %>%
  ggplot(mapping = aes(x = Q2, fill = Q2)) +
  geom_bar() +
  scale_fill_manual(values = c("No" = "lightsalmon2", "Si" = "indianred3")) +
  ggtitle("¿Ha oído hablar del turismo responsable y/o sostenible?") +
  xlab("Respuestas") +
  ylab("Frecuencia")
# Label the respondent's sex from Q11 (1 -> "Hombre", 2 -> "Mujer") in a new
# `sexo` column; any other value (including NA) stays NA.
# The earlier version tested Q2 and wrote the result back into Q2. That
# discarded the "Si"/"No" answers stored there and, because Q2 never equals 1
# or 2 at this point, yielded the values "1"/"Mujer", so the "Hombre" colour
# below was never matched.
turismo <- turismo %>%
  mutate(
    sexo = case_when(
      Q11 == 1 ~ "Hombre",
      Q11 == 2 ~ "Mujer"
    )
  )

# Bar chart counting respondents per sex.
ggplot(turismo, aes(x = sexo, fill = sexo)) +
  geom_bar() +
  labs(
    title = "Sexo de los participantes de la encuesta",
    x = "Respuestas",
    y = "Frecuencia"
  ) +
  scale_fill_manual(values = c("Hombre" = "lightblue1", "Mujer" = "peachpuff2"))

# Horizontal boxplots of Q12 (age in years) for each sex.
# The x axis is Q12, so the axis label and title now describe age; they used to
# claim the plot showed awareness of responsible tourism, which is not mapped.
# NOTE(review): if the intent was to relate sex to the Q2 awareness answer, a
# different chart (e.g. grouped bars) is needed - confirm with the author.
ggplot(data = turismo, aes(x = Q12, y = sexo)) +
  geom_boxplot(fill = "#76EEC6", color = "black") +
  labs(
    title = "Comparación de edad por sexo",
    x = "Edad",
    y = "Sexo"
  )
## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
# Keep only the rows that have a Q13 answer, then relabel its codes
# (1 -> "México", 2 -> "España"); any other value is kept as character.
turismo <- subset(turismo, !is.na(Q13))
turismo <- turismo %>%
  mutate(
    Q13 = case_when(
      Q13 == 1 ~ "México",
      Q13 == 2 ~ "España",
      TRUE ~ as.character(Q13)
    )
  )

# Bar chart counting respondents per country label.
turismo %>%
  ggplot(mapping = aes(x = Q13, fill = Q13)) +
  geom_bar() +
  scale_fill_manual(values = c("México" = "pink", "España" = "cornsilk1")) +
  ggtitle("País en el que realiza la encuesta") +
  xlab("Respuestas") +
  ylab("Frecuencia")
# Histogram of Q7 (number of trips in the last year), one bin per unit.
turismo %>%
  ggplot(mapping = aes(x = Q7)) +
  geom_histogram(color = "black", fill = "lightgoldenrod1", binwidth = 1) +
  ggtitle("Distribución de la Cantidad de Viajes en el Último Año") +
  xlab("Cantidad de Viajes") +
  ylab("Frecuencia")
## Warning: Removed 12 rows containing non-finite values (`stat_bin()`).
# Histogram of Q12 (age in years), one bin per year.
turismo %>%
  ggplot(mapping = aes(x = Q12)) +
  geom_histogram(color = "black", fill = "lavenderblush2", binwidth = 1) +
  ggtitle("Distribución de Edad") +
  xlab("Edad") +
  ylab("Frecuencia")
## Warning: Removed 2 rows containing non-finite values (`stat_bin()`).
# Boxplots of Q12 (age) grouped by the Q13 country value.
turismo %>%
  ggplot(mapping = aes(x = Q13, y = Q12)) +
  geom_boxplot(color = "black", fill = "paleturquoise1") +
  ggtitle("Comparación de Edad por País") +
  xlab("País") +
  ylab("Edad")
## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
# Histogram of Q6_1 (1-7 agreement with the sustainability-concern statement
# shown on the x axis), one bin per scale point.
turismo %>%
  ggplot(mapping = aes(x = Q6_1)) +
  geom_histogram(color = "violetred4", fill = "#FFC1C1", binwidth = 1) +
  ggtitle("Importancia de la sostenibilidad") +
  xlab("En la escala de 1 al 7 soy una persona preocupada por la sostenibilidad económica, sociocultural y medioambiental.") +
  ylab("Frecuencia")
# One-sample proportion test on Q9: is the share of respondents answering 1
# equal to 86.6%?
# Rows without a Q9 answer are dropped first. The filter and summary used to be
# run twice; the second pass had nothing left to remove, so one is enough.
turismo <- turismo[!is.na(turismo$Q9), ]
summary(turismo$Q9)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 1.927 3.000 3.000
n <- nrow(turismo)
# Observed share of respondents coded 1. This was previously mean(Q9), i.e. the
# average of the 1-3 answer codes, which is not a proportion.
prop_obs <- mean(turismo$Q9 == 1)
prop_expected <- 0.866
# The x/n/p argument expressions are kept as written because prop.test() echoes
# them in the "data:" line of its printed result.
prop_test_result <- prop.test(
  x = sum(turismo$Q9 == 1),
  n = n,
  p = prop_expected,
  alternative = "two.sided"
)
print(prop_test_result)
##
## 1-sample proportions test with continuity correction
##
## data: sum(turismo$Q9 == 1) out of n, null probability prop_expected
## X-squared = 1656.2, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.866
## 95 percent confidence interval:
## 0.3770681 0.4414882
## sample estimates:
## p
## 0.4088937
# One-sided proportion test on Q6_4: is the share of respondents scoring 3 or
# lower on the 1-7 scale below 50%?
summary(turismo$Q6_4)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 4.000 5.000 4.992 6.000 7.000
n <- nrow(turismo)
# Observed share of scores <= 3. This was previously mean(Q6_4), i.e. the
# average score, which is not a proportion.
prop_obs <- mean(turismo$Q6_4 <= 3)
prop_expected <- 0.5
# The x/n/p argument expressions are kept as written because prop.test() echoes
# them in the "data:" line of its printed result.
prop_test_result <- prop.test(
  x = sum(turismo$Q6_4 <= 3),
  n = n,
  p = prop_expected,
  alternative = "less"
)
print(prop_test_result)
##
## 1-sample proportions test with continuity correction
##
## data: sum(turismo$Q6_4 <= 3) out of n, null probability prop_expected
## X-squared = 391.76, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is less than 0.5
## 95 percent confidence interval:
## 0.0000000 0.1955639
## sample estimates:
## p
## 0.1735358
# Null hypothesis: the proportion of Spanish tourists who say they would offset
# the carbon footprint of their holiday accommodation equals 55%.
# Alternative hypothesis: that proportion differs from 55%.
# NOTE(review): the test below uses every remaining respondent rather than only
# those with Q13 == "España", and counts Q6_11 == 1 as a success - confirm that
# both choices match the hypothesis.
summary(turismo$Q6_11)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 2.719 4.000 7.000
n <- nrow(turismo)
# Observed share of respondents coded 1. This was previously mean(Q6_11), i.e.
# the average score, which is not a proportion.
prop_obs <- mean(turismo$Q6_11 == 1)
prop_expected <- 0.55
# The x/n/p argument expressions are kept as written because prop.test() echoes
# them in the "data:" line of its printed result.
prop_test_result <- prop.test(
  x = sum(turismo$Q6_11 == 1),
  n = n,
  p = prop_expected,
  alternative = "two.sided"
)
print(prop_test_result)
##
## 1-sample proportions test with continuity correction
##
## data: sum(turismo$Q6_11 == 1) out of n, null probability prop_expected
## X-squared = 74.745, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.55
## 95 percent confidence interval:
## 0.3760010 0.4403953
## sample estimates:
## p
## 0.4078091
# Discard respondents with no Q7 answer, then print summary statistics for Q7.
turismo <- subset(turismo, !is.na(Q7))
turismo$Q7 %>%
  summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 3.000 3.764 4.000 70.000