📊 Descriptive Statistics
# Descriptive statistics for every numeric column of uk_tourism.
numeric_data <- uk_tourism %>%
  dplyr::select(where(is.numeric))
desc_stats <- psych::describe(numeric_data)

# Keep only the six summary columns shown in the report, round them to two
# decimals, and render the result as a styled HTML table.
desc_stats %>%
  dplyr::select(mean, sd, min, max, skew, kurtosis) %>%
  round(2) %>%
  knitr::kable(
    format = "html",
    caption = "Descriptive Statistics of UK Tourism Data"
  ) %>%
  kableExtra::kable_styling(
    bootstrap_options = c("striped", "hover", "condensed")
  )
Descriptive Statistics of UK Tourism Data

|                         | mean    | sd    | min     | max     | skew  | kurtosis |
|:------------------------|--------:|------:|--------:|--------:|------:|---------:|
| Year                    | 2016.50 | 4.18  | 2010.00 | 2023.00 | 0.00  | -1.46    |
| Total_Visitors          | 30.09   | 10.23 | 6.20    | 40.90   | -1.17 | 0.18     |
| Visitor_Expenditure     | 18.72   | 6.74  | 3.90    | 28.40   | -0.93 | -0.10    |
| Expenditure_Per_Visitor | 618.52  | 42.10 | 558.56  | 707.24  | 0.67  | -0.31    |
| Exchange_Rate_USD       | 1.43    | 0.14  | 1.24    | 1.65    | 0.21  | -1.77    |
| Exchange_Rate_EUR       | 1.19    | 0.07  | 1.13    | 1.38    | 1.65  | 2.26     |
| Purpose_Holiday         | 11.66   | 4.06  | 2.00    | 16.00   | -1.27 | 0.37     |
| Purpose_Business        | 6.63    | 2.80  | 0.50    | 9.20    | -1.10 | -0.32    |
| Purpose_VFR             | 9.42    | 2.61  | 3.30    | 12.70   | -1.03 | 0.19     |
| Purpose_Other           | 2.38    | 0.98  | 0.40    | 3.60    | -0.56 | -0.81    |
| Brexit_Period           | 0.57    | 0.51  | 0.00    | 1.00    | -0.26 | -2.07    |
| COVID_Period            | 0.14    | 0.36  | 0.00    | 1.00    | 1.83  | 1.45     |
🧪 Hypothesis 1: Visitor Expenditure ➜ Total Visitors
# Hypothesis 1: relate Visitor_Expenditure to Total_Visitors with a
# correlation test and a simple linear regression, then tabulate the
# headline numbers.
cor_h1 <- cor.test(uk_tourism$Visitor_Expenditure, uk_tourism$Total_Visitors)
model_h1 <- lm(Total_Visitors ~ Visitor_Expenditure, data = uk_tourism)
summary_h1 <- summary(model_h1)

# Row 2 of the coefficient matrix is the Visitor_Expenditure term; column 1
# is reported as the slope and column 4 as its p-value. All five values are
# rounded to four decimals in a single call.
h1_table <- data.frame(
  Metric = c(
    "Correlation",
    "Correlation p-value",
    "Regression Slope",
    "Regression p-value",
    "R-squared"
  ),
  Value = round(
    c(
      cor_h1$estimate,
      cor_h1$p.value,
      summary_h1$coefficients[2, 1],
      summary_h1$coefficients[2, 4],
      summary_h1$r.squared
    ),
    4
  )
)
kable(h1_table, caption = "Hypothesis 1: Expenditure Impact Results")
Hypothesis 1: Expenditure Impact Results

| Metric              | Value  |
|:--------------------|-------:|
| Correlation         | 0.9814 |
| Correlation p-value | 0.0000 |
| Regression Slope    | 1.4896 |
| Regression p-value  | 0.0000 |
| R-squared           | 0.9632 |
# Scatter plot of Total_Visitors against Visitor_Expenditure with a fitted
# linear trend line. The smoothing formula is stated explicitly so that
# geom_smooth() does not print its "using formula = 'y ~ x'" message into
# the rendered report; the fitted line itself is unchanged.
ggplot(uk_tourism, aes(x = Visitor_Expenditure, y = Total_Visitors)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue") +
  labs(title = "Visitor Expenditure vs Total Visitors")
## `geom_smooth()` using formula = 'y ~ x'

💱 Hypothesis 2: Exchange Rate ➜ Total Visitors
# Hypothesis 2: check whether the USD or EUR exchange rate is associated
# with Total_Visitors. Each currency gets a correlation test and a simple
# linear regression, summarised in its own five-row table.
cor_usd <- cor.test(uk_tourism$Exchange_Rate_USD, uk_tourism$Total_Visitors)
cor_eur <- cor.test(uk_tourism$Exchange_Rate_EUR, uk_tourism$Total_Visitors)
model_usd <- lm(Total_Visitors ~ Exchange_Rate_USD, data = uk_tourism)
model_eur <- lm(Total_Visitors ~ Exchange_Rate_EUR, data = uk_tourism)

# Summarise each model once and share the metric labels between tables.
summary_usd <- summary(model_usd)
summary_eur <- summary(model_eur)
h2_metrics <- c("Correlation", "p-value", "Slope", "Reg. p", "R-squared")

# Element 2 of coef() / row 2 of the coefficient matrix is the exchange-rate
# term; column 4 of that matrix is reported as the regression p-value.
h2_usd <- data.frame(
  Metric = h2_metrics,
  USD = round(
    c(
      cor_usd$estimate,
      cor_usd$p.value,
      coef(model_usd)[2],
      summary_usd$coefficients[2, 4],
      summary_usd$r.squared
    ),
    4
  )
)
h2_eur <- data.frame(
  Metric = h2_metrics,
  EUR = round(
    c(
      cor_eur$estimate,
      cor_eur$p.value,
      coef(model_eur)[2],
      summary_eur$coefficients[2, 4],
      summary_eur$r.squared
    ),
    4
  )
)
kable(h2_usd, caption = "Hypothesis 2: USD Exchange Rate Results")
Hypothesis 2: USD Exchange Rate Results

| Metric      | USD    |
|:------------|-------:|
| Correlation | 0.1194 |
| p-value     | 0.6843 |
| Slope       | 8.5452 |
| Reg. p      | 0.6843 |
| R-squared   | 0.0143 |
# Render the EUR exchange-rate results table.
kable(
  x = h2_eur,
  caption = "Hypothesis 2: EUR Exchange Rate Results"
)
Hypothesis 2: EUR Exchange Rate Results

| Metric      | EUR     |
|:------------|--------:|
| Correlation | 0.2204  |
| p-value     | 0.4489  |
| Slope       | 33.8461 |
| Reg. p      | 0.4489  |
| R-squared   | 0.0486  |
# Plot both exchange-rate series against Year, one coloured line per
# currency. The two rate columns are stacked into long form first so that
# Currency can drive the colour aesthetic.
# `linewidth` is used for the line thickness: the `size` aesthetic for lines
# was deprecated in ggplot2 3.4.0 and raised a warning in the rendered report.
uk_tourism %>%
  pivot_longer(
    cols = c(Exchange_Rate_USD, Exchange_Rate_EUR),
    names_to = "Currency",
    values_to = "Rate"
  ) %>%
  ggplot(aes(x = Year, y = Rate, color = Currency)) +
  geom_line(linewidth = 1) +
  geom_point() +
  labs(title = "Exchange Rates Over Time")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

🎯 Hypothesis 3: Purpose of Visit ➜ Visitor Variation
# Hypothesis 3: compare visitor numbers across the four purpose-of-visit
# categories. The four Purpose_* columns are stacked into long form and the
# "Purpose_" prefix is stripped so the group labels read Holiday, Business,
# VFR and Other.
purpose_long <- pivot_longer(
  uk_tourism,
  cols = c(Purpose_Holiday, Purpose_Business, Purpose_VFR, Purpose_Other),
  names_to = "Purpose",
  values_to = "Visitors"
)
purpose_data <- mutate(
  purpose_long,
  Purpose = gsub(pattern = "Purpose_", replacement = "", x = Purpose)
)

# One-way ANOVA of Visitors by Purpose, followed by Tukey HSD comparisons.
# NOTE(review): every row of uk_tourism contributes one value to each
# purpose group, so the groups may not be independent samples; confirm
# that a one-way ANOVA is the intended design.
anova_model <- aov(Visitors ~ Purpose, data = purpose_data)
tukey_result <- TukeyHSD(anova_model)

anova_table <- summary(anova_model)[[1]]
kable(anova_table, caption = "ANOVA Results for Purpose of Visit")
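ANOVA Results for Purpose of Visit

|           | Df | Sum Sq   | Mean Sq   | F value  | Pr(>F) |
|:----------|---:|---------:|----------:|---------:|-------:|
| Purpose   | 3  | 672.2720 | 224.09065 | 27.90081 | 0      |
| Residuals | 52 | 417.6479 | 8.03169   | NA       | NA     |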
# Render the pairwise Tukey HSD comparisons for the Purpose term.
kable(
  tukey_result[["Purpose"]],
  caption = "Tukey HSD Post-Hoc Test"
)
Tukey HSD Post-Hoc Test

|                  | diff      | lwr         | upr        | p adj     |
|:-----------------|----------:|------------:|-----------:|----------:|
| Holiday-Business | 5.035714  | 2.1927492   | 7.8786794  | 0.0001118 |
| Other-Business   | -4.250000 | -7.0929651  | -1.4070349 | 0.0012438 |
| VFR-Business     | 2.792857  | -0.0501079  | 5.6358222  | 0.0558871 |
| Other-Holiday    | -9.285714 | -12.1286794 | -6.4427492 | 0.0000000 |
| VFR-Holiday      | -2.242857 | -5.0858222  | 0.6001079  | 0.1686217 |
| VFR-Other        | 7.042857  | 4.1998921   | 9.8858222  | 0.0000001 |
# Box plot of visitor numbers for each purpose-of-visit category, with the
# boxes filled by category.
ggplot(
  data = purpose_data,
  mapping = aes(x = Purpose, y = Visitors, fill = Purpose)
) +
  geom_boxplot() +
  labs(title = "Visitor Counts by Purpose of Visit")

🚧 Hypothesis 4: Travel Restrictions ➜ Visitor Drop
# Hypothesis 4: regress Total_Visitors on the Travel_Restrictions level and
# report the estimates for the "Restricted" and "Banned" terms.
model_h4 <- lm(Total_Visitors ~ Travel_Restrictions, data = uk_tourism)
summary_h4 <- summary(model_h4)

# Names of the coefficients the model actually estimated.
available_coefs <- rownames(summary_h4$coefficients)

# Look up each expected term by name; a term that is absent from the model
# yields NA instead of an error, so the table always has both rows.
# NOTE(review): a level has no coefficient of its own when it is the model's
# reference level, which could be why "Banned" shows NA; confirm the level
# names and the baseline of Travel_Restrictions.
h4_levels <- c("Restricted", "Banned")
h4_table <- data.frame(
  Coefficient = h4_levels,
  Estimate = unlist(lapply(
    paste0("Travel_Restrictions", h4_levels),
    function(term) {
      if (term %in% available_coefs) {
        round(summary_h4$coefficients[term, 1], 4)
      } else {
        NA
      }
    }
  ))
)
kable(h4_table, caption = "Hypothesis 4: Travel Restrictions Coefficients")
Hypothesis 4: Travel Restrictions Coefficients

| Coefficient | Estimate |
|:------------|---------:|
| Restricted  | 3.55     |
| Banned      | NA       |
# Box plot of Total_Visitors for each Travel_Restrictions level, with the
# boxes filled by level.
ggplot(
  data = uk_tourism,
  mapping = aes(
    x = Travel_Restrictions,
    y = Total_Visitors,
    fill = Travel_Restrictions
  )
) +
  geom_boxplot() +
  labs(title = "Total Visitors by Travel Restriction Level")
