Seoul Tourism

Author

Asher

Introduction: For my Business Analytics class, my group decided to design a luggage-transportation tourism business based in Seoul, South Korea. My role in the group’s final project was to develop maps and graphs based on location, interest, travel popularity, and shopping satisfaction. Below is the code, with step-by-step instructions on how I completed each visualization.

From Lines 11-16 I set the working directory and load the required library packages.

setwd("/Users/asherscott/Desktop/Data 110")
library(leaflet)

Warning: package 'leaflet' was built under R version 4.4.1

library(ggplot2)
library(highcharter)

Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(htmlwidgets)  
library(webshot) 
library(scales)
library(tidyr)

From Lines 25–66, I loaded the Hotspots2 dataset that contains key locations our business would offer luggage transportation for. From there, I created a Leaflet map that groups locations under a common “Hotel” category, removes theme parks, assigns distinct colors by location type, and visualizes the points on a map with an accompanying legend. I then saved the interactive map as an HTML file using saveWidget() and converted it to a high-resolution PNG using webshot().

# Read the service locations, then tidy them up for mapping:
#   * override the longitude recorded for Starfield COEX Mall,
#   * relabel every row whose Name matches one of the hotel patterns as
#     "Hotel" (all other rows keep their original Type),
#   * drop the "Theme Park" rows.
Hotspots2 <- read.csv("Hotspots2.csv")

Hotspots2 <- Hotspots2 %>%
  mutate(
    Lng = ifelse(Name == "Starfield COEX Mall", 127.0589, Lng),
    Type = case_when(
      grepl("Hotel|InterContinental|Westin", Name) ~ "Hotel",
      TRUE ~ Type
    )
  ) %>%
  filter(!(Type %in% "Theme Park"))

# Colour lookup keyed on location Type. It is called `type_palette` rather
# than `palette` because colorFactor() returns a function, and a global
# function named `palette` would mask grDevices::palette() for the rest of
# the session.
type_palette <- colorFactor(
  palette = c("orange", "red", "purple", "blue", "green", "hotpink"),
  domain = Hotspots2$Type
)

# Interactive map with one circle marker per location. Airports get a
# slightly larger radius than the other types, fill colour encodes Type, and
# clicking a marker shows the location's Name.
map_with_airport <- leaflet(Hotspots2) %>%
  setView(lng = 126.9971, lat = 37.5503, zoom = 11) %>%
  addProviderTiles("Esri.WorldStreetMap") %>%
  addCircleMarkers(
    lng = ~Lng,
    lat = ~Lat,
    radius = ~ifelse(Type == "Airport", 10, 8),
    color = "black",
    weight = 2,
    fillColor = ~type_palette(Type),
    fillOpacity = 0.7,
    popup = ~Name
  ) %>%
  addLegend(
    "topright",
    pal = type_palette,
    values = ~Type,
    title = "Location Type",
    opacity = 1
  )

# Render the widget in the document.
map_with_airport

# Write the widget to a standalone HTML file, then capture that page as a PNG
# using a 1000 x 800 viewport.
saveWidget(
  widget = map_with_airport,
  file = "map_with_airport.html",
  selfcontained = TRUE
)

webshot(
  url = "map_with_airport.html",
  file = "map_finale.png",
  vwidth = 1000,
  vheight = 800
)

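From Lines 70-79 I reset the working directory to /Users/asherscott/Desktop/Data 4Fun, loaded the Seoul.csv file into the ST data set using read.csv(), removed the commas from the Tourism_total and Tourism_capital columns so they could be converted to numeric, and displayed the first few rows with head().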

# Switch to the folder this chunk reads Seoul.csv from. Later chunks read
# and write files by bare name, so they also resolve against this directory.
setwd("/Users/asherscott/Desktop/Data 4Fun")

# Load the yearly tourism figures, remove any commas from the two visitor
# count columns, and convert them to numeric.
ST <- read.csv("Seoul.csv") %>%
  mutate(
    across(
      c(Tourism_total, Tourism_capital),
      ~ as.numeric(gsub(",", "", .x))
    )
  )

head(ST)

  Year Tourism_total Tourism_capital Nat_Percent
1 2014      14201516        11425620        80.4
2 2015      13231651        10413209        78.7
3 2016      17241823        13448622        78.0
4 2017      13335758        10506618        78.8
5 2018      15346879        12188460        79.4
6 2019      17502756        13377105        76.4

From Lines 83-111 I used ggplot() and geom_col() to create and format an overlapping bar chart that compares yearly visitor numbers for Seoul with those for all of South Korea. As you can see, in most years Seoul alone accounts for roughly three-quarters of all tourism in the country; the share was noticeably lower in 2020 and 2021.

# Overlay Seoul's visitor count on the national total for each year. Both
# layers use position = "identity", so the bars are drawn over one another
# rather than stacked. Counts are divided by 1e6 to plot them in millions.
tourism_plot <-
  ggplot(data = ST, mapping = aes(x = factor(Year))) +
  geom_col(
    mapping = aes(y = Tourism_total / 1e6, fill = "Total Visitors"),
    color = "black",
    alpha = 0.5,
    position = "identity"
  ) +
  geom_col(
    mapping = aes(y = Tourism_capital / 1e6, fill = "Seoul Visitors"),
    color = "black",
    alpha = 0.7,
    position = "identity"
  ) +
  scale_fill_manual(
    values = c("Seoul Visitors" = "red", "Total Visitors" = "blue")
  ) +
  labs(
    title = "Tourism Trends Over Time",
    x = "Year",
    y = "Number of Tourists (Millions)",
    fill = "Tourist Type"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    # Blank out the grid and both backgrounds.
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    # Tilt the year labels.
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

tourism_plot

# Export the tourism chart as an 8 x 5 inch PNG at 300 dpi.
ggsave(
  "tourism_trends_plot.png",
  plot = tourism_plot,
  width = 8, height = 5, units = "in",
  dpi = 300
)

From Lines 115-118 I loaded in the shopping data set.

# Load the yearly shopping rate / satisfaction figures and preview the first
# six rows.
# NOTE(review): the preview printed under this chunk shows columns X, X.1,
# X.2 and X.3 as NA in every displayed row -- presumably empty trailing
# columns in the CSV; confirm, and consider dropping them before merging.
Shop <- read.csv(file = "Shopping_Int.csv")
head(Shop, n = 6L)

  Year Rate Satisfactory  X X.1 X.2 X.3
1 2024 58.2         58.6 NA  NA  NA  NA
2 2023 57.9         60.6 NA  NA  NA  NA
3 2022 51.5         55.0 NA  NA  NA  NA
4 2021 33.5         37.1 NA  NA  NA  NA
5 2020 16.1         19.7 NA  NA  NA  NA
6 2019 66.2         70.0 NA  NA  NA  NA

From Lines 122-158 I created a combined bar-and-line visualization showing shopping rates and tourist satisfaction by year. To do this, I converted the Year variable to a factor so it is treated as categorical, set the upper limit of the y-axis dynamically from the data using max(), and constructed the chart with geom_bar(), geom_line(), geom_point(), and geom_text().

# Treat Year as categorical so each year gets its own evenly spaced bar.
Shop$Year <- as.factor(Shop$Year)

# Upper y limit: the largest plotted value plus 5 units of headroom, so the
# value labels drawn above the points are not cut off.
y_max <- max(c(Shop$Rate, Shop$Satisfactory), na.rm = TRUE) + 5

# Bars show the shopping rate; the line, points and text labels show the
# satisfaction score for the same year.
shopping_plot <- ggplot(Shop, aes(x = Year)) +
  geom_bar(aes(y = Rate, fill = "Shopping Rate"), stat = "identity") +
  # `linewidth` replaces `size` for lines, which was deprecated in
  # ggplot2 3.4.0. group = 1 joins all years into a single line even though
  # the x axis is a factor.
  geom_line(
    aes(y = Satisfactory, color = "Satisfactory"),
    group = 1,
    linewidth = 1
  ) +
  geom_point(aes(y = Satisfactory, color = "Satisfactory"), size = 3) +
  geom_text(
    aes(y = Satisfactory, label = Satisfactory),
    vjust = -1, color = "black", size = 3.5, show.legend = FALSE
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Shopping Rate and Tourist Satisfaction by Year",
    x = "Year",
    y = "Percentage",
    fill = NULL,
    color = NULL
  ) +
  scale_fill_manual(values = c("Shopping Rate" = "steelblue")) +
  scale_color_manual(values = c("Satisfactory" = "black")) +
  ylim(0, y_max)

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

# Show the chart in the document, then export it as an 8 x 5 inch PNG at
# 300 dpi.
shopping_plot

ggsave(
  "shopping_satisfaction_plot.png",
  plot = shopping_plot,
  width = 8, height = 5, units = "in",
  dpi = 300
)

From Lines 162-170 I converted the Year variable to numeric in both the Shop and ST datasets, merged them by Year into a single dataset, filtered the merged data to include only years 2018–2024, and displayed the resulting combined dataset.

# Put Year on a common numeric type in both tables before joining. Going
# through as.character() first converts a factor's labels rather than its
# internal integer codes.
Shop[["Year"]] <- as.numeric(as.character(Shop[["Year"]]))
ST[["Year"]] <- as.numeric(as.character(ST[["Year"]]))

# Full outer join on Year: years present in only one table are kept, with NA
# in the other table's columns.
Combined <- merge(x = Shop, y = ST, by = "Year", all = TRUE)

# Keep the 2018-2024 rows. The explicit is.na() guard drops rows whose Year
# is missing instead of returning them as all-NA rows.
Combined_filtered <- Combined[
  !is.na(Combined$Year) & Combined$Year >= 2018 & Combined$Year <= 2024,
]

print(Combined_filtered)

   Year Rate Satisfactory  X X.1 X.2 X.3 Tourism_total Tourism_capital
5  2018 63.8         67.2 NA  NA  NA  NA      15346879        12188460
6  2019 66.2         70.0 NA  NA  NA  NA      17502756        13377105
7  2020 16.1         19.7 NA  NA  NA  NA       2519118         1075664
8  2021 33.5         37.1 NA  NA  NA  NA        967003          558341
9  2022 51.5         55.0 NA  NA  NA  NA       3198017         2634378
10 2023 57.9         60.6 NA  NA  NA  NA      11031665         8858427
11 2024 58.2         58.6 NA  NA  NA  NA      16375409        12854696
   Nat_Percent
5         79.4
6         76.4
7         42.7
8         57.7
9         82.4
10        80.3
11        78.5

From Lines 174-182 I copied Tourism_capital in the Combined_filtered dataset into a new Tourism_capital_real column holding the number of actual people, estimated potential customers as Tourism_capital_real × Rate / 10,000, rounded the results for readability, and displayed the Year, Tourism_capital_real, Rate, and Potential_Customers columns.

# Tourism_capital_real is a straight copy of Tourism_capital.
Combined_filtered[["Tourism_capital_real"]] <-
  Combined_filtered[["Tourism_capital"]]

# Estimated customers = Seoul visitors * Rate / 10000, rounded to a whole
# number of people.
# NOTE(review): Rate looks like a percentage, so dividing by 100 would apply
# it directly; dividing by 10000 implies a further 1% factor that is not
# stated anywhere in the text -- confirm this assumption is intended.
Combined_filtered[["Potential_Customers"]] <- round(
  Combined_filtered[["Tourism_capital_real"]] *
    Combined_filtered[["Rate"]] / 10000
)

# Display only the columns relevant to the estimate.
Combined_filtered[
  c("Year", "Tourism_capital_real", "Rate", "Potential_Customers")
]

   Year Tourism_capital_real Rate Potential_Customers
5  2018             12188460 63.8               77762
6  2019             13377105 66.2               88556
7  2020              1075664 16.1                1732
8  2021               558341 33.5                1870
9  2022              2634378 51.5               13567
10 2023              8858427 57.9               51290
11 2024             12854696 58.2               74814

From Lines 185-189 I loaded in the first length-of-stay data set.

# Load the first length-of-stay table and preview the first six rows.
# NOTE(review): LOS is not referenced again in the visible part of this
# document; the later charts are built from LOS2 -- confirm it is still
# needed.
LOS <- read.csv(file = "LengthOfStay.csv")
head(LOS, n = 6L)

  Year    AE AE_NT  DE DE_NT  X X.1
1 2024 1,752 1,353 328   238 NA  NA
2 2023 2,152 1,257 333   229 NA  NA
3 2022 3,047       360   345 NA  NA
4 2021 4,217       265   152 NA  NA
5 2020 3,885       167    85 NA  NA
6 2019 1,442 1,239 245   148 NA  NA

From Lines 193-196 I loaded in the second Length-of-stay data set.

# Load the second length-of-stay table (Year, AE, DE, DE_NT) and preview the
# first six rows.
LOS2 <- read.csv(file = "LengthofStay2.csv")
head(LOS2, n = 6L)

  Year    AE  DE DE_NT
1 2024 1,752 328   238
2 2023 2,152 333   229
3 2022 3,047 360   245
4 2021 4,217 265   152
5 2020 3,885 167    85
6 2019 1,442 245   148

From Lines 200-253 I cleaned the LOS2 dataset by converting the AE, DE, and DE_NT columns to numeric using the mutate() function, where AE = Average Expense, DE = Daily Expense, and DE_NT = Daily Expense with Travel. I then created two bar graphs: one showing all three expense categories and another showing only the daily expenses (DE and DE_NT, excluding AE). Both graphs were customized with colors, labels, themes, and saved as PNG files.

# Remove any commas from the three expense columns and convert them to
# numeric so they can be plotted.
LOS2_clean <- mutate(
  LOS2,
  across(
    all_of(c("AE", "DE", "DE_NT")),
    function(col) as.numeric(gsub(",", "", col))
  )
)

# Long format with one row per Year x expense category. The three expense
# columns are given their display labels before pivoting, so Category holds
# the legend text directly; any other column keeps its own name.
data_long_all <- LOS2_clean %>%
  rename(
    `Average Expense` = AE,
    `Daily Expense` = DE,
    `Daily Expense w/ transit` = DE_NT
  ) %>%
  pivot_longer(cols = -Year, names_to = "Category", values_to = "Count")

# Dodged bar chart of every expense category, grouped by year.
plot_all <-
  ggplot(
    data = data_long_all,
    mapping = aes(x = factor(Year), y = Count, fill = Category)
  ) +
  geom_col(position = position_dodge(width = 0.9)) +
  scale_fill_manual(values = c("steelblue", "orange", "forestgreen")) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Tourism Expenses (All Categories)",
    x = "Year",
    y = "Mean Expense (USD)",
    fill = "Expense Type"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.title = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14)
  )

# Long format restricted to the two daily-expense columns; they are renamed
# to their display labels while being selected, so Category holds the legend
# text directly.
data_long_filtered <- LOS2_clean %>%
  select(Year, `Daily Expense` = DE, `Daily Expense w/ transit` = DE_NT) %>%
  pivot_longer(cols = -Year, names_to = "Category", values_to = "Count")

# Upper y limit: at least 400 (the original fixed limit), but raised to the
# largest value in the data if that is higher. With a hard-coded limit,
# ggplot2 would turn any value above it into NA and drop that bar.
daily_axis_max <- max(400, data_long_filtered$Count, na.rm = TRUE)

# Dodged bar chart of the two daily-expense categories, grouped by year.
plot_filtered <- ggplot(
  data_long_filtered,
  aes(x = factor(Year), y = Count, fill = Category)
) +
  geom_col(position = position_dodge(width = 0.9), width = 0.7) +
  labs(
    title = "Daily Tourism Expenses (Excluding Average Expense)",
    x = "Year",
    y = "Mean Daily Expense (USD)",
    fill = "Expense Type"
  ) +
  scale_y_continuous(labels = comma, limits = c(0, daily_axis_max)) +
  scale_fill_manual(values = c("#FF6B6B", "#4ECDC4")) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    axis.title = element_text(size = 12),
    legend.position = "bottom",
    panel.grid.major.x = element_blank()
  )


# Render both expense charts in the document.
print(plot_all)
print(plot_filtered)

# Export each chart as a 10 x 6 inch PNG.
ggsave(
  filename = "tourism_expenses_all.png",
  plot = plot_all,
  width = 10, height = 6
)
ggsave(
  filename = "daily_expenses_filtered.png",
  plot = plot_filtered,
  width = 10, height = 6
)