CDS301 Final Project

Heyeon Kim, Jaehyoung Jang

Dec 18 2024

Temperature Trends in Korea from 1984 to 2024

Research Question: “Are Summer and Winter Getting Hotter in South Korea?”

Contents

  1. About the Dataset
  2. Sparkline
  3. Line plot
  4. Moving average
  5. Conclusion

About the Dataset

  • 8 datasets (one CSV file per city)

  • 6 Columns

  • Total 595399 rows

  • Date: Year (1984–2024), Month, Day

  • Location: Seoul, Incheon, Busan, Daegu, Daejeon, Gwangju, Ulsan, Wonju

  • Mean Temperature, Minimum Temperature, Maximum Temperature (°C)

Data source: Korea Meteorological Administration

Package load

library(readr)
library(tidyr)
library(ggplot2)
library(ggthemes)
library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(zoo)
## 
## 다음의 패키지를 부착합니다: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

Read CSV file

# Read one city's daily temperature file ("<city>.csv" in the working
# directory).
#
# Column types are pinned instead of guessed so that all eight files are
# parsed identically and readr does not print a column specification for
# every file: Date is read as a date, Location as text, and every remaining
# column (the three temperature columns) as a double.
#
# city: file name without the ".csv" extension, e.g. "Busan".
# Returns the parsed tibble.
read_city <- function(city) {
  read_csv(
    paste0(city, ".csv"),
    col_types = cols(
      Date = col_date(),
      Location = col_character(),
      .default = col_double()
    )
  )
}

Busan <- read_city("Busan")
Daegu <- read_city("Daegu")
Daejeon <- read_city("Daejeon")
Gwangju <- read_city("Gwangju")
Incheon <- read_city("Incheon")
Seoul <- read_city("Seoul")
Ulsan <- read_city("Ulsan")
Wonju <- read_city("Wonju")

data processing

# Prepare one city's table for analysis:
#   1. split Date on "-" into character Year / Month / Day columns,
#   2. drop every row that has a missing value in any column,
#   3. convert Year to numeric.
# Month and Day are left as the strings produced by the split because later
# steps filter on values such as Month == "08".
#
# city_df: a table as returned by read_csv(), with a Date column.
# Returns the cleaned table.
prepare_city <- function(city_df) {
  city_df %>%
    separate(col = Date, into = c("Year", "Month", "Day"), sep = "-") %>%
    drop_na() %>%
    mutate(Year = as.numeric(Year))
}

Busan <- prepare_city(Busan)
Daegu <- prepare_city(Daegu)
Daejeon <- prepare_city(Daejeon)
Gwangju <- prepare_city(Gwangju)
Incheon <- prepare_city(Incheon)
Seoul <- prepare_city(Seoul)
Ulsan <- prepare_city(Ulsan)
Wonju <- prepare_city(Wonju)

Combined data

# Stack the eight city tables into one table and give the three temperature
# columns short names (maxT, miniT, meanT) that are easier to type.
Korea <- rbind(
  Busan, Daegu, Daejeon, Gwangju,
  Incheon, Seoul, Ulsan, Wonju
) %>%
  rename(
    maxT = `Maximum Temperature(°C)`,
    miniT = `Minimum Temperature(°C)`,
    meanT = `Mean Temperature(°C)`
  )

Max Temperature

# Daily maximum temperature only, keyed by location and date parts.
HW_df <- select(Korea, Location, Year, Month, Day, maxT)

Min Temperature

# Daily minimum temperature only, keyed by location and date parts.
CW_df <- select(Korea, Location, Year, Month, Day, miniT)

Sparkline

August Mean Max Temperature

# August mean of the daily maximum temperature, per year, for each city and
# for all cities pooled ("Total").
Aug_HW <- HW_df %>%
  filter(Month == "08")

# One row per Year x Location. `.groups = "drop"` returns an ungrouped table,
# so no leftover Year grouping leaks into the steps below (and the
# "grouped output" message is no longer emitted).
Aug_HWL <- Aug_HW %>%
  group_by(Year, Location) %>%
  summarise(mmaxT = mean(maxT), .groups = "drop")

# Pooled series: mean over every August observation of every city.
Aug_HWT <- Aug_HW %>%
  group_by(Year) %>%
  summarise(mmaxT = mean(maxT)) %>%
  mutate(Location = "Total") %>%
  select(Year, Location, mmaxT)

# Combine both series, round to one decimal, and order the Location factor
# with "Total" first so it becomes the top facet of the sparkline panel.
Aug_Hmean <- bind_rows(Aug_HWL, Aug_HWT) %>%
  mutate(
    mmaxT = round(mmaxT, 1),
    Location = factor(
      Location,
      levels = c("Total", setdiff(unique(Location), "Total"))
    )
  )

# Per-Location annotation tables used by the plot: the coolest year, the
# hottest year, and the last year on record. which.min()/which.max() keep
# only the first row when several years tie.
Aug_Mins <- Aug_Hmean %>%
  group_by(Location) %>%
  slice(which.min(mmaxT)) %>%
  ungroup()
Aug_Maxs <- Aug_Hmean %>%
  group_by(Location) %>%
  slice(which.max(mmaxT)) %>%
  ungroup()
Aug_Ends <- Aug_Hmean %>%
  group_by(Location) %>%
  filter(Year == max(Year)) %>%
  ungroup()

# Interquartile band per Location, joined back onto every yearly row so it
# can be drawn as a ribbon. The join key is stated explicitly instead of
# being guessed from the shared column names.
Aug_quarts <- Aug_Hmean %>%
  group_by(Location) %>%
  summarise(
    Aug_quart1 = quantile(mmaxT, 0.25),
    Aug_quart2 = quantile(mmaxT, 0.75)
  ) %>%
  right_join(Aug_Hmean, by = "Location")
# Sparkline panel: one strip per Location with its own y scale. Each strip
# shows the yearly series over a grey interquartile band, marks the lowest
# (blue) and highest (red) year, and prints the final value and the Location
# name to the right of the line.
ggplot(data = Aug_Hmean, mapping = aes(x = Year, y = mmaxT)) +
  facet_grid(rows = vars(Location), scales = "free_y") +
  geom_ribbon(
    data = Aug_quarts,
    mapping = aes(ymin = Aug_quart1, ymax = Aug_quart2),
    fill = "grey90"
  ) +
  geom_line(linewidth = 0.3) +
  geom_point(data = Aug_Mins, colour = "blue") +
  geom_point(data = Aug_Maxs, colour = "red") +
  geom_text(
    data = Aug_Mins,
    mapping = aes(label = round(mmaxT, 1)),
    size = 3, vjust = -1
  ) +
  geom_text(
    data = Aug_Maxs,
    mapping = aes(label = round(mmaxT, 1)),
    size = 3, vjust = 2.5
  ) +
  geom_text(
    data = Aug_Ends,
    mapping = aes(label = round(mmaxT, 1)),
    size = 3, hjust = 0, nudge_x = 1
  ) +
  geom_text(
    data = Aug_Ends,
    mapping = aes(label = Location),
    size = 3, hjust = 0, nudge_x = 5
  ) +
  # Reserve a quarter of the year span to the right of the last year so the
  # end-of-line labels fit inside the panel.
  expand_limits(
    x = max(Aug_Hmean$Year) + 0.25 * diff(range(Aug_Hmean$Year))
  ) +
  scale_x_continuous(
    breaks = seq(min(Aug_Hmean$Year), max(Aug_Hmean$Year), by = 5)
  ) +
  scale_y_continuous(expand = c(0.1, 0)) +
  theme_tufte(base_size = 12) +
  theme(
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    strip.text = element_blank()
  )

# Compare the first and last year of the record, Location by Location.
# After the merge, columns that exist in both years carry a _1984 / _2024
# suffix; Difference is the 2024 value minus the 1984 value.
Aug_filtered <- filter(Aug_Hmean, Year %in% c(1984, 2024))

Aug_1984 <- filter(Aug_filtered, Year == 1984)
Aug_2024 <- filter(Aug_filtered, Year == 2024)

Aug_diff <- merge(
  Aug_1984, Aug_2024,
  by = "Location",
  suffixes = c("_1984", "_2024")
) %>%
  mutate(Difference = mmaxT_2024 - mmaxT_1984)
  • In August, the overall trend shows a steady rise in maximum temperatures from 1984 to 2024

  • Except for Daegu, all cities reached their highest temperatures after 2010

  • Comparing 1984 and 2024, Seoul and Incheon showed the largest increases, rising by 2.8°C and 2.7°C

January Mean Min Temperature

# January mean of the daily minimum temperature, per year, for each city and
# for all cities pooled ("Total").
Jan_CW <- CW_df %>%
  filter(Month == "01")

# One row per Year x Location. `.groups = "drop"` returns an ungrouped table,
# so no leftover Year grouping leaks into the steps below (and the
# "grouped output" message is no longer emitted).
Jan_CWL <- Jan_CW %>%
  group_by(Year, Location) %>%
  summarise(mminiT = mean(miniT), .groups = "drop")

# Pooled series: mean over every January observation of every city.
Jan_CWT <- Jan_CW %>%
  group_by(Year) %>%
  summarise(mminiT = mean(miniT)) %>%
  mutate(Location = "Total") %>%
  select(Year, Location, mminiT)

# Combine both series, round to one decimal, and order the Location factor
# with "Total" first so it becomes the top facet of the sparkline panel.
Jan_Cmean <- bind_rows(Jan_CWL, Jan_CWT) %>%
  mutate(
    mminiT = round(mminiT, 1),
    Location = factor(
      Location,
      levels = c("Total", setdiff(unique(Location), "Total"))
    )
  )

# Per-Location annotation tables used by the plot: the coldest year, the
# mildest year, and the last year on record. which.min()/which.max() keep
# only the first row when several years tie.
Jan_Mins <- Jan_Cmean %>%
  group_by(Location) %>%
  slice(which.min(mminiT)) %>%
  ungroup()
Jan_Maxs <- Jan_Cmean %>%
  group_by(Location) %>%
  slice(which.max(mminiT)) %>%
  ungroup()
Jan_Ends <- Jan_Cmean %>%
  group_by(Location) %>%
  filter(Year == max(Year)) %>%
  ungroup()

# Interquartile band per Location, joined back onto every yearly row so it
# can be drawn as a ribbon. The join key is stated explicitly instead of
# being guessed from the shared column names.
Jan_quarts <- Jan_Cmean %>%
  group_by(Location) %>%
  summarise(
    Jan_quart1 = quantile(mminiT, 0.25),
    Jan_quart2 = quantile(mminiT, 0.75)
  ) %>%
  right_join(Jan_Cmean, by = "Location")
# Sparkline panel: one strip per Location with its own y scale. Each strip
# shows the yearly series over a grey interquartile band, marks the lowest
# (blue) and highest (red) year, and prints the final value and the Location
# name to the right of the line.
ggplot(data = Jan_Cmean, mapping = aes(x = Year, y = mminiT)) +
  facet_grid(rows = vars(Location), scales = "free_y") +
  geom_ribbon(
    data = Jan_quarts,
    mapping = aes(ymin = Jan_quart1, ymax = Jan_quart2),
    fill = "grey90"
  ) +
  geom_line(linewidth = 0.3) +
  geom_point(data = Jan_Mins, colour = "blue") +
  geom_point(data = Jan_Maxs, colour = "red") +
  geom_text(
    data = Jan_Mins,
    mapping = aes(label = round(mminiT, 1)),
    size = 3, vjust = -1
  ) +
  geom_text(
    data = Jan_Maxs,
    mapping = aes(label = round(mminiT, 1)),
    size = 3, vjust = 2.5
  ) +
  geom_text(
    data = Jan_Ends,
    mapping = aes(label = round(mminiT, 1)),
    size = 3, hjust = 0, nudge_x = 1
  ) +
  geom_text(
    data = Jan_Ends,
    mapping = aes(label = Location),
    size = 3, hjust = 0, nudge_x = 5
  ) +
  # Reserve a quarter of the year span to the right of the last year so the
  # end-of-line labels fit inside the panel.
  expand_limits(
    x = max(Jan_Cmean$Year) + 0.25 * diff(range(Jan_Cmean$Year))
  ) +
  scale_x_continuous(
    breaks = seq(min(Jan_Cmean$Year), max(Jan_Cmean$Year), by = 5)
  ) +
  scale_y_continuous(expand = c(0.1, 0)) +
  theme_tufte(base_size = 12) +
  theme(
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    strip.text = element_blank()
  )

# Compare the first and last year of the record, Location by Location.
# After the merge, columns that exist in both years carry a _1984 / _2024
# suffix; Difference is the 2024 value minus the 1984 value.
Jan_filtered <- filter(Jan_Cmean, Year %in% c(1984, 2024))

Jan_1984 <- filter(Jan_filtered, Year == 1984)
Jan_2024 <- filter(Jan_filtered, Year == 2024)

Jan_diff <- merge(
  Jan_1984, Jan_2024,
  by = "Location",
  suffixes = c("_1984", "_2024")
) %>%
  mutate(Difference = mminiT_2024 - mminiT_1984)
  • In January, the overall trend shows a steady increase in minimum temperatures from 1984 to 2024, with a sharp drop in 2011 and a significant rise in 2020.

  • Comparing 1984 and 2024, Wonju showed the largest temperature increase, rising by 15.6°C.

  • The coldest northern regions of South Korea have also experienced temperature increases, making their minimum temperatures in 2024 similar to those of other regions

Line Plot

August

# Daily means across all cities of the maximum and minimum temperature, for
# the two comparison years only.
#
# The year filter runs before the aggregation so daily means are computed
# only for the rows that are kept. The summary columns are named and rounded
# where they are created, and `.groups = "drop"` returns an ungrouped table.
Total <- Korea %>%
  filter(Year %in% c(1984, 2024)) %>%
  group_by(Year, Month, Day) %>%
  summarise(
    mmaxT = round(mean(maxT), 1),
    mminiT = round(mean(miniT), 1),
    .groups = "drop"
  ) %>%
  mutate(Location = "Total") %>%
  select(Year, Month, Day, Location, mmaxT, mminiT)

# August rows of the all-city daily table, maximum temperature only.
Aug_Total <- select(
  filter(Total, Month == "08"),
  Year, Month, Day, mmaxT
)

# Per year: number of August days whose all-city mean maximum is at least 33.
Aug_Count <- summarise(
  group_by(Aug_Total, Year),
  days_over_33 = sum(mmaxT >= 33)
)

# Daily all-city maximum temperature in August, one line per year, with a
# grey reference line at 33 (the heatwave threshold cited in the text).
ggplot(
  Aug_Total,
  aes(x = Day, y = mmaxT, group = Year, color = as.factor(Year))
) +
  # `linewidth` replaces `size` for lines, which is deprecated since
  # ggplot2 3.4.0 and was the source of the deprecation warning.
  geom_hline(
    yintercept = 33, color = "gray", linetype = "solid", linewidth = 0.7
  ) +
  geom_line(linewidth = 1, alpha = 0.7) +
  scale_color_manual(
    values = c("1984" = "skyblue", "2024" = "red"),
    name = "Year"
  ) +
  labs(
    x = "Day of August",
    y = "Maximum Temperature (°C)",
    title = "Daily Maximum Temperatures in August Between 1984 and 2024"
  ) +
  # theme_tufte() is a complete theme, so a preceding theme_minimal() would
  # be overwritten entirely; only the effective theme is kept.
  theme_tufte()

  • August maximum temperatures in 2024 were higher than in 1984 on nearly every day, with more days exceeding the heatwave threshold of 33°C.

January

# January rows of the all-city daily table, minimum temperature only.
Jan_Total <- select(
  filter(Total, Month == "01"),
  Year, Month, Day, mminiT
)

# Per year: number of January days whose all-city mean minimum is below 0.
Jan_Count <- summarise(
  group_by(Jan_Total, Year),
  days_under_0 = sum(mminiT < 0)
)

# Daily all-city minimum temperature in January, one line per year, with a
# grey reference line at 0 (freezing).
ggplot(
  Jan_Total,
  aes(x = Day, y = mminiT, group = Year, color = as.factor(Year))
) +
  # `linewidth` replaces `size` for lines, which is deprecated since
  # ggplot2 3.4.0.
  geom_hline(
    yintercept = 0, color = "gray", linetype = "solid", linewidth = 0.7
  ) +
  geom_line(linewidth = 1, alpha = 0.7) +
  scale_color_manual(
    values = c("1984" = "blue", "2024" = "orange"),
    name = "Year"
  ) +
  labs(
    x = "Day of January",
    y = "Minimum Temperature (°C)",
    title = "Daily Minimum Temperatures in January Between 1984 and 2024"
  ) +
  # theme_tufte() is a complete theme, so a preceding theme_minimal() would
  # be overwritten entirely; only the effective theme is kept.
  theme_tufte()

  • January minimum temperatures in 2024 were higher than in 1984 on nearly every day, with temperatures reaching above zero

Moving Average

August

# Yearly August mean temperature over all cities (rounded to one decimal),
# tagged with the decade each year falls in (1980, 1990, ...) and smoothed
# with a centred 7-year moving average. The first and last three years have
# no full window, so their moving average is NA.
Aug_Mean <- Korea %>%
  filter(Month == "08") %>%
  group_by(Year) %>%
  summarise(mmeanT = round(mean(meanT), 1)) %>%
  mutate(
    Aug_Decade = (Year %/% 10) * 10,
    Aug_7ma = rollmean(mmeanT, 7, fill = NA, align = "center")
  )

# Mean of the yearly values within each decade.
Aug_avg <- Aug_Mean %>%
  group_by(Aug_Decade) %>%
  summarise(Decade_Avg = mean(mmeanT, na.rm = TRUE))

# August mean temperature: decadal averages as bars, with the yearly series
# and its 7-year moving average drawn on top as lines.
ggplot() +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  # NOTE(review): each bar is centred on the first year of its decade
  # (x = 1980, 1990, ...), so with width = 8 the 1990s bar spans 1986-1994;
  # consider x = Aug_Decade + 4.5 to align bars with the yearly lines.
  geom_col(
    data = Aug_avg,
    aes(x = Aug_Decade, y = Decade_Avg, fill = "Decadal Average"),
    alpha = 0.8, width = 8
  ) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(
    data = Aug_Mean,
    aes(x = Year, y = mmeanT, color = "Yearly Temperature"),
    linewidth = 1
  ) +
  # The moving average is NA at both ends of the series by construction;
  # na.rm = TRUE drops those rows silently instead of with a warning.
  geom_line(
    data = Aug_Mean,
    aes(x = Year, y = Aug_7ma, color = "7-Year Moving Average"),
    linewidth = 1, na.rm = TRUE
  ) +
  labs(
    title = "Change in August Mean Temperature (1984-2024)",
    x = "Year",
    y = "Mean Temperature (°C)",
    color = "",
    fill = ""
  ) +
  scale_color_manual(
    values = c(
      "Yearly Temperature" = "yellowgreen",
      "7-Year Moving Average" = "blue"
    )
  ) +
  scale_fill_manual(values = c("Decadal Average" = "orange")) +
  # theme_tufte() is a complete theme, so a preceding theme_minimal() would
  # be overwritten entirely; only the effective theme is kept.
  theme_tufte()

  • August mean temperatures are rising overall, indicating hotter summers.

  • The year 2011 appears to be an outlier.

January

# Yearly January mean temperature over all cities (rounded to one decimal),
# tagged with the decade each year falls in (1980, 1990, ...) and smoothed
# with a centred 7-year moving average. The first and last three years have
# no full window, so their moving average is NA.
Jan_Mean <- Korea %>%
  filter(Month == "01") %>%
  group_by(Year) %>%
  summarise(mmeanT = round(mean(meanT), 1)) %>%
  mutate(
    Jan_Decade = (Year %/% 10) * 10,
    Jan_7ma = rollmean(mmeanT, 7, fill = NA, align = "center")
  )

# Mean of the yearly values within each decade.
Jan_avg <- Jan_Mean %>%
  group_by(Jan_Decade) %>%
  summarise(Decade_Avg = mean(mmeanT, na.rm = TRUE))

# January mean temperature: decadal averages as bars, with the yearly series
# and its 7-year moving average drawn on top as lines.
ggplot() +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  # NOTE(review): each bar is centred on the first year of its decade
  # (x = 1980, 1990, ...), so with width = 9 the 1990s bar spans
  # 1985.5-1994.5; consider x = Jan_Decade + 4.5 to align bars with the
  # yearly lines.
  geom_col(
    data = Jan_avg,
    aes(x = Jan_Decade, y = Decade_Avg, fill = "Decadal Average"),
    alpha = 0.7, width = 9
  ) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(
    data = Jan_Mean,
    aes(x = Year, y = mmeanT, color = "Yearly Temperature"),
    linewidth = 1
  ) +
  # The moving average is NA at both ends of the series by construction;
  # na.rm = TRUE drops those rows silently instead of with a warning.
  geom_line(
    data = Jan_Mean,
    aes(x = Year, y = Jan_7ma, color = "7-Year Moving Average"),
    linewidth = 1, na.rm = TRUE
  ) +
  labs(
    title = "Change in January Mean Temperature (1984-2024)",
    x = "Year",
    y = "Mean Temperature (°C)",
    color = "",
    fill = ""
  ) +
  scale_color_manual(
    values = c(
      "Yearly Temperature" = "yellowgreen",
      "7-Year Moving Average" = "red"
    )
  ) +
  scale_fill_manual(values = c("Decadal Average" = "blue")) +
  # theme_tufte() is a complete theme, so a preceding theme_minimal() would
  # be overwritten entirely; only the effective theme is kept.
  theme_tufte()

  • January mean temperatures are rising overall, indicating warmer winters.

Conclusion

Winter:

Both minimum and average temperatures in January are rising, with extreme cold becoming less frequent.

Winters are gradually getting warmer over time.

Summer:

Both maximum and average temperatures in August have steadily increased since 1984.

Summers are gradually getting hotter with more frequent and prolonged heatwaves.