library(knitr)
include_graphics("https://postfiles.pstatic.net/MjAyNDEyMThfMTAy/MDAxNzM0NTIxNjUwNTYx.tFFboe87k3ODvvvoKwjsREKTcIemE1lLyEcVGa3UKF0g.5sRioZVUqk-aFRhHCd_C0Jy3OXGAs2uaKKUi2G-z89Ag.JPEG/CDS301_22222.jpg?type=w773")
include_graphics("https://postfiles.pstatic.net/MjAyNDEyMThfMTAx/MDAxNzM0NTIxNjQ3MTgz.Efliyk3uolzPTtQMAoq1-hpWQBxE4oAe6FoxGxoDKRIg.VA4REXMNfVYaxuN1-5gVpaxJqvoi5LuM7sb8TrDAwe8g.JPEG/CDS301_11111.jpg?type=w773")
The first graph shows South Korea’s annual mean temperature from 1975 to 2019, smoothed with 7-year and 10-year moving averages. Because it shows only the moving average of the annual mean temperature, it is difficult to analyze the trend for the individual months we are interested in: January, the representative winter month in Korea, and August, the representative summer month. Therefore, we will visualize the 7-year and 10-year moving averages of the mean minimum temperature in January and of the mean maximum temperature in August from 1984 to 2024 in South Korea.
The second graph shows the monthly air temperature and precipitation in 13 regions of Korea for two periods (1971-2000 and 1981-2010), with temperature drawn as a line plot and precipitation as a bar plot. Since this research is concerned with the daily temperature changes in January and August of 1984 and 2024, we will make a line plot with the day of the month on the x-axis and the temperature on the y-axis to directly compare the temperatures in January and August of 1984 and 2024.
In the second graph, it is also difficult to compare temperature changes between regions, so we will select eight cities and create a sparkline to find the temperature changes over 40 years by city and see which cities had the largest temperature changes.
Before starting, note that because temperature is a numerical value subject to fluctuations and anomalies, temperature changes are difficult to inspect visually from the raw values alone.
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## 다음의 패키지를 부착합니다: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
The readr package was used to load the CSV files.
The tidyr, ggplot2, ggthemes, and dplyr packages were used for data processing and to create the line plots and sparklines.
The zoo package was used to compute the moving averages.
## Rows: 14885 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Location
## dbl (3): Mean Temperature(°C), Minimum Temperature(°C), Maximum Temperature...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 14885 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Location
## dbl (3): Mean Temperature(°C), Minimum Temperature(°C), Maximum Temperature...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 14885 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Location
## dbl (3): Mean Temperature(°C), Minimum Temperature(°C), Maximum Temperature...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 14885 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Location
## dbl (3): Mean Temperature(°C), Minimum Temperature(°C), Maximum Temperature...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 14885 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Location
## dbl (3): Mean Temperature(°C), Minimum Temperature(°C), Maximum Temperature...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 14885 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Location
## dbl (3): Mean Temperature(°C), Minimum Temperature(°C), Maximum Temperature...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 14885 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Location
## dbl (3): Mean Temperature(°C), Minimum Temperature(°C), Maximum Temperature...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 14885 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Location
## dbl (3): Mean Temperature(°C), Minimum Temperature(°C), Maximum Temperature...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Prepare one city's table for analysis.
#
# Splits the `Date` column on "-" into character `Year`, `Month`, and `Day`
# columns, drops every row that contains an NA, and converts `Year` to numeric.
# `Month` and `Day` are left as the strings produced by the split; later code
# filters on values such as Month == "01".
#
# city: a data frame with a `Date` column.
# Returns the prepared data frame.
prepare_city <- function(city) {
  city <- city %>%
    separate(col = Date, into = c("Year", "Month", "Day"), sep = "-") %>%
    drop_na()
  # separate() yields character columns, so a direct numeric conversion is
  # enough here.
  city$Year <- as.numeric(city$Year)
  city
}

# The same preparation is applied to each of the eight city tables.
Busan <- prepare_city(Busan)
Daegu <- prepare_city(Daegu)
Daejeon <- prepare_city(Daejeon)
Gwangju <- prepare_city(Gwangju)
Incheon <- prepare_city(Incheon)
Seoul <- prepare_city(Seoul)
Ulsan <- prepare_city(Ulsan)
Wonju <- prepare_city(Wonju)
# Stack the eight city tables into one data set and give the three temperature
# columns the short names used by the rest of the analysis.
Korea <- rbind(Busan, Daegu, Daejeon, Gwangju, Incheon, Seoul, Ulsan, Wonju) %>%
  rename(
    maxT = "Maximum Temperature(°C)",
    miniT = "Minimum Temperature(°C)",
    meanT = "Mean Temperature(°C)"
  )
For the eight CSV files loaded through the readr package, the date column was split from a single combined column into three columns, “Year”, “Month”, and “Day”, using the separate() function.
Rows containing NA values were removed from each city’s dataset, and the Year column was converted to a numeric type.
Finally, the eight city datasets were combined into a single dataset named “Korea”, and three columns were renamed: “Maximum Temperature (°C)” to “maxT”, “Minimum Temperature (°C)” to “miniT”, and “Mean Temperature (°C)” to “meanT”.
# Yearly January mean temperature over all rows of `Korea`, rounded to one
# decimal, together with the decade each year belongs to (e.g. 1984 -> 1980).
Jan_Mean <- Korea %>%
  filter(Month == "01") %>%
  group_by(Year) %>%
  summarise(mmeanT = round(mean(meanT), 1)) %>%
  mutate(Jan_Decade = (Year %/% 10) * 10)

# Average of the (rounded) yearly means within each decade; drawn as bars.
Jan_avg <- Jan_Mean %>%
  group_by(Jan_Decade) %>%
  summarize(Decade_Avg = mean(mmeanT, na.rm = TRUE))

# Centered 7-year rolling mean; positions without a full window are NA.
Jan_Mean$Jan_7ma <- rollmean(Jan_Mean$mmeanT, 7, fill = NA, align = "center")
# January mean temperature: decadal averages as bars, with the yearly series
# and its 7-year moving average drawn on top as lines.
# NOTE(review): each bar is centered on the decade's first year (x = Jan_Decade),
# not on the middle of the decade - confirm this placement is intended.
ggplot() +
  geom_bar(
    data = Jan_avg,
    aes(x = Jan_Decade, y = Decade_Avg, fill = "Decadal Average"),
    stat = "identity", alpha = 0.7, width = 9
  ) +
  # `linewidth` replaces `size` for lines, which was deprecated in
  # ggplot2 3.4.0 and raised a warning when this chunk ran.
  geom_line(
    data = Jan_Mean,
    aes(x = Year, y = mmeanT, color = "Yearly Temperature"),
    linewidth = 1
  ) +
  geom_line(
    data = Jan_Mean,
    aes(x = Year, y = Jan_7ma, color = "7-Year Moving Average"),
    linewidth = 1
  ) +
  labs(
    title = "Change in January Mean Temperature (1984-2024)",
    x = "Year",
    y = "Mean Temperature (°C)",
    color = "",
    fill = ""
  ) +
  scale_color_manual(
    values = c("Yearly Temperature" = "yellowgreen", "7-Year Moving Average" = "purple")
  ) +
  scale_fill_manual(values = c("Decadal Average" = "blue")) +
  # theme_tufte() is a complete theme and replaces any theme added before it,
  # so the former theme_minimal() call had no effect and is omitted.
  theme_tufte()
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).
First, the green line represents the yearly temperature. We can see extreme highs and lows, indicating strong variability in January temperatures from year to year.
Second, the purple line, which is the 7-year moving average, shows a smoother pattern. There are periods when the mean temperature decreases, such as the late 2000s. However, the mean temperature generally increases over the 40 years, particularly in the late 1990s and the 2020s.
Lastly, the blue bars represent the average for each decade, showing the long-term trend. Here, we notice that while there are ups and downs, the overall tendency is a gradual increase in temperature over the 40-year period.
In summary, this graph highlights significant yearly variability in January temperatures, but the long-term trend suggests January is getting warmer in Korea.
# Yearly August mean temperature over all rows of `Korea`, rounded to one
# decimal, together with the decade each year belongs to (e.g. 1984 -> 1980).
Aug_Mean <- Korea %>%
  filter(Month == "08") %>%
  group_by(Year) %>%
  summarise(mmeanT = round(mean(meanT), 1)) %>%
  mutate(Aug_Decade = (Year %/% 10) * 10)

# Average of the (rounded) yearly means within each decade; drawn as bars.
Aug_avg <- Aug_Mean %>%
  group_by(Aug_Decade) %>%
  summarize(Decade_Avg = mean(mmeanT, na.rm = TRUE))

# Centered 7-year rolling mean; positions without a full window are NA.
Aug_Mean$Aug_7ma <- rollmean(Aug_Mean$mmeanT, 7, fill = NA, align = "center")
# August mean temperature: decadal averages as bars, with the yearly series
# and its 7-year moving average drawn on top as lines.
# NOTE(review): each bar is centered on the decade's first year (x = Aug_Decade),
# not on the middle of the decade - confirm this placement is intended.
ggplot() +
  geom_bar(
    data = Aug_avg,
    aes(x = Aug_Decade, y = Decade_Avg, fill = "Decadal Average"),
    stat = "identity", alpha = 0.6, width = 8
  ) +
  # `linewidth` replaces `size` for lines, which is deprecated since
  # ggplot2 3.4.0.
  geom_line(
    data = Aug_Mean,
    aes(x = Year, y = mmeanT, color = "Yearly Temperature"),
    linewidth = 1
  ) +
  geom_line(
    data = Aug_Mean,
    aes(x = Year, y = Aug_7ma, color = "7-Year Moving Average"),
    linewidth = 1
  ) +
  labs(
    title = "Change in August Mean Temperature (1984-2024)",
    x = "Year",
    y = "Mean Temperature (°C)",
    color = "",
    fill = ""
  ) +
  scale_color_manual(
    values = c("Yearly Temperature" = "yellowgreen", "7-Year Moving Average" = "purple")
  ) +
  scale_fill_manual(values = c("Decadal Average" = "red")) +
  # theme_tufte() is a complete theme and replaces any theme added before it,
  # so the former theme_minimal() call had no effect and is omitted.
  theme_tufte()
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).
In this graph, the red bars represent the average for each decade, while the green line shows the yearly August temperatures. The purple line indicates the 7-year moving average.
The August temperatures seem relatively stable, with less drastic fluctuations in the yearly temperatures. However, there is a noticeable gradual increase in both the yearly values and the 7-year moving average over time, especially after the 2000s.
In conclusion, while August temperatures show smaller variability compared to January, the overall trend points to August getting hotter in Korea over the decades.
## `summarise()` has grouped output by 'Year', 'Month'. You can override using the
## `.groups` argument.
# Keep only the two comparison years of the averaged daily table, label it as
# "Total", and give the averaged columns short names rounded to one decimal.
# NOTE(review): `Total` is created before this section (not shown here); this
# code assumes it already has `mean(maxT)` and `mean(miniT)` columns.
Total <- Total %>%
  mutate(Location = "Total") %>%
  select("Year", "Month", "Day", "Location", "mean(maxT)", "mean(miniT)") %>%
  filter(Year %in% c(1984, 2024)) %>%
  rename(mmaxT = "mean(maxT)", mminiT = "mean(miniT)")
Total$mmaxT <- round(Total$mmaxT, 1)
Total$mminiT <- round(Total$mminiT, 1)

# Daily minimum temperatures for January of the two comparison years.
Jan_Total <- Total %>%
  filter(Month == "01") %>%
  select("Year", "Month", "Day", "mminiT")

# Number of January days per year whose minimum was below 0 °C.
Jan_Count <- Jan_Total %>%
  group_by(Year) %>%
  summarize(days_under_0 = sum(mminiT < 0))
# Daily January minimum temperature, one line per year, with a grey reference
# line at 0 °C.
ggplot(Jan_Total, aes(x = Day, y = mminiT, group = Year, color = as.factor(Year))) +
  # `linewidth` replaces `size` for lines, which is deprecated since
  # ggplot2 3.4.0.
  geom_hline(yintercept = 0, color = "gray", linetype = "solid", linewidth = 0.7) +
  geom_line(linewidth = 1, alpha = 0.7) +
  scale_color_manual(
    values = c("1984" = "blue", "2024" = "red"),
    name = "Year"
  ) +
  labs(
    x = "Day of January",
    y = "Minimum Temperature (°C)",
    title = "Daily Minimum Temperatures in January Between 1984 and 2024"
  ) +
  # theme_tufte() is a complete theme and replaces any theme added before it,
  # so the former theme_minimal() call had no effect and is omitted.
  theme_tufte()
January minimum temperatures in 2024 were higher than in 1984 on nearly every day, with temperatures reaching above zero.
In 1984, there were no days with temperatures above zero, and most days stayed below -5°C.
In 2024, about 9 out of 31 days recorded above-zero temperatures, and the remaining days were still warmer than in 1984.
# Daily maximum temperatures for August of the two comparison years.
Aug_Total <- Total %>%
  filter(Month == "08") %>%
  select("Year", "Month", "Day", "mmaxT")

# Daily August maximum temperature, one line per year, with a grey reference
# line at the 33 °C heatwave threshold.
ggplot(Aug_Total, aes(x = Day, y = mmaxT, group = Year, color = as.factor(Year))) +
  # `linewidth` replaces `size` for lines, which is deprecated since
  # ggplot2 3.4.0.
  geom_hline(yintercept = 33, color = "gray", linetype = "solid", linewidth = 0.7) +
  geom_line(linewidth = 1, alpha = 0.7) +
  scale_color_manual(
    values = c("1984" = "blue", "2024" = "red"),
    name = "Year"
  ) +
  labs(
    x = "Day of August",
    y = "Maximum Temperature (°C)",
    title = "Daily Maximum Temperatures in August Between 1984 and 2024"
  ) +
  # theme_tufte() is a complete theme and replaces any theme added before it,
  # so the former theme_minimal() call had no effect and is omitted.
  theme_tufte()
August maximum temperatures in 2024 were higher than in 1984 on nearly every day, with more days exceeding the heatwave threshold of 33°C.
Temperatures in 2024 were higher than in 1984 on all days except from the 9th to the 12th.
Over the past 40 years, the strength and frequency of summer heat waves have increased, which can be seen as a result of climate change.
# Daily minimum temperatures, restricted to January.
CW_df <- Korea %>%
  select("Location", "Year", "Month", "Day", "miniT")
Jan_CW <- CW_df %>%
  filter(Month == "01")

# Mean January minimum per year and location. `.groups = "drop"` returns an
# ungrouped table, so no leftover Year grouping affects the steps below and
# summarise() no longer prints its grouping message.
Jan_CWL <- Jan_CW %>%
  group_by(Year, Location) %>%
  summarise(mean(miniT), .groups = "drop")

# Mean January minimum per year over all locations, labelled "Total".
Jan_CWT <- Jan_CW %>%
  group_by(Year) %>%
  summarise(mean(miniT))
Jan_CWT <- Jan_CWT %>%
  mutate(Location = "Total") %>%
  select("Year", "Location", "mean(miniT)")

# Combine both tables, shorten the value column name, and round to one decimal.
Jan_Cmean <- rbind(Jan_CWL, Jan_CWT)
Jan_Cmean <- Jan_Cmean %>%
  rename(mminiT = "mean(miniT)")
Jan_Cmean$mminiT <- round(Jan_Cmean$mminiT, 1)

# Order the facets with "Total" first, followed by the individual locations.
Jan_Cmean <- Jan_Cmean %>%
  mutate(Location = factor(Location, levels = c("Total", setdiff(unique(Location), "Total"))))

# Per-location helper tables for the sparkline annotations: the lowest year,
# the highest year, and the most recent year.
Jan_Mins <- group_by(Jan_Cmean, Location) %>% slice(which.min(mminiT))
Jan_Maxs <- group_by(Jan_Cmean, Location) %>% slice(which.max(mminiT))
Jan_Ends <- group_by(Jan_Cmean, Location) %>% filter(Year == max(Year))

# Per-location quartiles attached to every yearly row (used for the ribbon).
# The join key is stated explicitly instead of being guessed by dplyr.
Jan_quarts <- Jan_Cmean %>%
  group_by(Location) %>%
  summarize(
    Jan_quart1 = quantile(mminiT, 0.25),
    Jan_quart2 = quantile(mminiT, 0.75)
  ) %>%
  right_join(Jan_Cmean, by = "Location")
# Sparkline panel: one facet row per location showing the yearly mean January
# minimum temperature. The grey band spans each location's first to third
# quartile, the blue and red points mark its lowest and highest year, and the
# latest value and the location name are printed to the right of the line.
ggplot(Jan_Cmean, aes(x = Year, y = mminiT)) +
  facet_grid(Location ~ ., scales = "free_y") +
  geom_ribbon(
    data = Jan_quarts,
    aes(ymin = Jan_quart1, ymax = Jan_quart2),
    fill = "grey90"
  ) +
  geom_line(linewidth = 0.3) +
  geom_point(data = Jan_Mins, col = "blue") +
  geom_point(data = Jan_Maxs, col = "red") +
  geom_text(data = Jan_Mins, aes(label = round(mminiT, 1)), size = 3, vjust = -1) +
  geom_text(data = Jan_Maxs, aes(label = round(mminiT, 1)), size = 3, vjust = 2.5) +
  geom_text(
    data = Jan_Ends, aes(label = round(mminiT, 1)),
    size = 3, hjust = 0, nudge_x = 1
  ) +
  geom_text(
    data = Jan_Ends, aes(label = Location),
    size = 3, hjust = 0, nudge_x = 5
  ) +
  # Extend the x-axis by a quarter of the year range to make room for the
  # labels printed after the last year.
  expand_limits(
    x = max(Jan_Cmean$Year) + (0.25 * (max(Jan_Cmean$Year) - min(Jan_Cmean$Year)))
  ) +
  scale_x_continuous(breaks = seq(min(Jan_Cmean$Year), max(Jan_Cmean$Year), 5)) +
  scale_y_continuous(expand = c(0.1, 0)) +
  theme_tufte(base_size = 12) +
  theme(
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    strip.text = element_blank()
  )

# Rows for the two comparison years (start of the 1984-vs-2024 comparison).
Jan_filtered <- Jan_Cmean[Jan_Cmean$Year %in% c(1984, 2024), ]
# Split the comparison rows by year, then pair them per location and compute
# the change in mean January minimum temperature (2024 minus 1984).
Jan_1984 <- Jan_filtered[Jan_filtered$Year == 1984, ]
Jan_2024 <- Jan_filtered[Jan_filtered$Year == 2024, ]
Jan_diff <- merge(
  Jan_1984, Jan_2024,
  by = "Location", suffixes = c("_1984", "_2024")
)
Jan_diff <- Jan_diff %>%
  mutate(Difference = mminiT_2024 - mminiT_1984)
# Print the comparison table.
Jan_diff
In January, the overall trend shows a steady increase in minimum temperatures from 1984 to 2024, with a sharp drop in 2011 and a significant rise in 2020.
Comparing 1984 and 2024, Wonju showed the largest temperature increase, rising by 15.6°C.
The coldest northern regions of South Korea have also experienced temperature increases, making their minimum temperatures in 2024 similar to those of other regions.
# Daily maximum temperatures, restricted to August.
HW_df <- Korea %>%
  select("Location", "Year", "Month", "Day", "maxT")
Aug_HW <- HW_df %>%
  filter(Month == "08")

# Mean August maximum per year and location. `.groups = "drop"` returns an
# ungrouped table, so no leftover Year grouping affects the steps below and
# summarise() no longer prints its grouping message.
Aug_HWL <- Aug_HW %>%
  group_by(Year, Location) %>%
  summarise(mean(maxT), .groups = "drop")

# Mean August maximum per year over all locations, labelled "Total".
Aug_HWT <- Aug_HW %>%
  group_by(Year) %>%
  summarise(mean(maxT))
Aug_HWT <- Aug_HWT %>%
  mutate(Location = "Total") %>%
  select("Year", "Location", "mean(maxT)")

# Combine both tables, shorten the value column name, and round to one decimal.
Aug_Hmean <- rbind(Aug_HWL, Aug_HWT)
Aug_Hmean <- Aug_Hmean %>%
  rename(mmaxT = "mean(maxT)")
Aug_Hmean$mmaxT <- round(Aug_Hmean$mmaxT, 1)

# Order the facets with "Total" first, followed by the individual locations.
Aug_Hmean <- Aug_Hmean %>%
  mutate(Location = factor(Location, levels = c("Total", setdiff(unique(Location), "Total"))))

# Per-location helper tables for the sparkline annotations: the lowest year,
# the highest year, and the most recent year.
Aug_Mins <- group_by(Aug_Hmean, Location) %>% slice(which.min(mmaxT))
Aug_Maxs <- group_by(Aug_Hmean, Location) %>% slice(which.max(mmaxT))
Aug_Ends <- group_by(Aug_Hmean, Location) %>% filter(Year == max(Year))

# Per-location quartiles attached to every yearly row (used for the ribbon).
# The join key is stated explicitly instead of being guessed by dplyr.
Aug_quarts <- Aug_Hmean %>%
  group_by(Location) %>%
  summarize(
    Aug_quart1 = quantile(mmaxT, 0.25),
    Aug_quart2 = quantile(mmaxT, 0.75)
  ) %>%
  right_join(Aug_Hmean, by = "Location")
# Sparkline panel: one facet row per location showing the yearly mean August
# maximum temperature. The grey band spans each location's first to third
# quartile, the blue and red points mark its lowest and highest year, and the
# latest value and the location name are printed to the right of the line.
ggplot(Aug_Hmean, aes(x = Year, y = mmaxT)) +
  facet_grid(Location ~ ., scales = "free_y") +
  geom_ribbon(
    data = Aug_quarts,
    aes(ymin = Aug_quart1, ymax = Aug_quart2),
    fill = "grey90"
  ) +
  geom_line(linewidth = 0.3) +
  geom_point(data = Aug_Mins, col = "blue") +
  geom_point(data = Aug_Maxs, col = "red") +
  geom_text(data = Aug_Mins, aes(label = round(mmaxT, 1)), size = 3, vjust = -1) +
  geom_text(data = Aug_Maxs, aes(label = round(mmaxT, 1)), size = 3, vjust = 2.5) +
  geom_text(
    data = Aug_Ends, aes(label = round(mmaxT, 1)),
    size = 3, hjust = 0, nudge_x = 1
  ) +
  geom_text(
    data = Aug_Ends, aes(label = Location),
    size = 3, hjust = 0, nudge_x = 5
  ) +
  # Extend the x-axis by a quarter of the year range to make room for the
  # labels printed after the last year.
  expand_limits(
    x = max(Aug_Hmean$Year) + (0.25 * (max(Aug_Hmean$Year) - min(Aug_Hmean$Year)))
  ) +
  scale_x_continuous(breaks = seq(min(Aug_Hmean$Year), max(Aug_Hmean$Year), 5)) +
  scale_y_continuous(expand = c(0.1, 0)) +
  theme_tufte(base_size = 12) +
  theme(
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    strip.text = element_blank()
  )

# Rows for the two comparison years (start of the 1984-vs-2024 comparison).
Aug_filtered <- Aug_Hmean[Aug_Hmean$Year %in% c(1984, 2024), ]
# Split the comparison rows by year, then pair them per location and compute
# the change in mean August maximum temperature (2024 minus 1984).
Aug_1984 <- Aug_filtered[Aug_filtered$Year == 1984, ]
Aug_2024 <- Aug_filtered[Aug_filtered$Year == 2024, ]
Aug_diff <- merge(
  Aug_1984, Aug_2024,
  by = "Location", suffixes = c("_1984", "_2024")
)
Aug_diff <- Aug_diff %>%
  mutate(Difference = mmaxT_2024 - mmaxT_1984)
# Print the comparison table.
Aug_diff
In August, the overall trend shows a steady rise in maximum temperatures from 1984 to 2024.
Except for Daegu, all cities reached their highest temperatures after 2010.
Comparing 1984 and 2024, Seoul and Incheon showed the largest increases, rising by 2.8°C and 2.7°C, respectively.