# Show the session's current working directory, then make the project folder
# the working directory for the rest of the script.
project_dir <- "C:/Users/admin/Desktop/R/R 기말프로젝트"
getwd()
## [1] "C:/Users/admin/Desktop/R/R 기말프로젝트"
setwd(project_dir)
library(readxl)
## Warning: 패키지 'readxl'는 R 버전 4.3.2에서 작성되었습니다
# Read every sheet of the 2000-2010 winter workbook into a list of data frames,
# in the order the workbook reports its sheets.
excel_file <- "C://Users//admin//Desktop//R//R 기말프로젝트//2000-2010_winter.xlsx"
sheet_names <- excel_sheets(excel_file)
# `path` is passed by name, so each sheet name supplied by lapply() is matched
# to read_excel()'s next argument, `sheet`.
data_list <- lapply(sheet_names, read_excel, path = excel_file)
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.3.2에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Tag each sheet with its year, then stack all sheets into one data frame.
# Assumes the sheets are ordered by year (Sheet1: 2000, Sheet2: 2001, ...).
# seq_along() keeps `years` the same length as `sheet_names` even when that is
# empty, whereas 2000:(2000 + n - 1) would count backwards for n = 0.
years <- 2000L + seq_along(sheet_names) - 1L
data_list <- Map(cbind, data_list, year = years)
combined_data <- bind_rows(data_list)
# NOTE(review): the head() output recorded below shows one row per sheet whose
# 지점 is NA and whose text columns hold "기온(℃)" / "일자". That looks like a
# second header line being read as data - confirm and drop it if unintended.
head(combined_data)
## 지점 지점명 일시 평균기온(℃) 평균최고 최고기온(℃) 최고기온
## 1 NA <NA> <NA> NA 기온(℃) NA 일자
## 2 108 서울 2000-12-01 0.9 4.9000000000000004 11.9 36869
## 3 108 서울 2000-01-01 -2.1 1.7 9.9 36526
## 4 108 서울 2000-02-01 -1.7 2.6 7.5 36575
## 5 NA <NA> <NA> NA 기온(℃) NA 일자
## 6 108 서울 2001-12-01 -0.6 3.4 10.9 37236
## 평균최저 최저기온(℃) 최저기온 year
## 1 기온(℃) NA 일자 2000
## 2 -2.9 -11.4 36886 2000
## 3 -5.7 -12.1 36532 2000
## 4 -5.8 -11.6 36557 2000
## 5 기온(℃) NA 일자 2001
## 6 -4 -8.8 37243 2001
# Read every sheet of the 2011-2022 winter workbook, tag each sheet with its
# year, and stack the sheets into one data frame.
excel_file2 <- "C://Users//admin//Desktop//R//R 기말프로젝트//2011-2022_winter.xlsx"
sheet_names <- excel_sheets(excel_file2)
data_list <- lapply(
  sheet_names,
  function(sheet_name) read_excel(excel_file2, sheet = sheet_name)
)
# Assumes the sheets are ordered by year (Sheet1: 2011, Sheet2: 2012, ...).
# seq_along() keeps `years` the same length as `sheet_names` even when that is
# empty, whereas 2011:(2011 + n - 1) would count backwards for n = 0.
years <- 2011L + seq_along(sheet_names) - 1L
data_list <- Map(cbind, data_list, year = years)
combined_data2 <- bind_rows(data_list)
head(combined_data2)
## 지점 지점명 일시 평균기온(℃) 평균최고 최고기온(℃) 최고기온
## 1 NA <NA> <NA> NA 기온(℃) NA 일자
## 2 108 서울 2011-12-01 -0.9 2.9 9.1 40880
## 3 108 서울 2011-01-01 -7.2 -3.4 0.3 40557
## 4 108 서울 2011-02-01 1.2 5.9 13.2 40600
## 5 NA <NA> <NA> NA 기온(℃) NA 일자
## 6 108 서울 2012-12-01 -4.1 -0.5 8.3 41258
## 평균최저 최저기온(℃) 최저기온 year
## 1 기온(℃) NA 일자 2011
## 2 -4.0999999999999996 -10.6 40894 2011
## 3 -10.5 -17.8 40559 2011
## 4 -2.5 -9.2 40586 2011
## 5 기온(℃) NA 일자 2012
## 6 -7.4 -14.5 41269 2012
# Read every sheet of the 1920-1940 winter workbook, tag each sheet with its
# year, and stack the sheets into one data frame.
excel_file3 <- "C://Users//admin//Desktop//R//R 기말프로젝트//1920-1940_winter.xlsx"
sheet_names <- excel_sheets(excel_file3)
data_list <- lapply(
  sheet_names,
  function(sheet_name) read_excel(excel_file3, sheet = sheet_name)
)
# Assumes the sheets are ordered by year (Sheet1: 1920, Sheet2: 1921, ...).
# seq_along() keeps `years` the same length as `sheet_names` even when that is
# empty, whereas 1920:(1920 + n - 1) would count backwards for n = 0.
years <- 1920L + seq_along(sheet_names) - 1L
data_list <- Map(cbind, data_list, year = years)
combined_data3 <- bind_rows(data_list)
head(combined_data3)
## 지점 지점명 일시 평균기온(℃) 평균최고 최고기온(℃) 최고기온 평균최저
## 1 NA <NA> <NA> NA 기온(℃) NA 일자 기온(℃)
## 2 108 서울 1920-12-01 -1.2 3.2 12.9 7641 -6.3
## 3 108 서울 1920-01-01 -6.8 -1.9 7.5 7336 -12.7
## 4 108 서울 1920-02-01 -3.2 1 8.5 7337 -7.5
## 5 NA <NA> <NA> NA 기온(℃) NA 일자 기온(℃)
## 6 108 서울 1921-12-01 -0.7 4.2 11.7 8014 -5.4
## 최저기온(℃) 최저기온 year
## 1 NA 일자 1920
## 2 -18.9 7668 1920
## 3 -22.3 7309 1920
## 4 -18.5 7347 1920
## 5 NA 일자 1921
## 6 -13.6 8030 1921
# Rename the Korean date/temperature headers to short English names. The same
# mapping applies to all three data sets, so it is defined once
# (new name = existing name) instead of being repeated per call.
temperature_columns <- c(
  date = "일시",
  mean = "평균기온(℃)",
  min = "최저기온(℃)",
  max = "최고기온(℃)"
)
combined_data <- rename(combined_data, all_of(temperature_columns))
combined_data2 <- rename(combined_data2, all_of(temperature_columns))
combined_data3 <- rename(combined_data3, all_of(temperature_columns))

# Stack the 2000-2010 and 2011-2022 data into a single table.
combined_data_1_2 <- bind_rows(combined_data, combined_data2)

# View() opens a GUI data viewer, so only call it in an interactive session;
# batch runs (Rscript, knitting) skip it.
if (interactive()) {
  View(combined_data)
  View(combined_data2)
  View(combined_data3)
  View(combined_data_1_2)
}
library(ggplot2)
## Warning: 패키지 'ggplot2'는 R 버전 4.3.2에서 작성되었습니다
# List the columns available after renaming.
colnames(combined_data_1_2)
## [1] "지점" "지점명" "date" "mean" "평균최고" "max"
## [7] "최고기온" "평균최저" "min" "최저기온" "year"
# Average the `mean` temperature column within each year, ignoring NA rows.
mean_values_1_2 <- summarise(
  group_by(combined_data_1_2, year),
  mean_value_1_2 = mean(mean, na.rm = TRUE)
)
print(mean_values_1_2)
## # A tibble: 23 × 2
## year mean_value_1_2
## <int> <dbl>
## 1 2000 -0.967
## 2 2001 -1.67
## 3 2002 1.63
## 4 2003 0.3
## 5 2004 0.9
## 6 2005 -2.77
## 7 2006 0.433
## 8 2007 2.07
## 9 2008 -0.6
## 10 2009 -0.0333
## # ℹ 13 more rows
# Line chart of the yearly mean temperature for 2000-2022.
ggplot(data = mean_values_1_2, mapping = aes(x = year, y = mean_value_1_2)) +
  geom_line() +
  ggtitle("Mean Temperature Over Years_2000-2022") +
  xlab("Year") +
  ylab("Mean Temperature")
그래서 훨씬 이전인 1920~1940년의 데이터도 가져와 다시 한번 그래프를 그려 보겠다.
# List the columns available after renaming.
colnames(combined_data3)
## [1] "지점" "지점명" "date" "mean" "평균최고" "max"
## [7] "최고기온" "평균최저" "min" "최저기온" "year"
# Average the `mean` temperature column within each year, ignoring NA rows.
mean_values3 <- summarise(
  group_by(combined_data3, year),
  mean_value3 = mean(mean, na.rm = TRUE)
)
print(mean_values3)
## # A tibble: 21 × 2
## year mean_value3
## <int> <dbl>
## 1 1920 -3.73
## 2 1921 -1.57
## 3 1922 -3.67
## 4 1923 -3.63
## 5 1924 -1.97
## 6 1925 -2.8
## 7 1926 -3.77
## 8 1927 -3
## 9 1928 -3.13
## 10 1929 -2.03
## # ℹ 11 more rows
# Line chart of the yearly mean temperature for 1920-1940.
ggplot(data = mean_values3, mapping = aes(x = year, y = mean_value3)) +
  geom_line() +
  ggtitle("Mean Temperature Over Years_1920-1940") +
  xlab("Year") +
  ylab("Mean Temperature")
(겨울 기온 변화 폭: 2000-2022 > 1920-1940)
# Per-year spread of the `mean` temperature column for 2000-2022: sample
# variance and standard deviation, both ignoring NA rows.
variance_std_values_1_2 <- summarise(
  group_by(combined_data_1_2, year),
  variance_value_1_2 = var(mean, na.rm = TRUE),
  std_deviation_value_1_2 = sd(mean, na.rm = TRUE)
)
print(variance_std_values_1_2)
## # A tibble: 23 × 3
## year variance_value_1_2 std_deviation_value_1_2
## <int> <dbl> <dbl>
## 1 2000 2.65 1.63
## 2 2001 4.46 2.11
## 3 2002 1.69 1.30
## 4 2003 6.88 2.62
## 5 2004 4.36 2.09
## 6 2005 1.05 1.03
## 7 2006 0.723 0.850
## 8 2007 3.29 1.81
## 9 2008 2.23 1.49
## 10 2009 6.70 2.59
## # ℹ 13 more rows
# Line chart of the per-year variance for 2000-2022.
ggplot(
  data = variance_std_values_1_2,
  mapping = aes(x = year, y = variance_value_1_2)
) +
  geom_line() +
  ggtitle("Variance Over Years_2000-2022") +
  xlab("Year") +
  ylab("Variance")
# Line chart of the per-year standard deviation for 2000-2022.
ggplot(
  data = variance_std_values_1_2,
  mapping = aes(x = year, y = std_deviation_value_1_2)
) +
  geom_line() +
  ggtitle("Standard Deviation Over Years_2000-2022") +
  xlab("Year") +
  ylab("Standard Deviation")
# Per-year spread of the `mean` temperature column for 1920-1940: sample
# variance and standard deviation, both ignoring NA rows.
variance_std_values3 <- summarise(
  group_by(combined_data3, year),
  variance_value3 = var(mean, na.rm = TRUE),
  std_deviation_value3 = sd(mean, na.rm = TRUE)
)
print(variance_std_values3)
## # A tibble: 21 × 3
## year variance_value3 std_deviation_value3
## <int> <dbl> <dbl>
## 1 1920 8.05 2.84
## 2 1921 1.05 1.03
## 3 1922 18.1 4.25
## 4 1923 3.72 1.93
## 5 1924 0.223 0.473
## 6 1925 1.83 1.35
## 7 1926 1.36 1.17
## 8 1927 1.11 1.05
## 9 1928 3.24 1.80
## 10 1929 10.4 3.22
## # ℹ 11 more rows
# Line chart of the per-year variance for 1920-1940.
ggplot(
  data = variance_std_values3,
  mapping = aes(x = year, y = variance_value3)
) +
  geom_line() +
  ggtitle("Variance Over Years_1920-1940") +
  xlab("Year") +
  ylab("Variance")
# Line chart of the per-year standard deviation for 1920-1940.
ggplot(
  data = variance_std_values3,
  mapping = aes(x = year, y = std_deviation_value3)
) +
  geom_line() +
  ggtitle("Standard Deviation Over Years_1920-1940") +
  xlab("Year") +
  ylab("Standard Deviation")