# Show the current working directory, then switch the session to the
# project folder.
getwd()
## [1] "C:/Users/admin/Desktop/R/R 기말프로젝트"
setwd(dir = "C:/Users/admin/Desktop/R/R 기말프로젝트")
library(readxl)
## Warning: 패키지 'readxl'는 R 버전 4.3.2에서 작성되었습니다
# Workbook holding the 2000-2010 winter records.
excel_file <- "C://Users//admin//Desktop//R//R 기말프로젝트//2000-2010_winter.xlsx"
# Names of all sheets in the workbook, in workbook order.
sheet_names <- excel_sheets(path = excel_file)
# Read each sheet into its own table; the result is a list with one element
# per sheet, in the same order as `sheet_names`.
data_list <- lapply(
  X = sheet_names,
  FUN = function(sheet_name) {
    read_excel(path = excel_file, sheet = sheet_name)
  }
)
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.3.2에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Tag every sheet with the year it represents and stack the sheets into one
# table.
# Assumes the sheets are ordered by year (Sheet1: 2000, Sheet2: 2001, ...):
# the year comes purely from the sheet position, not from the data itself.
# seq_along() keeps the year vector empty (rather than counting down from
# 2000) if the workbook ever yields no sheets.
years <- 2000L + seq_along(sheet_names) - 1L
# cbind() appends the scalar year to each sheet as a constant `year` column.
data_list <- Map(cbind, data_list, year = years)
combined_data <- bind_rows(data_list)
# NOTE(review): the recorded head() output shows a second header row
# ("기온(℃)", "일자") read as data at the top of each sheet -- confirm whether
# those rows should be dropped before analysis.
head(combined_data)
## 지점 지점명 일시 평균기온(℃) 평균최고 최고기온(℃) 최고기온
## 1 NA <NA> <NA> NA 기온(℃) NA 일자
## 2 108 서울 2000-12-01 0.9 4.9000000000000004 11.9 36869
## 3 108 서울 2000-01-01 -2.1 1.7 9.9 36526
## 4 108 서울 2000-02-01 -1.7 2.6 7.5 36575
## 5 NA <NA> <NA> NA 기온(℃) NA 일자
## 6 108 서울 2001-12-01 -0.6 3.4 10.9 37236
## 평균최저 최저기온(℃) 최저기온 year
## 1 기온(℃) NA 일자 2000
## 2 -2.9 -11.4 36886 2000
## 3 -5.7 -12.1 36532 2000
## 4 -5.8 -11.6 36557 2000
## 5 기온(℃) NA 일자 2001
## 6 -4 -8.8 37243 2001
# Workbook holding the 2011-2022 winter records.
excel_file2 <- "C://Users//admin//Desktop//R//R 기말프로젝트//2011-2022_winter.xlsx"
# Read every sheet of the workbook into a list, one table per sheet.
sheet_names <- excel_sheets(excel_file2)
data_list <- lapply(sheet_names, function(sheet) read_excel(excel_file2, sheet = sheet))
# Assumes the sheets are ordered by year (Sheet1: 2011, Sheet2: 2012, ...):
# the year comes purely from the sheet position, not from the data itself.
# seq_along() keeps the year vector empty (rather than counting down from
# 2011) if the workbook ever yields no sheets.
years <- 2011L + seq_along(sheet_names) - 1L
# cbind() appends the scalar year to each sheet as a constant `year` column.
data_list <- Map(cbind, data_list, year = years)
combined_data2 <- bind_rows(data_list)
# NOTE(review): the recorded head() output shows a second header row
# ("기온(℃)", "일자") read as data at the top of each sheet -- confirm whether
# those rows should be dropped before analysis.
head(combined_data2)
## 지점 지점명 일시 평균기온(℃) 평균최고 최고기온(℃) 최고기온
## 1 NA <NA> <NA> NA 기온(℃) NA 일자
## 2 108 서울 2011-12-01 -0.9 2.9 9.1 40880
## 3 108 서울 2011-01-01 -7.2 -3.4 0.3 40557
## 4 108 서울 2011-02-01 1.2 5.9 13.2 40600
## 5 NA <NA> <NA> NA 기온(℃) NA 일자
## 6 108 서울 2012-12-01 -4.1 -0.5 8.3 41258
## 평균최저 최저기온(℃) 최저기온 year
## 1 기온(℃) NA 일자 2011
## 2 -4.0999999999999996 -10.6 40894 2011
## 3 -10.5 -17.8 40559 2011
## 4 -2.5 -9.2 40586 2011
## 5 기온(℃) NA 일자 2012
## 6 -7.4 -14.5 41269 2012
# Workbook holding the 1920-1940 winter records.
excel_file3 <- "C://Users//admin//Desktop//R//R 기말프로젝트//1920-1940_winter.xlsx"
# Read every sheet of the workbook into a list, one table per sheet.
sheet_names <- excel_sheets(excel_file3)
data_list <- lapply(sheet_names, function(sheet) read_excel(excel_file3, sheet = sheet))
# Assumes the sheets are ordered by year (Sheet1: 1920, Sheet2: 1921, ...):
# the year comes purely from the sheet position, not from the data itself.
# seq_along() keeps the year vector empty (rather than counting down from
# 1920) if the workbook ever yields no sheets.
years <- 1920L + seq_along(sheet_names) - 1L
# cbind() appends the scalar year to each sheet as a constant `year` column.
data_list <- Map(cbind, data_list, year = years)
combined_data3 <- bind_rows(data_list)
# NOTE(review): the recorded head() output shows a second header row
# ("기온(℃)", "일자") read as data at the top of each sheet -- confirm whether
# those rows should be dropped before analysis.
head(combined_data3)
## 지점 지점명 일시 평균기온(℃) 평균최고 최고기온(℃) 최고기온 평균최저
## 1 NA <NA> <NA> NA 기온(℃) NA 일자 기온(℃)
## 2 108 서울 1920-12-01 -1.2 3.2 12.9 7641 -6.3
## 3 108 서울 1920-01-01 -6.8 -1.9 7.5 7336 -12.7
## 4 108 서울 1920-02-01 -3.2 1 8.5 7337 -7.5
## 5 NA <NA> <NA> NA 기온(℃) NA 일자 기온(℃)
## 6 108 서울 1921-12-01 -0.7 4.2 11.7 8014 -5.4
## 최저기온(℃) 최저기온 year
## 1 NA 일자 1920
## 2 -18.9 7668 1920
## 3 -22.3 7309 1920
## 4 -18.5 7347 1920
## 5 NA 일자 1921
## 6 -13.6 8030 1921
# Give the columns used by the analysis short English names. The same mapping
# (new name = original Korean header) applies to all three data sets, so it is
# defined once and reused instead of being repeated per call.
column_lookup <- c(
  date = "일시",
  mean = "평균기온(℃)",
  min = "최저기온(℃)",
  max = "최고기온(℃)"
)
combined_data <- rename(combined_data, all_of(column_lookup))
combined_data2 <- rename(combined_data2, all_of(column_lookup))
combined_data3 <- rename(combined_data3, all_of(column_lookup))

# Stack the 2000-2010 and 2011-2022 tables into one 2000-2022 table.
combined_data_1_2 <- bind_rows(combined_data, combined_data2)

# View() opens a data viewer, which is only meaningful in an interactive
# session; skip it when the script is run non-interactively.
if (interactive()) {
  View(combined_data)
  View(combined_data2)
  View(combined_data3)
  View(combined_data_1_2)
}
library(ggplot2)
## Warning: 패키지 'ggplot2'는 R 버전 4.3.2에서 작성되었습니다
# Inspect the column names of the combined 2000-2022 table.
names(combined_data_1_2)
## [1] "지점" "지점명" "date" "mean" "평균최고" "max"
## [7] "최고기온" "평균최저" "min" "최저기온" "year"
# One row per year: the average of the `mean` temperature column, ignoring
# missing values.
mean_values_1_2 <- summarise(
  group_by(combined_data_1_2, year),
  mean_value_1_2 = mean(mean, na.rm = TRUE)
)
print(mean_values_1_2)
## # A tibble: 23 × 2
## year mean_value_1_2
## <int> <dbl>
## 1 2000 -0.967
## 2 2001 -1.67
## 3 2002 1.63
## 4 2003 0.3
## 5 2004 0.9
## 6 2005 -2.77
## 7 2006 0.433
## 8 2007 2.07
## 9 2008 -0.6
## 10 2009 -0.0333
## # ℹ 13 more rows
# Line chart of the yearly average temperature for 2000-2022.
ggplot(data = mean_values_1_2, mapping = aes(x = year, y = mean_value_1_2)) +
  geom_line() +
  ggtitle("Mean Temperature Over Years_2000-2022") +
  xlab("Year") +
  ylab("Mean Temperature")
그리하여, 아주 먼 옛날인 1900년대(1920-1940년)의 데이터도 가져와 다시 한번 그래프를 그려 보겠다.
# Inspect the column names of the 1920-1940 table.
names(combined_data3)
## [1] "지점" "지점명" "date" "mean" "평균최고" "max"
## [7] "최고기온" "평균최저" "min" "최저기온" "year"
library(dplyr)
library(ggplot2)
# One row per year: the average of the `mean` temperature column, ignoring
# missing values.
mean_values3 <- summarise(
  group_by(combined_data3, year),
  mean_value3 = mean(mean, na.rm = TRUE)
)
print(mean_values3)
## # A tibble: 21 × 2
## year mean_value3
## <int> <dbl>
## 1 1920 -3.73
## 2 1921 -1.57
## 3 1922 -3.67
## 4 1923 -3.63
## 5 1924 -1.97
## 6 1925 -2.8
## 7 1926 -3.77
## 8 1927 -3
## 9 1928 -3.13
## 10 1929 -2.03
## # ℹ 11 more rows
# Line chart of the yearly average temperature for 1920-1940.
ggplot(data = mean_values3, mapping = aes(x = year, y = mean_value3)) +
  geom_line() +
  ggtitle("Mean Temperature Over Years_1920-1940") +
  xlab("Year") +
  ylab("Mean Temperature")
( 겨울 온도 변화차 : 2000-2022 > 1920-1940 )
# Per-year spread of the `mean` temperature column for 2000-2022: sample
# variance and standard deviation, both ignoring missing values.
variance_std_values_1_2 <- summarise(
  group_by(combined_data_1_2, year),
  variance_value_1_2 = var(mean, na.rm = TRUE),
  std_deviation_value_1_2 = sd(mean, na.rm = TRUE)
)
print(variance_std_values_1_2)
## # A tibble: 23 × 3
## year variance_value_1_2 std_deviation_value_1_2
## <int> <dbl> <dbl>
## 1 2000 2.65 1.63
## 2 2001 4.46 2.11
## 3 2002 1.69 1.30
## 4 2003 6.88 2.62
## 5 2004 4.36 2.09
## 6 2005 1.05 1.03
## 7 2006 0.723 0.850
## 8 2007 3.29 1.81
## 9 2008 2.23 1.49
## 10 2009 6.70 2.59
## # ℹ 13 more rows
# Line chart of the per-year variance for 2000-2022.
ggplot(
  data = variance_std_values_1_2,
  mapping = aes(x = year, y = variance_value_1_2)
) +
  geom_line() +
  ggtitle("Variance Over Years_2000-2022") +
  xlab("Year") +
  ylab("Variance")
# Line chart of the per-year standard deviation for 2000-2022.
ggplot(
  data = variance_std_values_1_2,
  mapping = aes(x = year, y = std_deviation_value_1_2)
) +
  geom_line() +
  ggtitle("Standard Deviation Over Years_2000-2022") +
  xlab("Year") +
  ylab("Standard Deviation")
# Per-year spread of the `mean` temperature column for 1920-1940: sample
# variance and standard deviation, both ignoring missing values.
variance_std_values3 <- summarise(
  group_by(combined_data3, year),
  variance_value3 = var(mean, na.rm = TRUE),
  std_deviation_value3 = sd(mean, na.rm = TRUE)
)
print(variance_std_values3)
## # A tibble: 21 × 3
## year variance_value3 std_deviation_value3
## <int> <dbl> <dbl>
## 1 1920 8.05 2.84
## 2 1921 1.05 1.03
## 3 1922 18.1 4.25
## 4 1923 3.72 1.93
## 5 1924 0.223 0.473
## 6 1925 1.83 1.35
## 7 1926 1.36 1.17
## 8 1927 1.11 1.05
## 9 1928 3.24 1.80
## 10 1929 10.4 3.22
## # ℹ 11 more rows
# Line chart of the per-year variance for 1920-1940.
ggplot(
  data = variance_std_values3,
  mapping = aes(x = year, y = variance_value3)
) +
  geom_line() +
  ggtitle("Variance Over Years_1920-1940") +
  xlab("Year") +
  ylab("Variance")
# Line chart of the per-year standard deviation for 1920-1940.
ggplot(
  data = variance_std_values3,
  mapping = aes(x = year, y = std_deviation_value3)
) +
  geom_line() +
  ggtitle("Standard Deviation Over Years_1920-1940") +
  xlab("Year") +
  ylab("Standard Deviation")
# Inspect the columns of the yearly-mean table before modelling.
names(mean_values_1_2)
## [1] "year" "mean_value_1_2"
library(dplyr)
# Least-squares line of yearly mean temperature against year, 2000-2022.
# The call is written out literally because summary() echoes it.
lm_model <- lm(mean_value_1_2 ~ year, data = mean_values_1_2)
print(summary(lm_model))
##
## Call:
## lm(formula = mean_value_1_2 ~ year, data = mean_values_1_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.38172 -1.11523 0.06064 1.14536 2.54572
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.02757 91.33218 0.460 0.650
## year -0.02118 0.04542 -0.466 0.646
##
## Residual standard error: 1.445 on 21 degrees of freedom
## Multiple R-squared: 0.01025, Adjusted R-squared: -0.03688
## F-statistic: 0.2175 on 1 and 21 DF, p-value: 0.6458
# Scatter of yearly mean temperature for 2000-2022 with a fitted straight
# line (no confidence band).
ggplot(data = mean_values_1_2, mapping = aes(x = year, y = mean_value_1_2)) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red", se = FALSE) +
  ggtitle("Linear Regression: Mean Temperature Over Years_2000-2022") +
  xlab("Year") +
  ylab("Mean Temperature")
## `geom_smooth()` using formula = 'y ~ x'
# Inspect the columns of the yearly-mean table before modelling.
names(mean_values3)
## [1] "year" "mean_value3"
library(dplyr)
# Least-squares line of yearly mean temperature against year, 1920-1940.
# The call is written out literally because summary() echoes it.
lm_model3 <- lm(mean_value3 ~ year, data = mean_values3)
print(summary(lm_model3))
##
## Call:
## lm(formula = mean_value3 ~ year, data = mean_values3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6636 -0.8608 -0.1873 0.4597 2.3963
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.54069 69.14147 0.239 0.813
## year -0.01004 0.03582 -0.280 0.782
##
## Residual standard error: 0.9941 on 19 degrees of freedom
## Multiple R-squared: 0.00412, Adjusted R-squared: -0.0483
## F-statistic: 0.07859 on 1 and 19 DF, p-value: 0.7822
# Scatter of yearly mean temperature for 1920-1940 with a fitted straight
# line (no confidence band).
ggplot(data = mean_values3, mapping = aes(x = year, y = mean_value3)) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red", se = FALSE) +
  ggtitle("Linear Regression: Mean Temperature Over Years_1920-1940") +
  xlab("Year") +
  ylab("Mean Temperature")
## `geom_smooth()` using formula = 'y ~ x'
#### => lm 선을 그려 확인해 본 결과, 1900년대(1920-1940) 겨울 평균온도의 회귀선 기울기(-0.010)에 비해 2000년대(2000-2022) 겨울 평균온도의 회귀선 기울기(-0.021)가 더 크게 기울어진 것을 볼 수 있음. 단, 두 기울기 모두 통계적으로 유의하지 않음(p = 0.782, 0.646).
# Inspect the columns of the yearly-spread table before modelling.
names(variance_std_values_1_2)
## [1] "year" "variance_value_1_2"
## [3] "std_deviation_value_1_2"
library(dplyr)
# Least-squares line of per-year variance against year, 2000-2022.
# The call is written out literally because summary() echoes it.
lm_model_var <- lm(variance_value_1_2 ~ year, data = variance_std_values_1_2)
print(summary(lm_model_var))
##
## Call:
## lm(formula = variance_value_1_2 ~ year, data = variance_std_values_1_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.522 -2.474 -1.302 1.640 15.053
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 79.8738 259.2184 0.308 0.761
## year -0.0377 0.1289 -0.292 0.773
##
## Residual standard error: 4.101 on 21 degrees of freedom
## Multiple R-squared: 0.004057, Adjusted R-squared: -0.04337
## F-statistic: 0.08555 on 1 and 21 DF, p-value: 0.7728
# Scatter of per-year variance for 2000-2022 with a fitted straight line (no
# confidence band). The y aesthetic is `variance_value_1_2`, so the title and
# y-axis label name the variance rather than the mean temperature.
ggplot(variance_std_values_1_2, aes(x = year, y = variance_value_1_2)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "green") +
  labs(
    title = "Linear Regression: Variance Over Years_2000-2022",
    x = "Year",
    y = "Variance"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Inspect the columns of the yearly-spread table before modelling.
names(variance_std_values3)
## [1] "year" "variance_value3" "std_deviation_value3"
library(dplyr)
# Least-squares line of per-year variance against year, 1920-1940.
# The call is written out literally because summary() echoes it.
lm_model_var3 <- lm(variance_value3 ~ year, data = variance_std_values3)
print(summary(lm_model_var3))
##
## Call:
## lm(formula = variance_value3 ~ year, data = variance_std_values3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.276 -4.351 -3.349 3.913 13.589
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -499.8061 435.7284 -1.147 0.266
## year 0.2625 0.2258 1.163 0.259
##
## Residual standard error: 6.265 on 19 degrees of freedom
## Multiple R-squared: 0.06641, Adjusted R-squared: 0.01728
## F-statistic: 1.352 on 1 and 19 DF, p-value: 0.2594
# Scatter of per-year variance for 1920-1940 with a fitted straight line (no
# confidence band). The y aesthetic is `variance_value3`, so the title and
# y-axis label name the variance rather than the mean temperature.
ggplot(variance_std_values3, aes(x = year, y = variance_value3)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "green") +
  labs(
    title = "Linear Regression: Variance Over Years_1920-1940",
    x = "Year",
    y = "Variance"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Inspect the columns of the yearly-spread table before modelling.
names(variance_std_values_1_2)
## [1] "year" "variance_value_1_2"
## [3] "std_deviation_value_1_2"
library(dplyr)
# Least-squares line of per-year standard deviation against year, 2000-2022.
# The call is written out literally because summary() echoes it.
lm_model_std <- lm(std_deviation_value_1_2 ~ year, data = variance_std_values_1_2)
print(summary(lm_model_std))
##
## Call:
## lm(formula = std_deviation_value_1_2 ~ year, data = variance_std_values_1_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0529 -0.5824 -0.1794 0.5631 2.5302
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.83014 53.75390 0.499 0.623
## year -0.01243 0.02673 -0.465 0.647
##
## Residual standard error: 0.8503 on 21 degrees of freedom
## Multiple R-squared: 0.01019, Adjusted R-squared: -0.03695
## F-statistic: 0.2161 on 1 and 21 DF, p-value: 0.6468
# Scatter of per-year standard deviation for 2000-2022 with a fitted straight
# line (no confidence band). The y aesthetic is `std_deviation_value_1_2`, so
# the y-axis is labelled as a standard deviation, not a mean temperature.
ggplot(variance_std_values_1_2, aes(x = year, y = std_deviation_value_1_2)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Standard Deviation: Mean Temperature Over Years_2000-2022",
    x = "Year",
    y = "Standard Deviation"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Inspect the columns of the yearly-spread table before modelling.
names(variance_std_values3)
## [1] "year" "variance_value3" "std_deviation_value3"
library(dplyr)
# Least-squares line of per-year standard deviation against year, 1920-1940.
# The call is written out literally because summary() echoes it.
lm_model_std3 <- lm(std_deviation_value3 ~ year, data = variance_std_values3)
print(summary(lm_model_std3))
##
## Call:
## lm(formula = std_deviation_value3 ~ year, data = variance_std_values3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4832 -0.7465 -0.5207 0.9193 2.4210
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -115.37790 81.36633 -1.418 0.172
## year 0.06098 0.04216 1.447 0.164
##
## Residual standard error: 1.17 on 19 degrees of freedom
## Multiple R-squared: 0.09921, Adjusted R-squared: 0.0518
## F-statistic: 2.092 on 1 and 19 DF, p-value: 0.1643
# Scatter of per-year standard deviation for 1920-1940 with a fitted straight
# line (no confidence band). The y aesthetic is `std_deviation_value3`, so the
# y-axis is labelled as a standard deviation, not a mean temperature.
ggplot(variance_std_values3, aes(x = year, y = std_deviation_value3)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Standard Deviation: Mean Temperature Over Years_1920-1940",
    x = "Year",
    y = "Standard Deviation"
  )
## `geom_smooth()` using formula = 'y ~ x'