# Per-column summary statistics (quartiles, mean, NA counts) of the raw data.
summary(object = climate)
## Year Month Avg_Temp_(C) Max_Temp_(C)
## Min. :2020 Min. : 1.00 Min. :-4.965 Min. : 1.544
## 1st Qu.:2021 1st Qu.: 3.00 1st Qu.: 4.683 1st Qu.:16.309
## Median :2022 Median : 6.00 Median :10.423 Median :22.589
## Mean :2022 Mean : 6.17 Mean :13.019 Mean :23.169
## 3rd Qu.:2023 3rd Qu.: 9.00 3rd Qu.:22.993 3rd Qu.:31.739
## Max. :2024 Max. :12.00 Max. :34.282 Max. :38.713
## NA's :7 NA's :8
## Min_Temp_(C) Precipitation_(mm) Humidity_() Wind_Speed_(ms)
## Min. :-9.196 Min. : 2.957 Min. :30.87 Min. : 0.3086
## 1st Qu.:-3.550 1st Qu.: 50.929 1st Qu.:48.90 1st Qu.: 4.7731
## Median : 9.522 Median :106.553 Median :69.74 Median : 9.1217
## Mean : 8.391 Mean :107.846 Mean :67.27 Mean : 8.5464
## 3rd Qu.:17.642 3rd Qu.:171.851 3rd Qu.:85.81 3rd Qu.:12.4623
## Max. :27.846 Max. :199.450 Max. :98.06 Max. :14.9693
## NA's :10 NA's :6 NA's :9 NA's :4
## Solar_Irradiance_(Wm2) Cloud_Cover_() CO2_Concentration_(ppm)
## Min. : 59.23 Min. : 0.4555 Min. :400.4
## 1st Qu.:103.97 1st Qu.:27.9924 1st Qu.:412.4
## Median :174.31 Median :55.0573 Median :425.7
## Mean :173.83 Mean :53.3467 Mean :423.5
## 3rd Qu.:240.32 3rd Qu.:82.0561 3rd Qu.:436.7
## Max. :298.45 Max. :98.8839 Max. :444.5
## NA's :6 NA's :5 NA's :6
## Urbanization_Index Vegetation_Index ENSO_Index
## Min. :0.01669 Min. :0.03047 Min. :-0.89402
## 1st Qu.:0.25770 1st Qu.:0.18709 1st Qu.:-0.39182
## Median :0.55813 Median :0.42003 Median : 0.01528
## Mean :0.53877 Mean :0.44190 Mean : 0.08495
## 3rd Qu.:0.75519 3rd Qu.:0.60724 3rd Qu.: 0.62693
## Max. :0.99810 Max. :0.98972 Max. : 0.99825
## NA's :5 NA's :4 NA's :6
## Particulate_Matter_(gm3) Sea_Surface_Temp_(C)
## Min. :10.09 Min. :10.00
## 1st Qu.:21.41 1st Qu.:15.50
## Median :30.44 Median :17.90
## Mean :29.30 Mean :19.55
## 3rd Qu.:37.23 3rd Qu.:26.30
## Max. :48.65 Max. :29.99
## NA's :5 NA's :8
# Missingness maps: the raw data first, then the imputed data for comparison.
climate %>%
  vis_miss()

climate_imputed %>%
  vis_miss()

# Yearly mean of the average temperature, with a trend line on top.
# After summarising there is only one point per year, which is too few for a
# loess fit (it emitted "span too small" and near-singularity warnings), so
# the trend is drawn as a straight least-squares line instead.
climate_imputed %>%
  group_by(Year) %>%
  summarise(mean_temp = mean(`Avg_Temp_(C)`)) %>%
  ggplot(aes(Year, mean_temp)) +
  geom_line(color = "forestgreen", linewidth = 1.2) +
  geom_smooth(method = "lm", se = FALSE, color = "orange") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Yearly mean CO2 concentration drawn as a single line.
ggplot(
  data = climate_imputed %>%
    group_by(Year) %>%
    summarise(mean_co2 = mean(`CO2_Concentration_(ppm)`)),
  mapping = aes(x = Year, y = mean_co2)
) +
  geom_line(colour = "purple", linewidth = 1.2) +
  theme_minimal()

# Spread of the average temperature within each calendar month.
# Month is turned into a factor so every month gets its own box and fill.
ggplot(
  data = climate_imputed,
  mapping = aes(x = factor(Month), y = `Avg_Temp_(C)`, fill = factor(Month))
) +
  geom_boxplot(show.legend = FALSE) +
  theme_minimal()

# Spread of the CO2 concentration within each calendar month.
# Month is turned into a factor so every month gets its own box and fill.
ggplot(
  data = climate_imputed,
  mapping = aes(
    x = factor(Month),
    y = `CO2_Concentration_(ppm)`,
    fill = factor(Month)
  )
) +
  geom_boxplot(show.legend = FALSE) +
  theme_minimal()

# Average temperature against CO2 concentration, points coloured by year.
# The smoother sets its colour to a constant, so it draws one black
# least-squares line rather than one line per year.
ggplot(
  data = climate_imputed,
  mapping = aes(
    x = `CO2_Concentration_(ppm)`,
    y = `Avg_Temp_(C)`,
    colour = factor(Year)
  )
) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Pairwise plot matrix over every numeric column; the progress bar is off.
ggpairs(
  data = select(climate_imputed, where(is.numeric)),
  progress = FALSE
)

# Plot the yearly mean of one column of `climate_imputed` as a line.
#
# The data carries a Month column alongside Year, so there is more than one
# row per year. Drawing the raw rows against Year stacks several observations
# on the same x position and geom_line() joins them with vertical zig-zags.
# Averaging per year first leaves exactly one point per year, which matches
# how the yearly temperature and CO2 plots are built.
#
# column      Name of the column to average, given as a string.
# line_color  Colour of the line.
#
# Returns the ggplot object; the y axis is labelled with the column name.
plot_yearly_mean <- function(column, line_color) {
  climate_imputed %>%
    group_by(Year) %>%
    summarise(yearly_mean = mean(.data[[column]])) %>%
    ggplot(aes(Year, yearly_mean)) +
    geom_line(color = line_color) +
    labs(y = column) +
    theme_minimal()
}

plot_yearly_mean("Humidity_()", "blue")

plot_yearly_mean("Wind_Speed_(ms)", "red")

plot_yearly_mean("Solar_Irradiance_(Wm2)", "gold")

plot_yearly_mean("Cloud_Cover_()", "gray")

plot_yearly_mean("Particulate_Matter_(gm3)", "brown")

plot_yearly_mean("Sea_Surface_Temp_(C)", "darkgreen")

plot_yearly_mean("Precipitation_(mm)", "darkblue")

plot_yearly_mean("Vegetation_Index", "forestgreen")

plot_yearly_mean("ENSO_Index", "purple")

plot_yearly_mean("Urbanization_Index", "brown")

# Mean of the average temperature for each calendar month (seasonal profile).
climate_imputed %>%
  group_by(Month) %>%
  summarise(mean_temp = mean(`Avg_Temp_(C)`)) %>%
  ggplot(aes(Month, mean_temp)) +
  geom_line(color = "darkred") +
  # Month is numeric, so the default axis breaks land on fractional values;
  # force one tick per month instead.
  scale_x_continuous(breaks = 1:12) +
  theme_minimal()

# Correlation heatmap over every numeric column.
# cor_matrix: square matrix of pairwise correlations.
# cor_df:     the same matrix in long form (Var1, Var2, Freq), one row per
#             tile; Freq holds the correlation value.
cor_matrix <- climate_imputed %>%
  select(where(is.numeric)) %>%
  cor(use = "pairwise.complete.obs")
cor_df <- as.data.frame(as.table(cor_matrix))
ggplot(cor_df, aes(Var1, Var2, fill = Freq)) +
  geom_tile() +
  # Correlations are signed, so use a diverging scale centred on 0 with fixed
  # limits; a one-directional white-to-blue ramp would render strong negative
  # correlations as the palest tiles and hide the sign.
  scale_fill_gradient2(
    name = "Correlation",
    low = "firebrick", mid = "white", high = "steelblue",
    midpoint = 0, limits = c(-1, 1)
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
