# Per-column summary statistics of the raw climate data; the recorded output
# below also reports the number of NA values in each measurement column.
climate %>%
  summary()
##       Year          Month        Avg_Temp_(C)     Max_Temp_(C)   
##  Min.   :2020   Min.   : 1.00   Min.   :-4.965   Min.   : 1.544  
##  1st Qu.:2021   1st Qu.: 3.00   1st Qu.: 4.683   1st Qu.:16.309  
##  Median :2022   Median : 6.00   Median :10.423   Median :22.589  
##  Mean   :2022   Mean   : 6.17   Mean   :13.019   Mean   :23.169  
##  3rd Qu.:2023   3rd Qu.: 9.00   3rd Qu.:22.993   3rd Qu.:31.739  
##  Max.   :2024   Max.   :12.00   Max.   :34.282   Max.   :38.713  
##                                 NA's   :7        NA's   :8       
##   Min_Temp_(C)    Precipitation_(mm)  Humidity_()    Wind_Speed_(ms)  
##  Min.   :-9.196   Min.   :  2.957    Min.   :30.87   Min.   : 0.3086  
##  1st Qu.:-3.550   1st Qu.: 50.929    1st Qu.:48.90   1st Qu.: 4.7731  
##  Median : 9.522   Median :106.553    Median :69.74   Median : 9.1217  
##  Mean   : 8.391   Mean   :107.846    Mean   :67.27   Mean   : 8.5464  
##  3rd Qu.:17.642   3rd Qu.:171.851    3rd Qu.:85.81   3rd Qu.:12.4623  
##  Max.   :27.846   Max.   :199.450    Max.   :98.06   Max.   :14.9693  
##  NA's   :10       NA's   :6          NA's   :9       NA's   :4        
##  Solar_Irradiance_(Wm2) Cloud_Cover_()    CO2_Concentration_(ppm)
##  Min.   : 59.23         Min.   : 0.4555   Min.   :400.4          
##  1st Qu.:103.97         1st Qu.:27.9924   1st Qu.:412.4          
##  Median :174.31         Median :55.0573   Median :425.7          
##  Mean   :173.83         Mean   :53.3467   Mean   :423.5          
##  3rd Qu.:240.32         3rd Qu.:82.0561   3rd Qu.:436.7          
##  Max.   :298.45         Max.   :98.8839   Max.   :444.5          
##  NA's   :6              NA's   :5         NA's   :6              
##  Urbanization_Index Vegetation_Index    ENSO_Index      
##  Min.   :0.01669    Min.   :0.03047   Min.   :-0.89402  
##  1st Qu.:0.25770    1st Qu.:0.18709   1st Qu.:-0.39182  
##  Median :0.55813    Median :0.42003   Median : 0.01528  
##  Mean   :0.53877    Mean   :0.44190   Mean   : 0.08495  
##  3rd Qu.:0.75519    3rd Qu.:0.60724   3rd Qu.: 0.62693  
##  Max.   :0.99810    Max.   :0.98972   Max.   : 0.99825  
##  NA's   :5          NA's   :4         NA's   :6         
##  Particulate_Matter_(gm3) Sea_Surface_Temp_(C)
##  Min.   :10.09            Min.   :10.00       
##  1st Qu.:21.41            1st Qu.:15.50       
##  Median :30.44            Median :17.90       
##  Mean   :29.30            Mean   :19.55       
##  3rd Qu.:37.23            3rd Qu.:26.30       
##  Max.   :48.65            Max.   :29.99       
##  NA's   :5                NA's   :8
# Missingness map of the raw data.
climate %>%
  vis_miss()

# Same map for the imputed data, for comparison with the raw one.
climate_imputed %>%
  vis_miss()

# Yearly mean of the average temperature, with a trend line on top.
#
# Grouping by Year leaves only one point per year, which is too few for a
# loess fit: the previous `method = "loess"` run emitted "span too small.
# fewer data values than degrees of freedom" and singularity warnings.
# A straight-line fit is well-defined on so few points, and the explicit
# formula also silences the "using formula" message.
climate_imputed %>%
  group_by(Year) %>%
  summarise(mean_temp = mean(`Avg_Temp_(C)`)) %>%
  ggplot(aes(Year, mean_temp)) +
  geom_line(color = "forestgreen", linewidth = 1.2) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "orange") +
  theme_minimal()

# Yearly mean CO2 concentration drawn as a single line.
climate_imputed %>%
  group_by(Year) %>%
  summarise(
    mean_co2 = mean(`CO2_Concentration_(ppm)`)
  ) %>%
  ggplot(mapping = aes(x = Year, y = mean_co2)) +
  geom_line(linewidth = 1.2, colour = "purple") +
  theme_minimal()

# Distribution of average temperature within each calendar month.
# Month is converted to a factor so every month gets its own box and fill.
ggplot(
  climate_imputed,
  aes(x = factor(Month), y = `Avg_Temp_(C)`, fill = factor(Month))
) +
  geom_boxplot(show.legend = FALSE) +
  theme_minimal()

# Distribution of CO2 concentration within each calendar month.
# Month is converted to a factor so every month gets its own box and fill.
ggplot(
  climate_imputed,
  aes(
    x = factor(Month),
    y = `CO2_Concentration_(ppm)`,
    fill = factor(Month)
  )
) +
  geom_boxplot(show.legend = FALSE) +
  theme_minimal()

# Average temperature against CO2 concentration, points coloured by year,
# with a black linear fit layered on top.
ggplot(
  climate_imputed,
  aes(
    x = `CO2_Concentration_(ppm)`,
    y = `Avg_Temp_(C)`,
    color = factor(Year)
  )
) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Pairwise plot matrix over every numeric column of the imputed data;
# the progress bar is switched off.
ggpairs(
  select(climate_imputed, where(is.numeric)),
  progress = FALSE
)

# Draw the yearly mean of one column as a line chart.
#
# The data carry a Month column next to Year, i.e. several rows per year.
# Mapping those raw rows straight to geom_line() joins every observation that
# shares a year with a vertical stroke, which hides the trend. Averaging per
# year first gives one point per year and a readable line.
#
# data:   data frame containing a `Year` column and `column`
# column: name of the numeric column to plot, as a string
# color:  colour of the line
# Returns the ggplot object; it prints when called at top level.
plot_yearly_mean <- function(data, column, color) {
  ggplot(data, aes(Year, .data[[column]])) +
    stat_summary(fun = mean, geom = "line", color = color) +
    labs(y = column) + # keep the plain column name as the axis title
    theme_minimal()
}

plot_yearly_mean(climate_imputed, "Humidity_()", "blue")

plot_yearly_mean(climate_imputed, "Wind_Speed_(ms)", "red")

plot_yearly_mean(climate_imputed, "Solar_Irradiance_(Wm2)", "gold")

plot_yearly_mean(climate_imputed, "Cloud_Cover_()", "gray")

plot_yearly_mean(climate_imputed, "Particulate_Matter_(gm3)", "brown")

plot_yearly_mean(climate_imputed, "Sea_Surface_Temp_(C)", "darkgreen")

plot_yearly_mean(climate_imputed, "Precipitation_(mm)", "darkblue")

plot_yearly_mean(climate_imputed, "Vegetation_Index", "forestgreen")

plot_yearly_mean(climate_imputed, "ENSO_Index", "purple")

plot_yearly_mean(climate_imputed, "Urbanization_Index", "brown")

# Mean of the average temperature for each calendar month (seasonal profile).
climate_imputed %>%
  group_by(Month) %>%
  summarise(mean_temp = mean(`Avg_Temp_(C)`)) %>%
  ggplot(aes(Month, mean_temp)) +
  geom_line(color = "darkred") +
  # Month is numeric, so the default axis picks fractional breaks such as
  # 2.5 and 7.5; pin the ticks to the twelve whole months instead.
  scale_x_continuous(breaks = 1:12) +
  theme_minimal()

# Pairwise correlations between all numeric columns of the imputed data.
# "pairwise.complete.obs" computes each coefficient from the rows where both
# columns are present.
cor_matrix <- climate_imputed %>%
  select(where(is.numeric)) %>%
  cor(use = "pairwise.complete.obs")

# Reshape the matrix to long form: as.table() + as.data.frame() yields one row
# per cell with columns Var1, Var2 and Freq (the correlation value).
cor_df <- as.data.frame(as.table(cor_matrix))

# Heatmap of the correlation matrix.
# Correlations are signed, so a diverging scale centred on 0 is used: a
# sequential white-to-blue scale would paint strong negative correlations
# white, making them look like "no relationship". Fixing the limits to
# [-1, 1] keeps the colours comparable regardless of the observed range.
ggplot(cor_df, aes(Var1, Var2, fill = Freq)) +
  geom_tile() +
  scale_fill_gradient2(
    name = "Correlation",
    low = "firebrick",
    mid = "white",
    high = "steelblue",
    midpoint = 0,
    limits = c(-1, 1)
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))