Seeding Project

Author

Sean Lawler

# Set working directory
setwd("~/Downloads/Intro to R/Cloud-Project")

# Load required packages
# install.packages(c("tidyverse", "ggpubr", "broom", "plotly"))
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
library(broom)
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
library(shiny)
library(gt)


# Load data
clouds <- read.csv("clouds.csv")
str(clouds)
'data.frame':   24 obs. of  8 variables:
 $ X         : int  1 2 3 4 5 6 7 8 9 10 ...
 $ seeding   : chr  "no" "yes" "yes" "no" ...
 $ time      : int  0 1 3 4 6 9 18 25 27 28 ...
 $ sne       : num  1.75 2.7 4.1 2.35 4.25 1.6 1.3 3.35 2.85 2.2 ...
 $ cloudcover: num  13.4 37.9 3.9 5.3 7.1 6.9 4.6 4.9 12.1 5.2 ...
 $ prewetness: num  0.274 1.267 0.198 0.526 0.25 ...
 $ echomotion: chr  "stationary" "moving" "stationary" "moving" ...
 $ rainfall  : num  12.85 5.52 6.29 6.11 2.45 ...
summary(clouds)
       X           seeding               time            sne       
 Min.   : 1.00   Length:24          Min.   : 0.00   Min.   :1.300  
 1st Qu.: 6.75   Class :character   1st Qu.:15.75   1st Qu.:2.612  
 Median :12.50   Mode  :character   Median :32.50   Median :3.250  
 Mean   :12.50                      Mean   :35.33   Mean   :3.169  
 3rd Qu.:18.25                      3rd Qu.:55.25   3rd Qu.:3.962  
 Max.   :24.00                      Max.   :83.00   Max.   :4.650  
   cloudcover       prewetness      echomotion           rainfall     
 Min.   : 2.200   Min.   :0.0180   Length:24          Min.   : 0.280  
 1st Qu.: 3.750   1st Qu.:0.1405   Class :character   1st Qu.: 2.342  
 Median : 5.250   Median :0.2220   Mode  :character   Median : 4.335  
 Mean   : 7.246   Mean   :0.3271                      Mean   : 4.403  
 3rd Qu.: 7.175   3rd Qu.:0.3297                      3rd Qu.: 5.575  
 Max.   :37.900   Max.   :1.2670                      Max.   :12.850  
# Summary statistics by seeding
clouds %>%
  group_by(seeding) %>%
  summarise(
    mean_rainfall = mean(rainfall, na.rm = TRUE),
    sd_rainfall = sd(rainfall, na.rm = TRUE),
    n = n()
  )
# A tibble: 2 × 4
  seeding mean_rainfall sd_rainfall     n
  <chr>           <dbl>       <dbl> <int>
1 no               4.17        3.52    12
2 yes              4.63        2.78    12
# Boxplot
p1 <- ggboxplot(clouds, x = "seeding", y = "rainfall",
                color = "seeding", palette = "jco",
                add = "jitter") +
       theme_minimal()
print(p1)

# T-test
print(t.test(rainfall ~ seeding, data = clouds))

    Welch Two Sample t-test

data:  rainfall by seeding
t = -0.3574, df = 20.871, p-value = 0.7244
alternative hypothesis: true difference in means between group no and group yes is not equal to 0
95 percent confidence interval:
 -3.154691  2.229691
sample estimates:
 mean in group no mean in group yes 
         4.171667          4.634167 
# Multiple Linear Regression Model
model1 <- lm(rainfall ~ seeding + cloudcover + prewetness + echomotion + sne, data = clouds)
print(summary(model1))

Call:
lm(formula = rainfall ~ seeding + cloudcover + prewetness + echomotion + 
    sne, data = clouds)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.1158 -1.7078 -0.2422  1.3368  6.4827 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)  
(Intercept)           6.37680    2.43432   2.620   0.0174 *
seedingyes            1.12011    1.20725   0.928   0.3658  
cloudcover            0.01821    0.11508   0.158   0.8761  
prewetness            2.55109    2.70090   0.945   0.3574  
echomotionstationary  2.59855    1.54090   1.686   0.1090  
sne                  -1.27530    0.68015  -1.875   0.0771 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.855 on 18 degrees of freedom
Multiple R-squared:  0.3403,    Adjusted R-squared:  0.157 
F-statistic: 1.857 on 5 and 18 DF,  p-value: 0.1524
print(anova(model1))
Analysis of Variance Table

Response: rainfall
           Df  Sum Sq Mean Sq F value  Pr(>F)  
seeding     1   1.283  1.2834  0.1575 0.69613  
cloudcover  1  15.738 15.7377  1.9313 0.18157  
prewetness  1   0.003  0.0027  0.0003 0.98557  
echomotion  1  29.985 29.9853  3.6798 0.07108 .
sne         1  28.649 28.6491  3.5158 0.07711 .
Residuals  18 146.677  8.1487                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# broom summaries
print(tidy(model1))
# A tibble: 6 × 5
  term                 estimate std.error statistic p.value
  <chr>                   <dbl>     <dbl>     <dbl>   <dbl>
1 (Intercept)            6.38       2.43      2.62   0.0174
2 seedingyes             1.12       1.21      0.928  0.366 
3 cloudcover             0.0182     0.115     0.158  0.876 
4 prewetness             2.55       2.70      0.945  0.357 
5 echomotionstationary   2.60       1.54      1.69   0.109 
6 sne                   -1.28       0.680    -1.88   0.0771
print(glance(model1))
# A tibble: 1 × 12
  r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC
      <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>
1     0.340         0.157  2.85      1.86   0.152     5  -55.8  126.  134.
# ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
# Compare models by seeding
seed_yes <- filter(clouds, seeding == "yes")
seed_no  <- filter(clouds, seeding == "no")

model_yes <- lm(rainfall ~ sne, data = seed_yes)
model_no  <- lm(rainfall ~ sne, data = seed_no)

print(summary(model_yes))

Call:
lm(formula = rainfall ~ sne, data = seed_yes)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.0134 -1.3297 -0.3276  0.6171  4.3867 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)  12.0202     2.9774   4.037  0.00237 **
sne          -2.2180     0.8722  -2.543  0.02921 * 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.27 on 10 degrees of freedom
Multiple R-squared:  0.3927,    Adjusted R-squared:  0.332 
F-statistic: 6.467 on 1 and 10 DF,  p-value: 0.02921
print(summary(model_no))

Call:
lm(formula = rainfall ~ sne, data = seed_no)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.4892 -2.1762  0.2958  1.4902  7.3616 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)    7.319      3.160   2.317    0.043 *
sne           -1.046      0.995  -1.052    0.318  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.502 on 10 degrees of freedom
Multiple R-squared:  0.09957,   Adjusted R-squared:  0.009528 
F-statistic: 1.106 on 1 and 10 DF,  p-value: 0.3177
print(tidy(model_yes))
# A tibble: 2 × 5
  term        estimate std.error statistic p.value
  <chr>          <dbl>     <dbl>     <dbl>   <dbl>
1 (Intercept)    12.0      2.98       4.04 0.00237
2 sne            -2.22     0.872     -2.54 0.0292 
print(tidy(model_no))
# A tibble: 2 × 5
  term        estimate std.error statistic p.value
  <chr>          <dbl>     <dbl>     <dbl>   <dbl>
1 (Intercept)     7.32     3.16       2.32  0.0430
2 sne            -1.05     0.995     -1.05  0.318 
# Plotly interactive plot
gg <- ggplot(clouds, aes(x = sne, y = rainfall, color = seeding)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal() +
  labs(title = "Rainfall vs SNE by Seeding",
       x = "Suitability Index (SNE)",
       y = "Rainfall (×10⁸ m³)")
print(ggplotly(gg))
`geom_smooth()` using formula = 'y ~ x'
#gt table
tidy(model1) %>%
  gt() %>%
  tab_header(
    title = "Model 1: Rainfall ~ Seeding + Covariates",
    subtitle = "Tidy summary of regression coefficients"
  ) %>%
  fmt_number(columns = c(estimate, std.error, statistic, p.value), decimals = 3)
Model 1: Rainfall ~ Seeding + Covariates
Tidy summary of regression coefficients
term estimate std.error statistic p.value
(Intercept) 6.377 2.434 2.620 0.017
seedingyes 1.120 1.207 0.928 0.366
cloudcover 0.018 0.115 0.158 0.876
prewetness 2.551 2.701 0.945 0.357
echomotionstationary 2.599 1.541 1.686 0.109
sne −1.275 0.680 −1.875 0.077
# Shiny input and reactive plot
selectInput("seeding_choice", "Choose Seeding Status:",
            choices = unique(clouds$seeding),
            selected = "yes")
filtered_data <- reactive({
  filter(clouds, seeding == input$seeding_choice)
})

renderPlot({
  ggplot(filtered_data(), aes(x = sne, y = rainfall)) +
    geom_point(color = "steelblue") +
    geom_smooth(method = "lm", se = FALSE, color = "darkred") +
    labs(title = paste("Rainfall vs SNE -", input$seeding_choice),
         x = "SNE", y = "Rainfall (×10⁸ m³)") +
    theme_minimal()
})
# Model diagnostics
plot(model1, which = 1)  # Residuals vs Fitted

plot(model1, which = 2)  # Q-Q Plot