R Data importation and cleaning

# Import the hailstorm impact workbook and tidy it in one pipeline:
#   1. give the girth columns that arrive as `...4`, `...5`, `...6` readable
#      names (presumably auto-generated for unlabelled spreadsheet columns)
#      and replace the non-syntactic `FLOWER/BUD` name,
#   2. coerce the measurement columns to numeric,
#   3. keep only rows that still have a plot number after the coercion.
# NOTE(review): the workbook path is absolute and machine-specific.
hail_data <- read_excel(
  "C:\\Users\\User\\Desktop\\Copy of HAILSTORM_IMPACT_DATA(1).xlsx"
) %>%
  rename(Girth1 = `...4`, Girth2 = `...5`, Girth3 = `...6`) %>%
  rename(Flower_Bud = `FLOWER/BUD`) %>%
  mutate(across(
    c(
      PLOT_NO., VEGETATIVE,
      Girth1, Girth2, Girth3, `Girth size (mm)`,
      Flower_Bud
    ),
    as.numeric
  )) %>%
  filter(!is.na(PLOT_NO.))

Data visualization

Girth size distribution

# Box plot of the girth measurements. The column name contains spaces and
# parentheses, so it is referenced with backticks.
ggplot(data = hail_data, mapping = aes(y = `Girth size (mm)`)) +
  geom_boxplot(alpha = 0.5, fill = "orange") +
  ggtitle("Distribution of Girth Size (mm)")

Flower bud abortion (%)

# Box plot of the Flower_Bud column (flower/bud abortion percentage, per the
# plot title).
ggplot(data = hail_data, mapping = aes(y = Flower_Bud)) +
  geom_boxplot(alpha = 0.5, fill = "tomato") +
  ggtitle("Distribution of Flower/Bud Abortion (%)")

## Stem posture

# Box plot of the STEM column, titled as the distribution of stem strength.
ggplot(data = hail_data, mapping = aes(y = STEM)) +
  geom_boxplot(alpha = 0.5, fill = "skyblue") +
  ggtitle("Distribution of stem strength")

## Correlation in the data

# Keep the four variables of interest and drop every row with a missing value
# in any of them, so cor() works on complete cases only.
numeric_data <- na.omit(
  select(hail_data, STEM, VEGETATIVE, Flower_Bud, `Girth size (mm)`)
)

# Pairwise correlation matrix (cor() default method), shown to two decimals.
cor_matrix <- cor(numeric_data)
round(cor_matrix, digits = 2)
##                 STEM VEGETATIVE Flower_Bud Girth size (mm)
## STEM            1.00       0.43       0.33            0.07
## VEGETATIVE      0.43       1.00       0.38            0.00
## Flower_Bud      0.33       0.38       1.00           -0.06
## Girth size (mm) 0.07       0.00      -0.06            1.00

Display of relationships

# Scatter plot of flower/bud abortion against girth size with a fitted linear
# trend line (no confidence band).
# The girth column name contains spaces and parentheses, so it must be wrapped
# in backticks. A quoted string inside aes() is treated as a constant label
# rather than a column, which would place every point at one x position and
# leave the linear smooth with nothing to fit.
ggplot(hail_data, aes(x = `Girth size (mm)`, y = Flower_Bud)) +
  geom_point(color = "darkred", alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  labs(
    title = "Flower Abortion vs Girth Size",
    x = "Girth (mm)",
    y = "Flower/Bud Abortion (%)"
  )

### Stem Strength vs Flower Abortion

# Jittered scatter of Flower_Bud against STEM. A horizontal jitter width of
# 0.2 spreads the points that share the same STEM value.
ggplot(data = hail_data, mapping = aes(x = STEM, y = Flower_Bud)) +
  geom_jitter(alpha = 0.6, color = "purple", width = 0.2) +
  ggtitle("Stem Strength vs Bud Abortion") +
  xlab("Stem Score") +
  ylab("Flower/Bud Abortion (%)")

## 🧠 Linear Model: Predicting Flower Abortion

# Fit a linear model of Flower_Bud on girth size, STEM and VEGETATIVE, then
# print the coefficient table and fit statistics.
model <- lm(
  formula = Flower_Bud ~ `Girth size (mm)` + STEM + VEGETATIVE,
  data = hail_data
)
summary(model)
## 
## Call:
## lm(formula = Flower_Bud ~ `Girth size (mm)` + STEM + VEGETATIVE, 
##     data = hail_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -68.032 -14.028   4.703  16.875  55.899 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        18.1300     5.4283   3.340 0.000912 ***
## `Girth size (mm)`  -2.8686     1.8265  -1.571 0.117023    
## STEM                8.9696     2.0503   4.375 1.53e-05 ***
## VEGETATIVE          4.5332     0.7492   6.051 3.15e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.93 on 428 degrees of freedom
## Multiple R-squared:  0.186,  Adjusted R-squared:  0.1803 
## F-statistic: 32.61 on 3 and 428 DF,  p-value: < 2.2e-16

🏆 Top 10 Most Affected Plots

# Sort plots from highest to lowest Flower_Bud and keep the first ten rows.
top_affected <- slice_head(
  arrange(hail_data, desc(Flower_Bud)),
  n = 10
)

# Render the ten rows as a formatted table.
knitr::kable(top_affected)
PLOT_NO. VEGETATIVE STEM Girth1 Girth2 Girth3 Girth size (mm) Flower_Bud
348 7.0 1 1.5 1.8 1.3 1.533333 97
345 9.0 3 2.0 1.3 1.8 1.700000 96
351 8.0 2 1.4 1.8 2.0 1.733333 96
191 9.5 1 2.0 2.3 2.6 2.300000 95
324 7.0 1 1.8 2.2 1.5 1.833333 95
334 9.0 2 1.6 1.4 1.7 1.566667 95
339 7.0 1 1.4 1.5 1.9 1.600000 95
340 7.0 2 1.5 2.0 1.8 1.766667 95
364 7.0 1 1.8 1.5 2.0 1.766667 95
378 9.0 3 2.1 1.6 2.3 2.000000 95

📊 Combined Dashboard

# Three per-plot bar charts sharing the same x variable (PLOT_NO.), one for
# each response: Flower_Bud, STEM and girth size.
p1 <- ggplot(data = hail_data, mapping = aes(x = PLOT_NO., y = Flower_Bud)) +
  geom_col(fill = "tomato") +
  theme_minimal() +
  ggtitle("Flower Abortion per Plot") +
  xlab("Plot") +
  ylab("% Abortion")

p2 <- ggplot(data = hail_data, mapping = aes(x = PLOT_NO., y = STEM)) +
  geom_col(fill = "forestgreen") +
  theme_minimal() +
  ggtitle("Stem Strength per Plot") +
  xlab("Plot") +
  ylab("Stem Score")

p3 <- ggplot(
  data = hail_data,
  mapping = aes(x = PLOT_NO., y = `Girth size (mm)`)
) +
  geom_col(fill = "steelblue") +
  theme_minimal() +
  ggtitle("Girth Size per Plot") +
  xlab("Plot") +
  ylab("Girth (mm)")

# Combine the three panels with `/`.
# NOTE(review): `/` on ggplot objects is presumably supplied by patchwork
# (vertical stacking) — confirm the package is loaded.
p1 / p2 / p3