Marine ecosystems are strongly affected by climate variables such as
sea surface temperature (SST),
pH levels, and marine heatwaves.
Corals are especially sensitive, showing stress through bleaching
events.
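This project uses a Kaggle dataset containing 500 observations of nine variables: date, location, latitude, longitude, SST (°C), pH level, coral bleaching severity, species observed, and a marine heatwave flag.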
# Load the ocean climate observations; the relative path is resolved against
# the current working directory.
data <- read_csv(file = "realistic_ocean_climate_dataset.csv")
## Rows: 500 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Location, Bleaching Severity
## dbl (5): Latitude, Longitude, SST (°C), pH Level, Species Observed
## lgl (1): Marine Heatwave
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to sanity-check the parsed columns.
head(data, n = 6L)
## # A tibble: 6 × 9
## Date Location Latitude Longitude `SST (°C)` `pH Level`
## <date> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2015-01-01 Red Sea 20.0 38.5 29.5 8.11
## 2 2015-01-07 Great Barrier Reef -18.3 148. 29.6 8.00
## 3 2015-01-14 Caribbean Sea 15.0 -75.0 28.9 7.95
## 4 2015-01-20 Great Barrier Reef -18.3 148. 29.0 8.00
## 5 2015-01-27 Galápagos -0.880 -91.0 28.6 7.98
## 6 2015-02-02 Red Sea 20.0 38.4 29.1 8.01
## # ℹ 3 more variables: `Bleaching Severity` <chr>, `Species Observed` <dbl>,
## # `Marine Heatwave` <lgl>
# Build the analysis table `data_clean` from the raw import.
#
# Adds two helper columns (the original columns are kept alongside them):
#   BleachingSeverity - `Bleaching Severity` as a factor with a fixed level
#                       order (None, Low, Medium, High)
#   MarineHeatwave    - `Marine Heatwave` coerced to logical
# and then removes every row that contains a missing value.
severity_levels <- c("None", "Low", "Medium", "High")

# factor() maps any label that is not in `levels` to NA, and drop_na() would
# then discard those rows without notice. Report such labels up front so that
# silent data loss is visible to whoever runs the analysis.
unexpected_severity <- setdiff(
  unique(data[["Bleaching Severity"]]),
  c(severity_levels, NA)
)
if (length(unexpected_severity) > 0) {
  warning(
    "Unrecognised `Bleaching Severity` values will be dropped: ",
    paste(unexpected_severity, collapse = ", "),
    call. = FALSE
  )
}

data_clean <- data %>%
  mutate(
    # Defensive coercions: these keep the column types stable even if the
    # importer guesses a different type for Date or `Marine Heatwave`.
    Date = ymd(Date),
    BleachingSeverity = factor(
      `Bleaching Severity`,
      levels = severity_levels
    ),
    MarineHeatwave = as.logical(`Marine Heatwave`)
  ) %>%
  drop_na()

# Per-column overview of the cleaned table.
summary(data_clean)
## Date Location Latitude Longitude
## Min. :2015-01-01 Length:500 Min. :-18.3830 Min. :-155.72
## 1st Qu.:2017-04-01 Class :character 1st Qu.: -0.9329 1st Qu.: -90.93
## Median :2019-07-01 Mode :character Median : 10.0188 Median : 38.52
## Mean :2019-07-01 Mean : 6.7284 Mean : 11.93
## 3rd Qu.:2021-09-29 3rd Qu.: 19.8910 3rd Qu.: 114.99
## Max. :2023-12-31 Max. : 20.1220 Max. : 147.83
## SST (°C) pH Level Bleaching Severity Species Observed
## Min. :23.64 Min. :7.872 Length:500 Min. : 54.0
## 1st Qu.:27.53 1st Qu.:8.011 Class :character 1st Qu.:107.8
## Median :28.52 Median :8.052 Mode :character Median :120.0
## Mean :28.54 Mean :8.050 Mean :120.5
## 3rd Qu.:29.45 3rd Qu.:8.085 3rd Qu.:133.2
## Max. :33.21 Max. :8.195 Max. :171.0
## Marine Heatwave BleachingSeverity MarineHeatwave
## Mode :logical None :150 Mode :logical
## FALSE:427 Low :132 FALSE:427
## TRUE :73 Medium:130 TRUE :73
## High : 88
##
##
# Sea surface temperature through time, one coloured line per location.
data_clean %>%
  ggplot(mapping = aes(x = Date, y = `SST (°C)`, colour = Location)) +
  geom_line(linewidth = 1) +
  ggtitle("Sea Surface Temperature Over Time") +
  theme_minimal()
# Ocean pH through time, one coloured line per location.
data_clean %>%
  ggplot(mapping = aes(x = Date, y = `pH Level`, colour = Location)) +
  geom_line(linewidth = 1) +
  ggtitle("Ocean pH Over Time") +
  theme_minimal()
# Species counts through time, one coloured line per location.
data_clean %>%
  ggplot(mapping = aes(x = Date, y = `Species Observed`, colour = Location)) +
  geom_line(linewidth = 1) +
  ggtitle("Species Richness Trends Over Time") +
  theme_minimal()
# Row counts per bleaching severity level, with one bar per location placed
# side by side within each level.
ggplot(
  data = data_clean,
  mapping = aes(x = BleachingSeverity, fill = Location)
) +
  geom_bar(position = position_dodge()) +
  ggtitle("Distribution of Coral Bleaching Severity") +
  theme_minimal()
# SST against pH, with each point coloured by its bleaching severity level.
ggplot(
  data = data_clean,
  mapping = aes(x = `SST (°C)`, y = `pH Level`, colour = BleachingSeverity)
) +
  geom_point(alpha = 0.8, size = 3) +
  ggtitle("SST vs pH Level Colored by Bleaching Severity") +
  theme_minimal()
# Bubble chart: SST on x, pH on y, point area driven by the species count and
# colour by bleaching severity.
data_clean %>%
  ggplot(
    mapping = aes(
      x = `SST (°C)`,
      y = `pH Level`,
      colour = BleachingSeverity,
      size = `Species Observed`
    )
  ) +
  geom_point(alpha = 0.6) +
  # Widen the default point-size range so differences in count are visible.
  scale_size(range = c(2, 12)) +
  ggtitle("3D Bubble Plot: SST vs pH vs Species Observed") +
  theme_minimal()
# 3D marker plot (plotly): SST, pH and species count on the three axes,
# with markers coloured by bleaching severity.
plot_ly(
  data = data_clean,
  type = "scatter3d",
  mode = "markers",
  x = ~`SST (°C)`,
  y = ~`pH Level`,
  z = ~`Species Observed`,
  color = ~BleachingSeverity,
  marker = list(size = 5)
)
# 3D marker plot (plotly) positioned by longitude/latitude, with the species
# count as height and marker colour mapped to SST.
plot_ly(
  data = data_clean,
  type = "scatter3d",
  mode = "markers",
  x = ~Longitude,
  y = ~Latitude,
  z = ~`Species Observed`,
  color = ~`SST (°C)`,
  marker = list(size = 5),
  # Show only the custom label on hover; "<br>" separates its lines.
  hoverinfo = "text",
  text = ~paste(
    "Location:", Location,
    "<br>SST:", `SST (°C)`, "°C",
    "<br>pH:", `pH Level`,
    "<br>Bleaching:", BleachingSeverity
  )
) %>%
  layout(
    title = "3D Geographic Map of Marine Conditions",
    scene = list(
      xaxis = list(title = "Longitude"),
      yaxis = list(title = "Latitude"),
      zaxis = list(title = "Species Observed")
    )
  )
# Share of each bleaching severity level with and without a marine heatwave;
# bars are normalised to height 1 so the two groups are directly comparable.
ggplot(
  data = data_clean,
  mapping = aes(x = MarineHeatwave, fill = BleachingSeverity)
) +
  geom_bar(position = position_fill()) +
  ggtitle("Impact of Marine Heatwaves on Coral Bleaching") +
  ylab("Proportion") +
  theme_minimal()
# Keep only the three numeric measurements used for the correlation matrix.
num_data <- select(
  data_clean,
  `SST (°C)`, `pH Level`, `Species Observed`
)

# Pairwise correlations drawn as a coloured grid with the coefficients
# printed in black on top of each cell.
num_data %>%
  cor() %>%
  corrplot(method = "color", addCoef.col = "black")
# Simple linear regression of the species count on sea surface temperature.
model <- lm(
  formula = `Species Observed` ~ `SST (°C)`,
  data = data_clean
)

# Coefficients, residual summary and fit statistics.
summary(model)
##
## Call:
## lm(formula = `Species Observed` ~ `SST (°C)`, data = data_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.867 -10.198 -0.615 10.159 56.701
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 399.8114 13.4665 29.69 <2e-16 ***
## `SST (°C)` -9.7886 0.4713 -20.77 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15 on 498 degrees of freedom
## Multiple R-squared: 0.4641, Adjusted R-squared: 0.4631
## F-statistic: 431.4 on 1 and 498 DF, p-value: < 2.2e-16
# Observations (blue points) with the fitted least-squares line (red) and no
# confidence band.
data_clean %>%
  ggplot(mapping = aes(x = `SST (°C)`, y = `Species Observed`)) +
  geom_point(colour = "blue") +
  geom_smooth(method = "lm", colour = "red", se = FALSE) +
  ggtitle("Regression: SST vs Species Observed") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
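From the analysis, a simple linear regression shows that the number of species observed falls by roughly 9.8 for every 1 °C rise in SST (R² ≈ 0.46, p < 2.2e-16).
This study is consistent with well-documented scientific evidence that warming and acidification are the largest threats to coral ecosystems globally.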