# Work from the homework folder so that the relative file names used
# throughout the script resolve.
# NOTE(review): this absolute path ties the script to one machine; the later
# read.csv() calls rely on it, so it cannot be dropped in isolation.
setwd("/Users/rokayaaqrabawi/Documents/EMES_203_Homework")
# Read the serialized fault data from the working directory and show its layout
fault_data <- readRDS(file.path(getwd(), "REDFAULT.RDS"))
str(fault_data)
## num [1:64, 1:3] 1.48 4.75 8.64 13.18 17.95 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:3] "X" "Y" "Angle"
Part 1: This part looks at fault orientation data by plotting angles on a unit circle and finding the average orientation.
# Extract the "Angle" column of the fault data as a plain vector
orientation_angle <- fault_data[, "Angle", drop = TRUE]
# Preview the first six angles
head(orientation_angle, n = 6L)
## [1] 215.9 40.8 222.5 228.1 46.2 40.8
# Load ggplot2
library(ggplot2)
# Degrees -> radians, since sin() and cos() work in radians
radians <- orientation_angle * pi / 180
# Project every angle onto the unit circle: x is the cosine, y is the sine
y_coords <- sin(radians)
x_coords <- cos(radians)
# Collect the points in a data frame for ggplot
unit_circle_data <- data.frame(x_coords = x_coords, y_coords = y_coords)
# Scatter of the angles as points on the unit circle, with dashed axes
ggplot(data = unit_circle_data, mapping = aes(x = x_coords, y = y_coords)) +
  geom_point(colour = "pink") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  # Equal aspect ratio, padded slightly past [-1, 1] so the whole circle shows
  coord_fixed(xlim = c(-1.1, 1.1), ylim = c(-1.1, 1.1)) +
  ggtitle("Angles on the Unit Circle") +
  xlab("Cos(Angle)") +
  ylab("Sin(Angle)") +
  theme_minimal()
# Reference outline: 100 points evenly spaced around the unit circle
circle_theta <- seq(0, 2 * pi, length.out = 100)
circle <- data.frame(x = cos(circle_theta), y = sin(circle_theta))
# Same scatter as before, now drawn on top of the gray circle outline
ggplot() +
  geom_path(mapping = aes(x = x, y = y), data = circle, colour = "gray") +
  geom_point(
    mapping = aes(x = x_coords, y = y_coords),
    data = unit_circle_data,
    colour = "pink"
  ) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  coord_fixed(xlim = c(-1.1, 1.1), ylim = c(-1.1, 1.1)) +
  ggtitle("Angles on the Unit Circle") +
  xlab("Cos(Angle)") +
  ylab("Sin(Angle)") +
  theme_minimal()
# Circular mean: average the sine and cosine components of the angles, then
# take the angle of the resulting mean vector (in radians)
mean_sin <- mean(sin(radians))
mean_cos <- mean(cos(radians))
mean_orientation_radians <- atan2(y = mean_sin, x = mean_cos)
# Same direction expressed in degrees. atan2() returns values between -pi and
# pi, so the result lies between -180 and 180.
# NOTE(review): the angles previewed with head() lie in 0-360; add 360 to a
# negative result if the mean should be reported in that range.
mean_orientation_degrees <- mean_orientation_radians * 180 / pi
# Unit-circle plot with the mean orientation drawn as an arrow from the origin
# to the point on the circle at the mean angle.
ggplot() +
  geom_path(data = circle, aes(x = x, y = y), color = "gray") + # circle outline
  geom_point(
    data = unit_circle_data,
    aes(x = x_coords, y = y_coords),
    color = "pink"
  ) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  # The arrow is a single fixed shape, not a mapping of data columns, so it is
  # added with annotate() instead of geom_segment(aes(<constants>)).
  # `linewidth` replaces the deprecated `size` for line widths (ggplot2 3.4.0).
  annotate(
    "segment",
    x = 0, y = 0,
    xend = cos(mean_orientation_radians),
    yend = sin(mean_orientation_radians),
    arrow = arrow(length = unit(0.2, "inches"), type = "closed"),
    color = "purple4", linewidth = 1.5
  ) +
  coord_fixed(xlim = c(-1.1, 1.1), ylim = c(-1.1, 1.1)) +
  labs(
    title = "Angles with Mean Orientation on Unit Circle",
    x = "Cos(Angle)",
    y = "Sin(Angle)"
  ) +
  theme_minimal()
# Print mean orientation in degrees
mean_orientation_degrees
## [1] -168.7494
Part 2: This part looks at temperature and chlorophyll data from the Galapagos Islands to see patterns, changes, and differences between years.
library(dplyr)
# Read one year's survey CSV, coerce the coordinate columns to numeric, and
# stamp every row with the survey year.
# NOTE(review): the str() output of the combined data shows Latitude and
# Longitude as NA in the leading rows, so as.numeric() may be discarding
# coordinates that are not stored as plain numbers -- confirm against the CSVs.
load_galapagos_year <- function(csv_file, survey_year) {
  survey <- read.csv(csv_file)
  for (coord in c("Latitude", "Longitude")) {
    survey[[coord]] <- as.numeric(survey[[coord]])
  }
  survey$Year <- survey_year
  survey
}
# One data frame per survey year
galapagos_2014 <- load_galapagos_year("Galapagos_2014.csv", 2014)
galapagos_2015 <- load_galapagos_year("Galapagos_2015.csv", 2015)
galapagos_2016 <- load_galapagos_year("Galapagos_2016.csv", 2016)
# Stack the three survey years into a single data frame
galapagos_data <- bind_rows(
  list(galapagos_2014, galapagos_2015, galapagos_2016)
)
# Inspect the column types of the combined data
str(galapagos_data)
## 'data.frame': 168 obs. of 19 variables:
## $ Year : num 2014 2014 2014 2014 2014 ...
## $ Date : chr "10/11/2014" "10/11/2014" "10/11/2014" "10/11/2014" ...
## $ Time_GMT : chr "13:00:00" "13:00:00" "13:00:00" "20:00:00" ...
## $ Season : chr "Winter" "Winter" "Winter" "Winter" ...
## $ Latitude : num NA NA NA NA NA NA NA NA NA NA ...
## $ Longitude : num NA NA NA NA NA NA NA NA NA NA ...
## $ Salinity_psu: num 34.8 34.8 34.8 NA NA ...
## $ Temp_C : num 20.7 20.7 20.7 NA NA ...
## $ Fluor_ug_L : num -0.0673 -0.0673 -0.0673 NA NA ...
## $ Station : int 3 3 3 4 4 4 5 5 5 7 ...
## $ Depth_m : num 4 4 4 NA NA NA 7 7 7 8 ...
## $ ID : chr "1-1" "1-1" "1-1" "2-1" ...
## $ Replicate : chr "A" "B" "C" "A" ...
## $ NO3_uM : num 6.08 NA 5.05 NA NA NA 5.22 NA 5.31 6.7 ...
## $ PO4_uM : num 0.53 NA 0.532 NA NA NA 0.483 NA 0.549 0.514 ...
## $ SiOH4_uM : num 1.66 NA 0.1 NA NA NA 2.7 NA 3.37 4.33 ...
## $ X5_Chl : num 0.669 0.404 0.9 NA NA ...
## $ GFF_Chl : num 0.977 0.806 0.971 NA NA ...
## $ Chla_Total : num 1.65 1.21 1.87 NA NA ...
# Keep only the rows where both temperature and total chlorophyll are present
filtered_data <- filter(
  galapagos_data,
  !(is.na(Temp_C) | is.na(Chla_Total))
)
# Inspect the filtered data
str(filtered_data)
## 'data.frame': 132 obs. of 19 variables:
## $ Year : num 2014 2014 2014 2014 2014 ...
## $ Date : chr "10/11/2014" "10/11/2014" "10/11/2014" "10/12/2014" ...
## $ Time_GMT : chr "13:00:00" "13:00:00" "13:00:00" "12:30:00" ...
## $ Season : chr "Winter" "Winter" "Winter" "Winter" ...
## $ Latitude : num NA NA NA NA NA NA NA NA NA NA ...
## $ Longitude : num NA NA NA NA NA NA NA NA NA NA ...
## $ Salinity_psu: num 34.8 34.8 34.8 34.3 34.3 ...
## $ Temp_C : num 20.7 20.7 20.7 21.3 21.3 ...
## $ Fluor_ug_L : num -0.0673 -0.0673 -0.0673 -0.1016 -0.1016 ...
## $ Station : int 3 3 3 5 5 5 7 7 7 9 ...
## $ Depth_m : num 4 4 4 7 7 7 8 8 8 8 ...
## $ ID : chr "1-1" "1-1" "1-1" "3-1" ...
## $ Replicate : chr "A" "B" "C" "A" ...
## $ NO3_uM : num 6.08 NA 5.05 5.22 NA 5.31 6.7 NA 6.26 3.56 ...
## $ PO4_uM : num 0.53 NA 0.532 0.483 NA 0.549 0.514 NA 0.587 0.386 ...
## $ SiOH4_uM : num 1.66 NA 0.1 2.7 NA 3.37 4.33 NA 2.42 1.86 ...
## $ X5_Chl : num 0.669 0.404 0.9 0.317 0.36 ...
## $ GFF_Chl : num 0.977 0.806 0.971 0.232 0.152 ...
## $ Chla_Total : num 1.645 1.21 1.871 0.548 0.512 ...
# Treat Year as a categorical variable instead of a number
filtered_data[["Year"]] <- factor(filtered_data[["Year"]])
# Confirm the new column type
str(filtered_data)
## 'data.frame': 132 obs. of 19 variables:
## $ Year : Factor w/ 3 levels "2014","2015",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Date : chr "10/11/2014" "10/11/2014" "10/11/2014" "10/12/2014" ...
## $ Time_GMT : chr "13:00:00" "13:00:00" "13:00:00" "12:30:00" ...
## $ Season : chr "Winter" "Winter" "Winter" "Winter" ...
## $ Latitude : num NA NA NA NA NA NA NA NA NA NA ...
## $ Longitude : num NA NA NA NA NA NA NA NA NA NA ...
## $ Salinity_psu: num 34.8 34.8 34.8 34.3 34.3 ...
## $ Temp_C : num 20.7 20.7 20.7 21.3 21.3 ...
## $ Fluor_ug_L : num -0.0673 -0.0673 -0.0673 -0.1016 -0.1016 ...
## $ Station : int 3 3 3 5 5 5 7 7 7 9 ...
## $ Depth_m : num 4 4 4 7 7 7 8 8 8 8 ...
## $ ID : chr "1-1" "1-1" "1-1" "3-1" ...
## $ Replicate : chr "A" "B" "C" "A" ...
## $ NO3_uM : num 6.08 NA 5.05 5.22 NA 5.31 6.7 NA 6.26 3.56 ...
## $ PO4_uM : num 0.53 NA 0.532 0.483 NA 0.549 0.514 NA 0.587 0.386 ...
## $ SiOH4_uM : num 1.66 NA 0.1 2.7 NA 3.37 4.33 NA 2.42 1.86 ...
## $ X5_Chl : num 0.669 0.404 0.9 0.317 0.36 ...
## $ GFF_Chl : num 0.977 0.806 0.971 0.232 0.152 ...
## $ Chla_Total : num 1.645 1.21 1.871 0.548 0.512 ...
# Mean and standard deviation of temperature and chlorophyll for every
# Year/Station combination.
# NOTE(review): sd_temp is 0 for the stations printed below, and the replicate
# rows in the str() output carry identical Temp_C values, so the temperature
# error bars built from this table have no height -- confirm this is intended.
summary_of_data <- filtered_data %>%
  group_by(Year, Station) %>%
  summarize(
    mean_temp = mean(Temp_C, na.rm = TRUE),
    sd_temp = sd(Temp_C, na.rm = TRUE),
    mean_chla = mean(Chla_Total, na.rm = TRUE),
    sd_chla = sd(Chla_Total, na.rm = TRUE),
    # Return an ungrouped table: without this the result stays grouped by Year
    # and summarize() prints a message about the leftover grouping.
    .groups = "drop"
  )
# View Summary Data
head(summary_of_data)
## # A tibble: 6 × 6
## Year Station mean_temp sd_temp mean_chla sd_chla
## <fct> <int> <dbl> <dbl> <dbl> <dbl>
## 1 2014 3 20.7 0 1.58 0.336
## 2 2014 5 21.3 0 0.509 0.0410
## 3 2014 7 22.1 0 0.739 0.269
## 4 2014 9 22.7 0 0.448 0.163
## 5 2014 11 23.0 0 0.438 0.0750
## 6 2014 12 22.0 0 0.181 0.0506
# Load ggplot2 for plotting
library(ggplot2)
# Mean temperature per station, one line per year, with error bars spanning
# one standard deviation either side of the mean. The original drew this exact
# plot twice in a row; it is drawn once here.
ggplot(summary_of_data, aes(x = Station, y = mean_temp, color = Year)) +
  # `linewidth` replaces the deprecated `size` for line widths (ggplot2 3.4.0)
  geom_line(linewidth = 1) +
  geom_errorbar(
    aes(ymin = mean_temp - sd_temp, ymax = mean_temp + sd_temp),
    width = 0.2
  ) +
  labs(
    title = "Temperature by Station and Year",
    x = "Station",
    y = "Mean Temperature (°C)"
  ) +
  theme_minimal()
# Mean chlorophyll per station, one line per year, with error bars spanning
# one standard deviation either side of the mean.
ggplot(summary_of_data, aes(x = Station, y = mean_chla, color = Year)) +
  # `linewidth` replaces the deprecated `size` for line widths (ggplot2 3.4.0)
  geom_line(linewidth = 1) +
  geom_errorbar(
    aes(ymin = mean_chla - sd_chla, ymax = mean_chla + sd_chla),
    width = 0.2
  ) +
  labs(
    title = "Chlorophyll by Station and Year",
    x = "Station",
    y = "Mean Chlorophyll (µg/L)"
  ) +
  theme_minimal()
# One-way ANOVA for temp: models Temp_C by Year, which was converted to a
# factor above, using the rows that survived the NA filter.
# The call is written out literally because TukeyHSD() echoes it on its
# "Fit:" line further down.
# NOTE(review): replicate rows (A/B/C) carry identical Temp_C values in the
# str() output, so the rows may not be independent observations -- confirm
# whether temperature should be reduced to one value per station first.
anova_temp <- aov(Temp_C ~ Year, data = filtered_data)
# Summary of ANOVA: prints the table with Df, sums of squares, F value and
# p-value for Year
summary(anova_temp)
## Df Sum Sq Mean Sq F value Pr(>F)
## Year 2 496.5 248.27 106.4 <2e-16 ***
## Residuals 129 301.0 2.33
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Temperature distribution per survey year, one box per year
ggplot(
  data = filtered_data,
  mapping = aes(x = Year, y = Temp_C, fill = Year)
) +
  geom_boxplot() +
  ggtitle("Distribution of Temperature by Year") +
  xlab("Year") +
  ylab("Temperature (°C)") +
  theme_minimal()
# Chlorophyll distribution per survey year, one box per year
ggplot(
  data = filtered_data,
  mapping = aes(x = Year, y = Chla_Total, fill = Year)
) +
  geom_boxplot() +
  ggtitle("Distribution of Chlorophyll by Year") +
  xlab("Year") +
  ylab("Chlorophyll (µg/L)") +
  theme_minimal()
# Perform one-way ANOVA for chlorophyll: models Chla_Total by Year (a factor)
# using the rows that survived the NA filter.
# The call is written out literally because TukeyHSD() echoes it on its
# "Fit:" line further down.
anova_chla <- aov(Chla_Total ~ Year, data = filtered_data)
# Summary of ANOVA results for chlorophyll: prints the table with Df, sums of
# squares, F value and p-value for Year
summary(anova_chla)
## Df Sum Sq Mean Sq F value Pr(>F)
## Year 2 3.45 1.7252 6.188 0.00272 **
## Residuals 129 35.97 0.2788
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD pairwise comparison of mean temperature between years, at the
# default 95% family-wise confidence level
tukey_temp <- TukeyHSD(anova_temp, conf.level = 0.95)
# Show the pairwise differences, confidence bounds and adjusted p-values
tukey_temp
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Temp_C ~ Year, data = filtered_data)
##
## $Year
## diff lwr upr p adj
## 2015-2014 2.491656 1.706341 3.276971 0e+00
## 2016-2014 -2.052628 -2.866625 -1.238631 1e-07
## 2016-2015 -4.544284 -5.286215 -3.802354 0e+00
# Draw the confidence interval of each pairwise difference
plot(tukey_temp, col = "darkblue", las = 1)
# Tukey HSD pairwise comparison of mean chlorophyll between years, at the
# default 95% family-wise confidence level
tukey_chla <- TukeyHSD(anova_chla, conf.level = 0.95)
# Show the pairwise differences, confidence bounds and adjusted p-values
tukey_chla
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Chla_Total ~ Year, data = filtered_data)
##
## $Year
## diff lwr upr p adj
## 2015-2014 -0.28453910 -0.5559880 -0.01309022 0.0375817
## 2016-2014 0.07458258 -0.2067805 0.35594562 0.8047348
## 2016-2015 0.35912168 0.1026690 0.61557437 0.0033318
# Draw the confidence interval of each pairwise difference
plot(tukey_chla, col = "darkgreen", las = 1)
Tukey Test: The Tukey test shows which pairs of years
differ significantly in mean chlorophyll levels:
- 2015 vs. 2014: A small but statistically significant drop in chlorophyll was found (p
= 0.04).
- 2016 vs. 2014: No significant difference was found (p = 0.80).
- 2016 vs. 2015: A clear increase in chlorophyll was found (p =
0.003).
The Tukey plot backs this up: a confidence interval that crosses zero
means there is no significant difference between that pair of years.
Box Plot for Chlorophyll: The box plot shows the differences and variation in chlorophyll levels between years. Chlorophyll levels in 2015 appear lower and less variable than in 2014 and 2016. The median chlorophyll concentration increased significantly from 2015 to 2016.