# Run from the homework folder so the relative data paths used below resolve.
# NOTE(review): this absolute path is machine-specific.
setwd(dir = "/Users/rokayaaqrabawi/Documents/EMES_203_Homework")

# Read the serialized fault data set
fault_data <- readRDS(file = "REDFAULT.RDS")

# Inspect its structure
str(object = fault_data)
##  num [1:64, 1:3] 1.48 4.75 8.64 13.18 17.95 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:3] "X" "Y" "Angle"

Part 1: This part looks at fault orientation data by plotting angles on a unit circle and finding the average orientation.

# Extract the "Angle" column from the fault data
orientation_angle <- fault_data[, "Angle"]
head(orientation_angle)
## [1] 215.9  40.8 222.5 228.1  46.2  40.8

# ggplot2 supplies the plotting functions used below
library(ggplot2)

# Degrees -> radians, the unit cos() and sin() expect
radians <- orientation_angle * pi / 180

# Project every angle onto the unit circle
x_coords <- cos(radians)
y_coords <- sin(radians)
unit_circle_data <- data.frame(x_coords = x_coords, y_coords = y_coords)

# Scatter the projected angles, with dashed axes through the origin
ggplot(data = unit_circle_data, mapping = aes(x = x_coords, y = y_coords)) +
  geom_point(colour = "pink") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  # Equal aspect ratio plus a small margin keeps the whole circle in view
  coord_fixed(xlim = c(-1.1, 1.1), ylim = c(-1.1, 1.1)) +
  ggtitle("Angles on the Unit Circle") +
  xlab("Cos(Angle)") +
  ylab("Sin(Angle)") +
  theme_minimal()

# Outline of the unit circle: 100 evenly spaced angles from 0 to 2 * pi
theta <- seq(0, 2 * pi, length.out = 100)
circle <- data.frame(x = cos(theta), y = sin(theta))

# Same scatter as before, now drawn on top of the circle outline
ggplot() +
  geom_path(
    data = circle,
    mapping = aes(x = x, y = y),
    colour = "gray"
  ) +
  geom_point(
    data = unit_circle_data,
    mapping = aes(x = x_coords, y = y_coords),
    colour = "pink"
  ) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  coord_fixed(xlim = c(-1.1, 1.1), ylim = c(-1.1, 1.1)) +
  ggtitle("Angles on the Unit Circle") +
  xlab("Cos(Angle)") +
  ylab("Sin(Angle)") +
  theme_minimal()

# Circular mean direction (radians): the angle of the averaged unit vectors.
# atan2() returns a value in (-pi, pi], so the result can be negative.
# NOTE(review): the first values of orientation_angle sit near 40 and 220
# degrees, roughly 180 degrees apart. If these are axial (undirected)
# orientations, opposite vectors largely cancel and this mean is unreliable;
# the usual remedy is to double the angles before averaging -- TODO confirm.
mean_orientation_radians <- atan2(mean(sin(radians)), mean(cos(radians)))

# Same direction expressed in degrees
mean_orientation_degrees <- mean_orientation_radians * 180 / pi

# Unit circle with the data points and an arrow along the mean direction
ggplot() +
  geom_path(data = circle, aes(x = x, y = y), color = "gray") + # circle outline
  geom_point(
    data = unit_circle_data,
    aes(x = x_coords, y = y_coords),
    color = "pink"
  ) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  # Arrow from the origin to the point on the circle at the mean direction.
  # `linewidth` replaces `size` for line thickness (ggplot2 >= 3.4.0).
  geom_segment(
    aes(
      x = 0, y = 0,
      xend = cos(mean_orientation_radians),
      yend = sin(mean_orientation_radians)
    ),
    arrow = arrow(length = unit(0.2, "inches"), type = "closed"),
    color = "purple4", linewidth = 1.5
  ) +
  coord_fixed(xlim = c(-1.1, 1.1), ylim = c(-1.1, 1.1)) +
  labs(
    title = "Angles with Mean Orientation on Unit Circle",
    x = "Cos(Angle)",
    y = "Sin(Angle)"
  ) +
  theme_minimal()

# Print mean orientation in degrees
mean_orientation_degrees
## [1] -168.7494

Part 2: This part looks at temperature and chlorophyll data from the Galapagos Islands to see patterns, changes, and differences between years.

library(dplyr)

# Read one yearly CSV, force the coordinate columns to numeric, and stamp
# every row with the survey year.
# NOTE(review): in the str() output recorded for the combined table the first
# Latitude/Longitude values are all NA, so this coercion may be discarding
# coordinates that are not stored as plain numbers -- TODO confirm.
read_galapagos_year <- function(csv_path, year) {
  cruise <- read.csv(csv_path)
  cruise$Latitude <- as.numeric(cruise$Latitude)
  cruise$Longitude <- as.numeric(cruise$Longitude)
  cruise$Year <- year
  cruise
}

# One table per survey year
galapagos_2014 <- read_galapagos_year("Galapagos_2014.csv", 2014)
galapagos_2015 <- read_galapagos_year("Galapagos_2015.csv", 2015)
galapagos_2016 <- read_galapagos_year("Galapagos_2016.csv", 2016)

# Stack the three years into a single table
galapagos_data <- bind_rows(galapagos_2014, galapagos_2015, galapagos_2016)

# Inspect the combined table
str(galapagos_data)
## 'data.frame':    168 obs. of  19 variables:
##  $ Year        : num  2014 2014 2014 2014 2014 ...
##  $ Date        : chr  "10/11/2014" "10/11/2014" "10/11/2014" "10/11/2014" ...
##  $ Time_GMT    : chr  "13:00:00" "13:00:00" "13:00:00" "20:00:00" ...
##  $ Season      : chr  "Winter" "Winter" "Winter" "Winter" ...
##  $ Latitude    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Longitude   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Salinity_psu: num  34.8 34.8 34.8 NA NA ...
##  $ Temp_C      : num  20.7 20.7 20.7 NA NA ...
##  $ Fluor_ug_L  : num  -0.0673 -0.0673 -0.0673 NA NA ...
##  $ Station     : int  3 3 3 4 4 4 5 5 5 7 ...
##  $ Depth_m     : num  4 4 4 NA NA NA 7 7 7 8 ...
##  $ ID          : chr  "1-1" "1-1" "1-1" "2-1" ...
##  $ Replicate   : chr  "A" "B" "C" "A" ...
##  $ NO3_uM      : num  6.08 NA 5.05 NA NA NA 5.22 NA 5.31 6.7 ...
##  $ PO4_uM      : num  0.53 NA 0.532 NA NA NA 0.483 NA 0.549 0.514 ...
##  $ SiOH4_uM    : num  1.66 NA 0.1 NA NA NA 2.7 NA 3.37 4.33 ...
##  $ X5_Chl      : num  0.669 0.404 0.9 NA NA ...
##  $ GFF_Chl     : num  0.977 0.806 0.971 NA NA ...
##  $ Chla_Total  : num  1.65 1.21 1.87 NA NA ...
# Keep only the rows where both temperature and chlorophyll were recorded
filtered_data <- filter(
  galapagos_data,
  !(is.na(Temp_C) | is.na(Chla_Total))
)

# Inspect the filtered table
str(filtered_data)
## 'data.frame':    132 obs. of  19 variables:
##  $ Year        : num  2014 2014 2014 2014 2014 ...
##  $ Date        : chr  "10/11/2014" "10/11/2014" "10/11/2014" "10/12/2014" ...
##  $ Time_GMT    : chr  "13:00:00" "13:00:00" "13:00:00" "12:30:00" ...
##  $ Season      : chr  "Winter" "Winter" "Winter" "Winter" ...
##  $ Latitude    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Longitude   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Salinity_psu: num  34.8 34.8 34.8 34.3 34.3 ...
##  $ Temp_C      : num  20.7 20.7 20.7 21.3 21.3 ...
##  $ Fluor_ug_L  : num  -0.0673 -0.0673 -0.0673 -0.1016 -0.1016 ...
##  $ Station     : int  3 3 3 5 5 5 7 7 7 9 ...
##  $ Depth_m     : num  4 4 4 7 7 7 8 8 8 8 ...
##  $ ID          : chr  "1-1" "1-1" "1-1" "3-1" ...
##  $ Replicate   : chr  "A" "B" "C" "A" ...
##  $ NO3_uM      : num  6.08 NA 5.05 5.22 NA 5.31 6.7 NA 6.26 3.56 ...
##  $ PO4_uM      : num  0.53 NA 0.532 0.483 NA 0.549 0.514 NA 0.587 0.386 ...
##  $ SiOH4_uM    : num  1.66 NA 0.1 2.7 NA 3.37 4.33 NA 2.42 1.86 ...
##  $ X5_Chl      : num  0.669 0.404 0.9 0.317 0.36 ...
##  $ GFF_Chl     : num  0.977 0.806 0.971 0.232 0.152 ...
##  $ Chla_Total  : num  1.645 1.21 1.871 0.548 0.512 ...
# Treat Year as a categorical variable (one level per survey year)
filtered_data[["Year"]] <- factor(filtered_data[["Year"]])

# Confirm the new column type
str(filtered_data)
## 'data.frame':    132 obs. of  19 variables:
##  $ Year        : Factor w/ 3 levels "2014","2015",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Date        : chr  "10/11/2014" "10/11/2014" "10/11/2014" "10/12/2014" ...
##  $ Time_GMT    : chr  "13:00:00" "13:00:00" "13:00:00" "12:30:00" ...
##  $ Season      : chr  "Winter" "Winter" "Winter" "Winter" ...
##  $ Latitude    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Longitude   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Salinity_psu: num  34.8 34.8 34.8 34.3 34.3 ...
##  $ Temp_C      : num  20.7 20.7 20.7 21.3 21.3 ...
##  $ Fluor_ug_L  : num  -0.0673 -0.0673 -0.0673 -0.1016 -0.1016 ...
##  $ Station     : int  3 3 3 5 5 5 7 7 7 9 ...
##  $ Depth_m     : num  4 4 4 7 7 7 8 8 8 8 ...
##  $ ID          : chr  "1-1" "1-1" "1-1" "3-1" ...
##  $ Replicate   : chr  "A" "B" "C" "A" ...
##  $ NO3_uM      : num  6.08 NA 5.05 5.22 NA 5.31 6.7 NA 6.26 3.56 ...
##  $ PO4_uM      : num  0.53 NA 0.532 0.483 NA 0.549 0.514 NA 0.587 0.386 ...
##  $ SiOH4_uM    : num  1.66 NA 0.1 2.7 NA 3.37 4.33 NA 2.42 1.86 ...
##  $ X5_Chl      : num  0.669 0.404 0.9 0.317 0.36 ...
##  $ GFF_Chl     : num  0.977 0.806 0.971 0.232 0.152 ...
##  $ Chla_Total  : num  1.645 1.21 1.871 0.548 0.512 ...
# Mean and standard deviation of temperature and chlorophyll for every
# Year x Station combination.
# `.groups = "drop"` returns an ungrouped tibble; without it the result stays
# grouped by Year and summarize() emits a message about the leftover grouping.
summary_of_data <- filtered_data %>%
  group_by(Year, Station) %>%
  summarize(
    mean_temp = mean(Temp_C, na.rm = TRUE),
    sd_temp = sd(Temp_C, na.rm = TRUE),
    mean_chla = mean(Chla_Total, na.rm = TRUE),
    sd_chla = sd(Chla_Total, na.rm = TRUE),
    .groups = "drop"
  )

# View Summary Data
head(summary_of_data)
## # A tibble: 6 × 6
##   Year  Station mean_temp sd_temp mean_chla sd_chla
##   <fct>   <int>     <dbl>   <dbl>     <dbl>   <dbl>
## 1 2014        3      20.7       0     1.58   0.336 
## 2 2014        5      21.3       0     0.509  0.0410
## 3 2014        7      22.1       0     0.739  0.269 
## 4 2014        9      22.7       0     0.448  0.163 
## 5 2014       11      23.0       0     0.438  0.0750
## 6 2014       12      22.0       0     0.181  0.0506
# Load ggplot2 for plotting
library(ggplot2)

# Mean temperature by station, one line per year.
# Error bars span mean +/- one standard deviation.
# `linewidth` replaces `size` for line thickness (ggplot2 >= 3.4.0).
ggplot(summary_of_data, aes(x = Station, y = mean_temp, color = Year)) +
  geom_line(linewidth = 1) +
  geom_errorbar(
    aes(ymin = mean_temp - sd_temp, ymax = mean_temp + sd_temp),
    width = 0.2
  ) +
  labs(
    title = "Temperature by Station and Year",
    x = "Station",
    y = "Mean Temperature (°C)"
  ) +
  theme_minimal()

# Mean chlorophyll by station, one line per year, with the same error bars
ggplot(summary_of_data, aes(x = Station, y = mean_chla, color = Year)) +
  geom_line(linewidth = 1) +
  geom_errorbar(
    aes(ymin = mean_chla - sd_chla, ymax = mean_chla + sd_chla),
    width = 0.2
  ) +
  labs(
    title = "Chlorophyll by Station and Year",
    x = "Station",
    y = "Mean Chlorophyll (µg/L)"
  ) +
  theme_minimal()

# One-way ANOVA: does mean temperature differ between years?
# NOTE(review): the per-station summary shows sd_temp = 0 for the stations
# printed, so replicates appear to share one temperature reading; treating
# each replicate row as independent may overstate the residual degrees of
# freedom -- TODO confirm.
anova_temp <- aov(formula = Temp_C ~ Year, data = filtered_data)

# ANOVA table for temperature
summary(object = anova_temp)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Year          2  496.5  248.27   106.4 <2e-16 ***
## Residuals   129  301.0    2.33                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Spread of temperature within each year
ggplot(data = filtered_data) +
  geom_boxplot(mapping = aes(x = Year, y = Temp_C, fill = Year)) +
  ggtitle("Distribution of Temperature by Year") +
  xlab("Year") +
  ylab("Temperature (°C)") +
  theme_minimal()

# Spread of chlorophyll within each year
ggplot(data = filtered_data) +
  geom_boxplot(mapping = aes(x = Year, y = Chla_Total, fill = Year)) +
  ggtitle("Distribution of Chlorophyll by Year") +
  xlab("Year") +
  ylab("Chlorophyll (µg/L)") +
  theme_minimal()

# One-way ANOVA: does mean chlorophyll differ between years?
anova_chla <- aov(formula = Chla_Total ~ Year, data = filtered_data)

# ANOVA table for chlorophyll
summary(object = anova_chla)
##              Df Sum Sq Mean Sq F value  Pr(>F)   
## Year          2   3.45  1.7252   6.188 0.00272 **
## Residuals   129  35.97  0.2788                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD: pairwise comparison of yearly mean temperature
tukey_temp <- TukeyHSD(x = anova_temp)

# Pairwise differences with confidence limits and adjusted p-values
tukey_temp
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Temp_C ~ Year, data = filtered_data)
## 
## $Year
##                diff       lwr       upr p adj
## 2015-2014  2.491656  1.706341  3.276971 0e+00
## 2016-2014 -2.052628 -2.866625 -1.238631 1e-07
## 2016-2015 -4.544284 -5.286215 -3.802354 0e+00
# Confidence interval of each pairwise difference (horizontal axis labels)
plot(x = tukey_temp, col = "darkblue", las = 1)

# Tukey HSD: pairwise comparison of yearly mean chlorophyll
tukey_chla <- TukeyHSD(x = anova_chla)

# Pairwise differences with confidence limits and adjusted p-values
tukey_chla
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Chla_Total ~ Year, data = filtered_data)
## 
## $Year
##                  diff        lwr         upr     p adj
## 2015-2014 -0.28453910 -0.5559880 -0.01309022 0.0375817
## 2016-2014  0.07458258 -0.2067805  0.35594562 0.8047348
## 2016-2015  0.35912168  0.1026690  0.61557437 0.0033318
# Confidence interval of each pairwise difference (horizontal axis labels)
plot(x = tukey_chla, col = "darkgreen", las = 1)

Interpretation of Results

Tukey Test: The Tukey test shows which pairs of years differed significantly in mean chlorophyll level:
- 2015 vs. 2014: A small but statistically significant decrease in chlorophyll was found (difference ≈ -0.28, adjusted p ≈ 0.038).
- 2016 vs. 2014: No significant difference was found (adjusted p ≈ 0.80).
- 2016 vs. 2015: A clear increase in chlorophyll was found (difference ≈ 0.36, adjusted p ≈ 0.003).
The Tukey plot backs this up: a confidence interval that crosses zero means there is no significant difference between that pair of years.

Box Plot for Chlorophyll: The box plot shows the differences and variation in chlorophyll levels between years. Chlorophyll levels in 2015 appear lower and less variable than in 2014 and 2016. The median chlorophyll concentration increased from 2015 to 2016, which is consistent with the significant difference in means found by the Tukey test.