questdf=read.csv("questData.csv")
#11 transects were surveyed at Leleiwi, counting juvenile acanthurus fishes and photographing benthic cover (which was processed for % cover in coral net, data was compiled based on functional groups algae, hard corals, etc.)

Assessing Normality

I am interested in the relationship between hard coral cover and the abundance of juvinile acanthurus fishes at Leleiwi.

library(ggpubr)

#normality of fish per square meter
hist(questdf$Fish_msqr) # Create histogram
abline(v=mean(questdf$Fish_msqr),col="blue") # add a line to the histogram to represent the mean value
abline(v=median(questdf$Fish_msqr),col="red") # add a line to the histogram to represent the median value

ggqqplot(questdf$Fish_msqr) # Create a qqplot

shapiro.test(questdf$Fish_msqr) # Shapiro test
## 
##  Shapiro-Wilk normality test
## 
## data:  questdf$Fish_msqr
## W = 0.93743, p-value = 0.4908
#Null hypothesis: The data are normally distributed.
#P-value=0.4908, FAIL TO REJECT
#Conclude: The data are normally distributed.

#normality of hard coral cover
hist(questdf$Hard_coral) # Create histogram
abline(v=mean(questdf$Hard_coral),col="blue") # add a line to the histogram to represent the mean value
abline(v=median(questdf$Hard_coral),col="red") # add a line to the histogram to represent the median value

ggqqplot(questdf$Hard_coral) # Create qqplot

shapiro.test(questdf$Hard_coral) # Shapiro test
## 
##  Shapiro-Wilk normality test
## 
## data:  questdf$Hard_coral
## W = 0.97788, p-value = 0.9532
#Null hypothesis: The data are normally distributed.
#P-value=0.9532, FAIL TO REJECT
#Conclude: The data are normally distributed.

Simple Linear Regression Model and Visualization

#Explanatory independent variable: percent hard coral cover per transect, Response variable: juvenile fish per meters squared
simple_lm_model = lm(Fish_msqr ~ Hard_coral, data = questdf)
summary(simple_lm_model)
## 
## Call:
## lm(formula = Fish_msqr ~ Hard_coral, data = questdf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84882 -0.65876 -0.02464  0.43593  1.26632 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.68026    0.57933   1.174   0.2704  
## Hard_coral   0.12204    0.03899   3.130   0.0121 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7503 on 9 degrees of freedom
## Multiple R-squared:  0.5212, Adjusted R-squared:  0.468 
## F-statistic: 9.796 on 1 and 9 DF,  p-value: 0.01212
#There is a significant relationship between hard coral cover and juvenile acan. fish per square meter because there is an *, indicating significance.
#The R squared value indicates what percentage of the variation in fish abundance was explained by hard coral cover (52.12%). The adjusted R squared value takes into account model complexity, penalizing additional independent variables. The adjusted value indicates what percentage of the variation in fish abundance was explained by hard coral cover (46.68%).

#define values to add to plot
p_value <- summary(simple_lm_model)$coefficients["Hard_coral", "Pr(>|t|)"]
r_squared <- summary(simple_lm_model)$r.squared

#Scatter plot of simple lm model
ggplot(questdf, aes(x = Fish_msqr, y = Hard_coral)) +
  geom_point(aes(alpha = 0.2)) +  # Scatterplot of the data points
  geom_smooth(method = "lm",se = FALSE) +  # Regression line
  labs(x = "Juvinile fish per square meter", y = "Hard coral percent cover") +  # Label the x and y axes
  theme_minimal() + #add transparency and grid lines NO!! see pirate lab
  guides(alpha = FALSE) +
  geom_text(aes(label = paste("p-value =", round(p_value, 4), "\nR-squared =", round(r_squared, 2))),
            x = Inf, y = -Inf, hjust = 1, vjust = -0.2)
## `geom_smooth()` using formula = 'y ~ x'

Mixed effects model using bioenv test and lme4

I ran the mixed effects model because I was interested if average depth and dive number affected the abundance of the fish, and since the fish are herbivorous, I wanted to include algal cover as well. First I used the bioenv analysis to identify the “Best Subset of Environmental Variables with Maximum (Rank) Correlation with Community Dissimilarities”.

library(lme4)
library(car)
library(vegan)
library(gridExtra)

#set community and environment for bioenv analysis
community = (questdf[, c("Fish_msqr", "T_number")]) #community, fish abundance
enviornment = questdf[, c("Algae","Hard_coral", "Other", "Hard_substrate", "Soft_substrate", "Palythoa")] #environment, benthic cover

##bioenv, Best Subset of Environmental Variables with Maximum (Rank) Correlation with Community Dissimilarities
bioenv_result <- bioenv(community, enviornment)
bioenv_result #identify best model for regression
## 
## Call:
## bioenv(comm = community, env = enviornment) 
## 
## Subset of environmental variables with best correlation to community data.
## 
## Correlations:    spearman 
## Dissimilarities: bray 
## Metric:          euclidean 
## 
## Best model has 3 parameters (max. 6 allowed):
## Algae Hard_coral Other
## with correlation  0.3914863
#used bioenv results in linear model
model2= lm(Fish_msqr ~ Hard_coral+Algae+Other, data=questdf)
summary(model2)
## 
## Call:
## lm(formula = Fish_msqr ~ Hard_coral + Algae + Other, data = questdf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7489 -0.6002 -0.1071  0.4747  1.1796 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  1.16434    1.87852   0.620   0.5550  
## Hard_coral   0.12289    0.04421   2.780   0.0273 *
## Algae       -0.01220    0.02919  -0.418   0.6885  
## Other        0.01470    0.05735   0.256   0.8051  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8289 on 7 degrees of freedom
## Multiple R-squared:  0.5454, Adjusted R-squared:  0.3505 
## F-statistic: 2.799 on 3 and 7 DF,  p-value: 0.1184
#Other does not appear significant

#Visualize relationship between Other and Hard coral and Algae
plot=ggplot(data=questdf)
p3=plot+
  geom_point(mapping = aes(x=Hard_coral, y=Other))+labs(title = "coral vs. Other")
p4=plot+
  geom_point(mapping = aes(x=Algae, y=Other))+labs(title = "Algae vs. Other")
grid.arrange(p3, p4, ncol = 2, nrow=1)

#note how other is related to algae and hard coral, bioenv does not take this into account.
  #therefore, I am taking Other out of the model. Additionally other is just when the point could not be identified due to shadows or blur, so it is not an effective predictor.

#final mixed effects model
model3 = lmer(Fish_msqr ~ Hard_coral + Algae + (1|Avg_d) + (1|Dive_number), data=questdf)
## boundary (singular) fit: see help('isSingular')
summary(model3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Fish_msqr ~ Hard_coral + Algae + (1 | Avg_d) + (1 | Dive_number)
##    Data: questdf
## 
## REML criterion at convergence: 32.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -0.77739 -0.35091  0.07196  0.41560  0.66733 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Avg_d       (Intercept) 0.4394   0.6628  
##  Dive_number (Intercept) 0.0000   0.0000  
##  Residual                0.1185   0.3442  
## Number of obs: 11, groups:  Avg_d, 9; Dive_number, 2
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  2.06695    1.02111   2.024
## Hard_coral   0.09586    0.02886   3.322
## Algae       -0.02068    0.02012  -1.028
## 
## Correlation of Fixed Effects:
##            (Intr) Hrd_cr
## Hard_coral -0.018       
## Algae      -0.902 -0.351
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Anova(model3)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: Fish_msqr
##              Chisq Df Pr(>Chisq)    
## Hard_coral 11.0353  1  0.0008939 ***
## Algae       1.0561  1  0.3041063    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#mixed effects model still shows very significant relationship between coral cover and juvinile fish abundance

Results: mixed effects model still shows very significant relationship between coral cover and juvinile fish abundance.