questdf=read.csv("questData.csv")
#11 transects were surveyed at Leleiwi, counting juvenile acanthurus fishes and photographing benthic cover (which was processed for % cover in coral net, data was compiled based on functional groups algae, hard corals, etc.)
I am interested in the relationship between hard coral cover and the abundance of juvinile acanthurus fishes at Leleiwi.
library(ggpubr)
#normality of fish per square meter
hist(questdf$Fish_msqr) # Create histogram
abline(v=mean(questdf$Fish_msqr),col="blue") # add a line to the histogram to represent the mean value
abline(v=median(questdf$Fish_msqr),col="red") # add a line to the histogram to represent the median value
ggqqplot(questdf$Fish_msqr) # Create a qqplot
shapiro.test(questdf$Fish_msqr) # Shapiro test
##
## Shapiro-Wilk normality test
##
## data: questdf$Fish_msqr
## W = 0.93743, p-value = 0.4908
#Null hypothesis: The data are normally distributed.
#P-value=0.4908, FAIL TO REJECT
#Conclude: The data are normally distributed.
#normality of hard coral cover
hist(questdf$Hard_coral) # Create histogram
abline(v=mean(questdf$Hard_coral),col="blue") # add a line to the histogram to represent the mean value
abline(v=median(questdf$Hard_coral),col="red") # add a line to the histogram to represent the median value
ggqqplot(questdf$Hard_coral) # Create qqplot
shapiro.test(questdf$Hard_coral) # Shapiro test
##
## Shapiro-Wilk normality test
##
## data: questdf$Hard_coral
## W = 0.97788, p-value = 0.9532
#Null hypothesis: The data are normally distributed.
#P-value=0.9532, FAIL TO REJECT
#Conclude: The data are normally distributed.
#Explanatory independent variable: percent hard coral cover per transect, Response variable: juvenile fish per meters squared
simple_lm_model = lm(Fish_msqr ~ Hard_coral, data = questdf)
summary(simple_lm_model)
##
## Call:
## lm(formula = Fish_msqr ~ Hard_coral, data = questdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84882 -0.65876 -0.02464 0.43593 1.26632
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.68026 0.57933 1.174 0.2704
## Hard_coral 0.12204 0.03899 3.130 0.0121 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7503 on 9 degrees of freedom
## Multiple R-squared: 0.5212, Adjusted R-squared: 0.468
## F-statistic: 9.796 on 1 and 9 DF, p-value: 0.01212
#There is a significant relationship between hard coral cover and juvenile acan. fish per square meter because there is an *, indicating significance.
#The R squared value indicates what percentage of the variation in fish abundance was explained by hard coral cover (52.12%). The adjusted R squared value takes into account model complexity, penalizing additional independent variables. The adjusted value indicates what percentage of the variation in fish abundance was explained by hard coral cover (46.68%).
#define values to add to plot
p_value <- summary(simple_lm_model)$coefficients["Hard_coral", "Pr(>|t|)"]
r_squared <- summary(simple_lm_model)$r.squared
#Scatter plot of simple lm model
ggplot(questdf, aes(x = Fish_msqr, y = Hard_coral)) +
geom_point(aes(alpha = 0.2)) + # Scatterplot of the data points
geom_smooth(method = "lm",se = FALSE) + # Regression line
labs(x = "Juvinile fish per square meter", y = "Hard coral percent cover") + # Label the x and y axes
theme_minimal() + #add transparency and grid lines NO!! see pirate lab
guides(alpha = FALSE) +
geom_text(aes(label = paste("p-value =", round(p_value, 4), "\nR-squared =", round(r_squared, 2))),
x = Inf, y = -Inf, hjust = 1, vjust = -0.2)
## `geom_smooth()` using formula = 'y ~ x'
I ran the mixed effects model because I was interested if average depth and dive number affected the abundance of the fish, and since the fish are herbivorous, I wanted to include algal cover as well. First I used the bioenv analysis to identify the “Best Subset of Environmental Variables with Maximum (Rank) Correlation with Community Dissimilarities”.
community: fish abundance
environment: benthic cover
random effects: average depth and dive number
fixed effects: algae and hard coral cover
library(lme4)
library(car)
library(vegan)
library(gridExtra)
#set community and environment for bioenv analysis
community = (questdf[, c("Fish_msqr", "T_number")]) #community, fish abundance
enviornment = questdf[, c("Algae","Hard_coral", "Other", "Hard_substrate", "Soft_substrate", "Palythoa")] #environment, benthic cover
##bioenv, Best Subset of Environmental Variables with Maximum (Rank) Correlation with Community Dissimilarities
bioenv_result <- bioenv(community, enviornment)
bioenv_result #identify best model for regression
##
## Call:
## bioenv(comm = community, env = enviornment)
##
## Subset of environmental variables with best correlation to community data.
##
## Correlations: spearman
## Dissimilarities: bray
## Metric: euclidean
##
## Best model has 3 parameters (max. 6 allowed):
## Algae Hard_coral Other
## with correlation 0.3914863
#used bioenv results in linear model
model2= lm(Fish_msqr ~ Hard_coral+Algae+Other, data=questdf)
summary(model2)
##
## Call:
## lm(formula = Fish_msqr ~ Hard_coral + Algae + Other, data = questdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7489 -0.6002 -0.1071 0.4747 1.1796
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.16434 1.87852 0.620 0.5550
## Hard_coral 0.12289 0.04421 2.780 0.0273 *
## Algae -0.01220 0.02919 -0.418 0.6885
## Other 0.01470 0.05735 0.256 0.8051
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8289 on 7 degrees of freedom
## Multiple R-squared: 0.5454, Adjusted R-squared: 0.3505
## F-statistic: 2.799 on 3 and 7 DF, p-value: 0.1184
#Other does not appear significant
#Visualize relationship between Other and Hard coral and Algae
plot=ggplot(data=questdf)
p3=plot+
geom_point(mapping = aes(x=Hard_coral, y=Other))+labs(title = "coral vs. Other")
p4=plot+
geom_point(mapping = aes(x=Algae, y=Other))+labs(title = "Algae vs. Other")
grid.arrange(p3, p4, ncol = 2, nrow=1)
#note how other is related to algae and hard coral, bioenv does not take this into account.
#therefore, I am taking Other out of the model. Additionally other is just when the point could not be identified due to shadows or blur, so it is not an effective predictor.
#final mixed effects model
model3 = lmer(Fish_msqr ~ Hard_coral + Algae + (1|Avg_d) + (1|Dive_number), data=questdf)
## boundary (singular) fit: see help('isSingular')
summary(model3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Fish_msqr ~ Hard_coral + Algae + (1 | Avg_d) + (1 | Dive_number)
## Data: questdf
##
## REML criterion at convergence: 32.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.77739 -0.35091 0.07196 0.41560 0.66733
##
## Random effects:
## Groups Name Variance Std.Dev.
## Avg_d (Intercept) 0.4394 0.6628
## Dive_number (Intercept) 0.0000 0.0000
## Residual 0.1185 0.3442
## Number of obs: 11, groups: Avg_d, 9; Dive_number, 2
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.06695 1.02111 2.024
## Hard_coral 0.09586 0.02886 3.322
## Algae -0.02068 0.02012 -1.028
##
## Correlation of Fixed Effects:
## (Intr) Hrd_cr
## Hard_coral -0.018
## Algae -0.902 -0.351
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Anova(model3)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: Fish_msqr
## Chisq Df Pr(>Chisq)
## Hard_coral 11.0353 1 0.0008939 ***
## Algae 1.0561 1 0.3041063
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#mixed effects model still shows very significant relationship between coral cover and juvinile fish abundance
Results: mixed effects model still shows very significant relationship between coral cover and juvinile fish abundance.