library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.4.4 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ car::recode() masks dplyr::recode()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
library(rstatix)
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
library(readxl)
Data <- read_excel("D:/SNSHS STATS/Gelia/GELIA.xlsx")
View(Data)
library(rmarkdown)
paged_table(Data)
res_aov <- aov(`Shannon's Index` ~ Location,
data = Data
)
shapiro.test(res_aov$residuals)
##
## Shapiro-Wilk normality test
##
## data: res_aov$residuals
## W = 0.70657, p-value = 0.001693
Data is not normally distributed.
res_aov <- aov(`Shannon's Index` ~ `Substrate Type`,
data = Data
)
shapiro.test(res_aov$residuals)
##
## Shapiro-Wilk normality test
##
## data: res_aov$residuals
## W = 0.87266, p-value = 0.1313
Data is normally distributed.
res_aov <- aov(`Population` ~ Location,
data = Data
)
shapiro.test(res_aov$residuals)
##
## Shapiro-Wilk normality test
##
## data: res_aov$residuals
## W = 0.92565, p-value = 0.441
Data is normally distributed.
res_aov <- aov(`Population` ~ `Substrate Type`,
data = Data
)
shapiro.test(res_aov$residuals)
##
## Shapiro-Wilk normality test
##
## data: res_aov$residuals
## W = 0.95023, p-value = 0.6926
Data is not normally distributed.
leveneTest(`Shannon's Index` ~ Location,
data = Data)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 3.9631 0.07997 .
## 6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The p-value is greater than the 0.05 level of significance. Thus, the homogeneity assumption of the variance is met.
leveneTest(`Shannon's Index` ~ `Substrate Type`,
data = Data)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.5872 0.5849
## 6
The p-value is greater than the 0.05 level of significance. Thus, the homogeneity assumption of the variance is met.
leveneTest(`Population` ~ Location,
data = Data)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.1624 0.8537
## 6
The p-value is greater than the 0.05 level of significance. Thus, the homogeneity assumption of the variance is met.
leveneTest(`Population` ~ `Substrate Type`,
data = Data)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.2591 0.78
## 6
The p-value is greater than the 0.05 level of significance. Thus, the homogeneity assumption of the variance is met.
ggboxplot(Data, x = "Location", y = "Shannon's Index",
color = "Location", palette = c("#FC4E07", "#00FF00", "#4E07FC"),
order = c("Catabaan", "RSK", "Boulevard"),
ylab = "Shannon's Index", xlab = "Location")
ggline(Data, x = "Location", y = "Shannon's Index",
add = c("mean_se", "jitter"),
order = c("Catabaan", "RSK", "Boulevard"),
ylab = "Shannon's Index", xlab = "Location")
library("gplots")
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
plotmeans(`Shannon's Index` ~ Location, data = Data, frame = TRUE,
xlab = "Location", ylab = "Shannon's Index",
main="Mean Plot with 95% CI")
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is not
## a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
PlantGrowth <- Data %>%
reorder_levels(Location, order = c("Catabaan", "RSK", "Boulevard"))
PlantGrowth %>%
group_by(Location) %>%
get_summary_stats(`Shannon's Index`, type = "common")
## # A tibble: 3 × 11
## Location variable n min max median iqr mean sd se ci
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Catabaan Shannon's In… 3 0 0 0 0 0 0 0 0
## 2 RSK Shannon's In… 3 0 0 0 0 0 0 0 0
## 3 Boulevard Shannon's In… 3 0 0.377 0.199 0.189 0.192 0.189 0.109 0.469
ggboxplot(PlantGrowth, x = "Location", y = "Shannon's Index", fill="Location")
res.kruskal <- PlantGrowth %>% kruskal_test(`Shannon's Index` ~ Location)
res.kruskal
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 Shannon's Index 9 4.50 2 0.105 Kruskal-Wallis
Based on the p-value, there is no significant difference was observed between the group pairs in terms of length.
res1<- PlantGrowth %>%
dunn_test(`Shannon's Index` ~ Location, p.adjust.method = "bonferroni")
res1
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 Shannon's Index Cataba… RSK 3 3 0 1 1 ns
## 2 Shannon's Index Cataba… Boule… 3 3 1.84 0.0662 0.199 ns
## 3 Shannon's Index RSK Boule… 3 3 1.84 0.0662 0.199 ns
group_by(Data, `Substrate Type`) %>%
summarise(
count = n(),
mean = mean(`Shannon's Index`, na.rm = TRUE),
sd = sd(`Shannon's Index`, na.rm = TRUE)
)
## # A tibble: 3 × 4
## `Substrate Type` count mean sd
## <chr> <int> <dbl> <dbl>
## 1 Intertidal Rocky Shores 3 0.126 0.218
## 2 Sandy Beaches 3 0 0
## 3 Sea Grass Beds 3 0.0663 0.115
library("ggpubr")
ggboxplot(Data, x = "Substrate Type", y = "Shannon's Index",
color = "Substrate Type", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
order = c("Intertidal Rocky Shores", "Sandy Beaches", "Sea Grass Beds"),
ylab = "Shannon's Index", xlab = "Substrate Type")
ggline(Data, x = "Substrate Type", y = "Shannon's Index",
add = c("mean_se", "jitter"),
order = c("Intertidal Rocky Shores", "Sandy Beaches", "Sea Grass Beds"),
ylab = "Shannon's Index", xlab = "Substrate Type")
library("gplots")
plotmeans(`Shannon's Index` ~ `Substrate Type`, data = Data, frame = TRUE,
xlab = "Substrate Type", ylab = "Shannon's Index",
main="Mean Plot with 95% CI")
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is not
## a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
res.aov <- aov(`Shannon's Index` ~ `Substrate Type`, data = Data)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## `Substrate Type` 2 0.02371 0.01186 0.587 0.585
## Residuals 6 0.12115 0.02019
Since p-value = 0.585 < 0.05, it is conclusive that we fail to reject the null hypothesis, that is, there is no significant difference between the substrate type in terms of Shannon’s index
group_by(Data, `Location`) %>%
summarise(
count = n(),
mean = mean(`Population`, na.rm = TRUE),
sd = sd(`Population`, na.rm = TRUE)
)
## # A tibble: 3 × 4
## Location count mean sd
## <chr> <int> <dbl> <dbl>
## 1 Boulevard 3 36.3 4.04
## 2 Catabaan 3 24 6.56
## 3 RSK 3 21.7 5.69
library("ggpubr")
ggboxplot(Data, x = "Location", y = "Population",
color = "Location", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
order = c("Catabaan", "RSK", "Boulevard"),
ylab = "Population", xlab = "Location")
ggline(Data, x = "Location", y = "Population",
add = c("mean_se", "jitter"),
order = c("Catabaan", "RSK", "Boulevard"),
ylab = "Population", xlab = "Location")
library("gplots")
plotmeans(`Population` ~ `Location`, data = Data, frame = TRUE,
xlab = "Location", ylab = "Population",
main="Mean Plot with 95% CI")
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is not
## a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
res.aov <- aov(`Population` ~ `Location`, data = Data)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Location 2 372.7 186.33 6.098 0.0359 *
## Residuals 6 183.3 30.56
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Since p-value = 0.0359 < 0.05, it is conclusive that we reject the null hypothesis, that is, there is significant difference between the location in terms of population.
TukeyHSD(res.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Population ~ Location, data = Data)
##
## $Location
## diff lwr upr p adj
## Catabaan-Boulevard -12.333333 -26.18154 1.5148774 0.0759949
## RSK-Boulevard -14.666667 -28.51488 -0.8184559 0.0400674
## RSK-Catabaan -2.333333 -16.18154 11.5148774 0.8661040
group_by(Data, `Substrate Type`) %>%
summarise(
count = n(),
mean = mean(`Population`, na.rm = TRUE),
sd = sd(`Population`, na.rm = TRUE)
)
## # A tibble: 3 × 4
## `Substrate Type` count mean sd
## <chr> <int> <dbl> <dbl>
## 1 Intertidal Rocky Shores 3 27.7 4.51
## 2 Sandy Beaches 3 24 11.3
## 3 Sea Grass Beds 3 30.3 10.0
library("ggpubr")
ggboxplot(Data, x = "Substrate Type", y = "Population",
color = "Substrate Type", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
order = c("Intertidal Rocky Shores", "Sandy Beaches", "Sea Grass Beds"),
ylab = "Population", xlab = "Substrate Type")
ggline(Data, x = "Substrate Type", y = "Population",
add = c("mean_se", "jitter"),
order = c("Intertidal Rocky Shores", "Sandy Beaches", "Sea Grass Beds"),
ylab = "Population", xlab = "Substrate Type")
library("gplots")
plotmeans(`Population` ~ `Substrate Type`, data = Data, frame = TRUE,
xlab = "Substrate Type", ylab = "Population",
main="Mean Plot with 95% CI")
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is not
## a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
res.aov <- aov(`Population` ~ `Substrate Type`, data = Data)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## `Substrate Type` 2 60.7 30.33 0.367 0.707
## Residuals 6 495.3 82.56
Since p-value = 0.707 > 0.05, it is conclusive that we fail to reject the null hypothesis, that is, there is no significant difference between the substrate type in terms of population.
library(correlation)
##
## Attaching package: 'correlation'
## The following object is masked from 'package:rstatix':
##
## cor_test
library(corrplot)
## corrplot 0.92 loaded
library(tidyverse)
library(psych)
## Warning: package 'psych' was built under R version 4.3.3
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:car':
##
## logit
corrplot(corr = cor(Data[3:7], method = "spearman"),
addCoef.col = "black",
number.cex = 0.8,
number.digits = 2,
diag = FALSE,
bg = "grey",
outline = "black",
addgrid.col = "white",
mar = c(1,1,1,1))
pairs.panels(Data[3:7],
smooth = FALSE,
scale = FALSE,
density = TRUE,
ellipses = TRUE,
method = "spearman",
pch = 21,
bg = c("red", "yellow", "blue"),
lm = FALSE,
cor = TRUE,
jiggle = FALSE,
factor = 0,
hist.col = 4,
stars = TRUE,
ci = FALSE,
main = "Correlation Matrix of the Variables")
Correlation coefficients whose magnitude are between 0.9 and 1.0
indicate variables which can be considered very highly correlated.
Correlation coefficients whose magnitude are between 0.7 and 0.9
indicate variables which can be considered highly correlated.
Correlation coefficients whose magnitude are between 0.5 and 0.7
indicate variables which can be considered moderately correlated.
Correlation coefficients whose magnitude are between 0.3 and 0.5
indicate variables which have a low correlation. Correlation
coefficients whose magnitude are less than 0.3 have little if any
(linear) correlation.
multiple <- lm( `Shannon's Index`~ `Salinity` + `Water Temperature` + `pH Level`, data = Data)
multiple
##
## Call:
## lm(formula = `Shannon's Index` ~ Salinity + `Water Temperature` +
## `pH Level`, data = Data)
##
## Coefficients:
## (Intercept) Salinity `Water Temperature`
## 1.914349 0.005509 0.092164
## `pH Level`
## -0.591327
library(performance)
check_model(multiple)
multiple1 <- lm( `Population`~ `Salinity` + `Water Temperature` + `pH Level`, data = Data)
multiple1
##
## Call:
## lm(formula = Population ~ Salinity + `Water Temperature` + `pH Level`,
## data = Data)
##
## Coefficients:
## (Intercept) Salinity `Water Temperature`
## 38.8721 0.1209 5.1989
## `pH Level`
## -20.9810
library(performance)
check_model(multiple1)
summary(multiple)
##
## Call:
## lm(formula = `Shannon's Index` ~ Salinity + `Water Temperature` +
## `pH Level`, data = Data)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## -0.023295 -0.012395 0.168716 0.017811 0.005489 -0.196604 -0.005548 0.017169
## 9
## 0.028657
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.914349 7.497661 0.255 0.809
## Salinity 0.005509 0.010622 0.519 0.626
## `Water Temperature` 0.092164 0.093725 0.983 0.371
## `pH Level` -0.591327 1.306208 -0.453 0.670
##
## Residual standard error: 0.1177 on 5 degrees of freedom
## Multiple R-squared: 0.5216, Adjusted R-squared: 0.2345
## F-statistic: 1.817 on 3 and 5 DF, p-value: 0.2609
summary(multiple1)
##
## Call:
## lm(formula = Population ~ Salinity + `Water Temperature` + `pH Level`,
## data = Data)
##
## Residuals:
## 1 2 3 4 5 6 7 8 9
## -1.3595 5.7905 -5.0552 -5.4498 -4.4193 0.4412 6.5574 -0.9229 4.4177
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.8721 375.3735 0.104 0.922
## Salinity 0.1209 0.5318 0.227 0.829
## `Water Temperature` 5.1989 4.6924 1.108 0.318
## `pH Level` -20.9810 65.3958 -0.321 0.761
##
## Residual standard error: 5.894 on 5 degrees of freedom
## Multiple R-squared: 0.6875, Adjusted R-squared: 0.5001
## F-statistic: 3.667 on 3 and 5 DF, p-value: 0.0979
The statistical output displays the coded coefficients, which are the standardized coefficients. pH level has the standardized coefficient with the largest absolute value. This measure suggests that pH level is the most important independent variable in the regression model.