“here is the table. there is the occurrence of different prey species under different bridges. either 0 = no occurrence or 1 occurring. The bridges are under different streams and two countries Cz and DE. COuld you try to make statics if there is a difference between countries and also for the streams? From the distribution i think there would be some difference. Thanks. I need it for the article ASAP.”
Occurrence of different prey species under different bridges. either 0 = no occurrence or 1 occurring. The bridges are under different streams and two countries Cz and DE.
#Raw file in folder raw. Made a copy of stat.xlsx, named it "bridgesData" and deleted all sheets but "GIS"
# Deleted Odonata and Coleoptera, joining all observations under Insecta
# Deleted Alburnoides, because the two observations were errors.
# 3/12/20 - Added variable Geography (North/South)
raw <- read_excel(here::here("data", "bridgesData.xlsx"))
tidyBridges <- raw %>%
# clean_names(., "small_camel") %>%
# dplyr::mutate(name = as.factor(name)) %>%
dplyr::rename(Stream = stream) %>%
dplyr::rename(lon= "Coord Y") %>%
dplyr::rename(lat= "Coord X") %>%
dplyr::rename("Gasterosteus sp."
= "Gasterosseus aculeatus") %>%
dplyr::rename("Squalius cephalus"
= "Leuciscus cephalus") %>%
dplyr::rename("Abramis bjoerkna"
= "Blicca bjoerkna") %>%
dplyr::rename("Cottus gobio"
= "Cottus") %>%
dplyr::rename("Carassius sp."
= "Carassius") %>%
dplyr::rename("Astacoidea"
= "Astacus") %>%
dplyr::rename("Gymnocephalus cernuus"
= "Gymnocephalus") %>%
dplyr::rename("Ctenopharyngodon idella"
= "Ctenopharyngodon") %>%
dplyr::rename("Gobio a Romanogobio sp."
= "Gobio") %>%
dplyr::rename("Abramis brama"
= "Abramis") %>%
dplyr::select(!Cyprinidae) # Lukas says to drop it, but maybe of use for Class plots
# That's not actually tidy, because it's in wide form, but we need it like that for vegan.
longBridges <- tidyBridges %>%
pivot_longer(cols = "Abramis brama":Serpentes, names_to = "Species", values_to = "count") %>%
dplyr::mutate(Species = as_factor(Species)) %>%
dplyr::filter(count > 0)
# glimpse(tidyBridges)
# summary(tidyBridges)
ctable(x = longBridges$Species,
y = longBridges$Country,
prop = "c")
## Cross-Tabulation, Column Proportions
## Species * Country
## Data Frame: longBridges
##
## ---------------------------- --------- -------------- -------------- --------------
## Country CZ DE Total
## Species
## Abramis brama 7 ( 1.5%) 1 ( 0.2%) 8 ( 0.9%)
## Alburnus alburnus 5 ( 1.1%) 1 ( 0.2%) 6 ( 0.7%)
## Anguilla anguilla 3 ( 0.7%) 0 ( 0.0%) 3 ( 0.3%)
## Barbatula barbatula 17 ( 3.7%) 33 ( 7.4%) 50 ( 5.5%)
## Barbus barbus 2 ( 0.4%) 2 ( 0.4%) 4 ( 0.4%)
## Abramis bjoerkna 5 ( 1.1%) 0 ( 0.0%) 5 ( 0.6%)
## Carassius sp. 21 ( 4.6%) 33 ( 7.4%) 54 ( 6.0%)
## Cottus gobio 16 ( 3.5%) 71 ( 15.9%) 87 ( 9.6%)
## Ctenopharyngodon idella 1 ( 0.2%) 0 ( 0.0%) 1 ( 0.1%)
## Cyprinus carpio 31 ( 6.8%) 5 ( 1.1%) 36 ( 4.0%)
## Esox lucius 1 ( 0.2%) 0 ( 0.0%) 1 ( 0.1%)
## Gobio a Romanogobio sp. 25 ( 5.5%) 31 ( 7.0%) 56 ( 6.2%)
## Gasterosteus sp. 1 ( 0.2%) 11 ( 2.5%) 12 ( 1.3%)
## Gymnocephalus cernuus 5 ( 1.1%) 2 ( 0.4%) 7 ( 0.8%)
## Chondrostoma nasus 1 ( 0.2%) 2 ( 0.4%) 3 ( 0.3%)
## Ictalurus nebulosus 6 ( 1.3%) 0 ( 0.0%) 6 ( 0.7%)
## Lepomis gibbosus 2 ( 0.4%) 1 ( 0.2%) 3 ( 0.3%)
## Squalius cephalus 10 ( 2.2%) 9 ( 2.0%) 19 ( 2.1%)
## Leuciscus leuciscus 1 ( 0.2%) 0 ( 0.0%) 1 ( 0.1%)
## Lota lota 4 ( 0.9%) 0 ( 0.0%) 4 ( 0.4%)
## Neogobius melanostomus 3 ( 0.7%) 6 ( 1.3%) 9 ( 1.0%)
## Perca fluviatilis 25 ( 5.5%) 9 ( 2.0%) 34 ( 3.8%)
## Phoxinus phoxinus 10 ( 2.2%) 52 ( 11.7%) 62 ( 6.9%)
## Pseudorasbora parva 8 ( 1.7%) 1 ( 0.2%) 9 ( 1.0%)
## Rutilus rutilus 31 ( 6.8%) 7 ( 1.6%) 38 ( 4.2%)
## Salmonids sum 56 ( 12.2%) 72 ( 16.1%) 128 ( 14.2%)
## Scardinius erythrophtalmus 6 ( 1.3%) 0 ( 0.0%) 6 ( 0.7%)
## Silurus glanis 4 ( 0.9%) 0 ( 0.0%) 4 ( 0.4%)
## Stizostedion lucioperca 1 ( 0.2%) 0 ( 0.0%) 1 ( 0.1%)
## Tinca tinca 9 ( 2.0%) 15 ( 3.4%) 24 ( 2.7%)
## Anura 94 ( 20.5%) 52 ( 11.7%) 146 ( 16.2%)
## Astacoidea 12 ( 2.6%) 18 ( 4.0%) 30 ( 3.3%)
## Aves 11 ( 2.4%) 5 ( 1.1%) 16 ( 1.8%)
## Mammalia 9 ( 2.0%) 4 ( 0.9%) 13 ( 1.4%)
## Insecta 13 ( 2.8%) 3 ( 0.7%) 16 ( 1.8%)
## Serpentes 2 ( 0.4%) 0 ( 0.0%) 2 ( 0.2%)
## Total 458 (100.0%) 446 (100.0%) 904 (100.0%)
## ---------------------------- --------- -------------- -------------- --------------
longBridges %>%
ggplot(aes(x = forcats::fct_infreq(Species))) +
geom_bar(stat = "count") +
# geom_bar(position = position_stack(reverse = TRUE)) +
facet_grid(. ~ Country)+
theme_bw() +
scale_y_continuous("Number of locations where the sp was found") +
scale_x_discrete("") +
#theme(axis.text.x = element_text(angle = 90)) +
coord_flip() +
labs(title = "Most common prey") +
theme(axis.text.x = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 15)) +
theme(axis.title.y = element_text(size = 15)) +
theme(plot.title = element_text(size = 15))
# ggsave(file="freqCZDE.png", width=7, height=5, dpi=300)
#
# print (freqCZDE)
longCz <- longBridges %>%
dplyr::filter(Country == "CZ")
longDe <- longBridges %>%
dplyr::filter(Country == "DE")
longCz %>%
ggplot(aes(x = forcats::fct_infreq(Species))) +
geom_bar(stat = "count") +
# facet_grid(. ~ Stream)+
theme_bw() +
scale_y_continuous("Number of locations where the sp was found") +
scale_x_discrete("") +
#theme(axis.text.x = element_text(angle = 90)) +
coord_flip() +
labs(title = "Most common prey in CZ") +
theme(axis.text.x = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 15)) +
theme(axis.title.y = element_text(size = 15)) +
theme(plot.title = element_text(size = 15))
longDe %>%
ggplot(aes(x = forcats::fct_infreq(Species))) +
geom_bar(stat = "count") +
# facet_grid(. ~ Stream)+
theme_bw() +
scale_y_continuous("Number of locations where the sp was found") +
scale_x_discrete("") +
#theme(axis.text.x = element_text(angle = 90)) +
coord_flip() +
labs(title = "Most common prey in DE") +
theme(axis.text.x = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 15)) +
theme(axis.title.y = element_text(size = 15)) +
theme(plot.title = element_text(size = 15))
ctable(x = longBridges$Species,
y = longBridges$Geography,
prop = "c")
## Cross-Tabulation, Column Proportions
## Species * Geography
## Data Frame: longBridges
##
## ---------------------------- ----------- -------------- -------------- --------------
## Geography north south Total
## Species
## Abramis brama 1 ( 0.2%) 7 ( 2.0%) 8 ( 0.9%)
## Alburnus alburnus 2 ( 0.4%) 4 ( 1.1%) 6 ( 0.7%)
## Anguilla anguilla 0 ( 0.0%) 3 ( 0.9%) 3 ( 0.3%)
## Barbatula barbatula 34 ( 6.1%) 16 ( 4.6%) 50 ( 5.5%)
## Barbus barbus 2 ( 0.4%) 2 ( 0.6%) 4 ( 0.4%)
## Abramis bjoerkna 0 ( 0.0%) 5 ( 1.4%) 5 ( 0.6%)
## Carassius sp. 37 ( 6.7%) 17 ( 4.9%) 54 ( 6.0%)
## Cottus gobio 82 ( 14.8%) 5 ( 1.4%) 87 ( 9.6%)
## Ctenopharyngodon idella 0 ( 0.0%) 1 ( 0.3%) 1 ( 0.1%)
## Cyprinus carpio 8 ( 1.4%) 28 ( 8.0%) 36 ( 4.0%)
## Esox lucius 0 ( 0.0%) 1 ( 0.3%) 1 ( 0.1%)
## Gobio a Romanogobio sp. 32 ( 5.8%) 24 ( 6.9%) 56 ( 6.2%)
## Gasterosteus sp. 11 ( 2.0%) 1 ( 0.3%) 12 ( 1.3%)
## Gymnocephalus cernuus 4 ( 0.7%) 3 ( 0.9%) 7 ( 0.8%)
## Chondrostoma nasus 2 ( 0.4%) 1 ( 0.3%) 3 ( 0.3%)
## Ictalurus nebulosus 0 ( 0.0%) 6 ( 1.7%) 6 ( 0.7%)
## Lepomis gibbosus 1 ( 0.2%) 2 ( 0.6%) 3 ( 0.3%)
## Squalius cephalus 10 ( 1.8%) 9 ( 2.6%) 19 ( 2.1%)
## Leuciscus leuciscus 0 ( 0.0%) 1 ( 0.3%) 1 ( 0.1%)
## Lota lota 0 ( 0.0%) 4 ( 1.1%) 4 ( 0.4%)
## Neogobius melanostomus 6 ( 1.1%) 3 ( 0.9%) 9 ( 1.0%)
## Perca fluviatilis 15 ( 2.7%) 19 ( 5.4%) 34 ( 3.8%)
## Phoxinus phoxinus 54 ( 9.7%) 8 ( 2.3%) 62 ( 6.9%)
## Pseudorasbora parva 1 ( 0.2%) 8 ( 2.3%) 9 ( 1.0%)
## Rutilus rutilus 10 ( 1.8%) 28 ( 8.0%) 38 ( 4.2%)
## Salmonids sum 98 ( 17.7%) 30 ( 8.6%) 128 ( 14.2%)
## Scardinius erythrophtalmus 0 ( 0.0%) 6 ( 1.7%) 6 ( 0.7%)
## Silurus glanis 0 ( 0.0%) 4 ( 1.1%) 4 ( 0.4%)
## Stizostedion lucioperca 1 ( 0.2%) 0 ( 0.0%) 1 ( 0.1%)
## Tinca tinca 16 ( 2.9%) 8 ( 2.3%) 24 ( 2.7%)
## Anura 82 ( 14.8%) 64 ( 18.3%) 146 ( 16.2%)
## Astacoidea 22 ( 4.0%) 8 ( 2.3%) 30 ( 3.3%)
## Aves 7 ( 1.3%) 9 ( 2.6%) 16 ( 1.8%)
## Mammalia 7 ( 1.3%) 6 ( 1.7%) 13 ( 1.4%)
## Insecta 9 ( 1.6%) 7 ( 2.0%) 16 ( 1.8%)
## Serpentes 1 ( 0.2%) 1 ( 0.3%) 2 ( 0.2%)
## Total 555 (100.0%) 349 (100.0%) 904 (100.0%)
## ---------------------------- ----------- -------------- -------------- --------------
longN <- longBridges %>%
dplyr::filter(Geography == "north")
longS <- longBridges %>%
dplyr::filter(Geography == "south")
longN %>%
ggplot(aes(x = forcats::fct_infreq(Species))) +
geom_bar(stat = "count") +
# facet_grid(. ~ Stream)+
theme_bw() +
scale_y_continuous("Number of locations where the sp was found") +
scale_x_discrete("") +
#theme(axis.text.x = element_text(angle = 90)) +
coord_flip() +
labs(title = "Most common prey in the North") +
theme(axis.text.x = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 15)) +
theme(axis.title.y = element_text(size = 15)) +
theme(plot.title = element_text(size = 15))
longS %>%
ggplot(aes(x = forcats::fct_infreq(Species))) +
geom_bar(stat = "count") +
# facet_grid(. ~ Stream)+
theme_bw() +
scale_y_continuous("Number of locations where the sp was found") +
scale_x_discrete("") +
#theme(axis.text.x = element_text(angle = 90)) +
coord_flip() +
labs(title = "Most common prey in the South") +
theme(axis.text.x = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 15)) +
theme(axis.title.y = element_text(size = 15)) +
theme(plot.title = element_text(size = 15))
nativeFishProp <- longBridges %>%
dplyr::filter(!Species %in% c("Serpentes", "Anura", "Insecta","Mammalia", "Aves", "Astacoidea")) %>%
dplyr::mutate(Origin = dplyr::case_when(
Species == "Neogobius melanostomus"|
Species == "Pseudorasbora parva" |
Species == "Gasterosteus aculeatus" |
Species == "Ictalurus nebulosus" |
Species == "Lepomis gibbosus" ~ "Non-native",
Species == "Cottus gobio"|
Species == "Phoxinus phoxinus" |
Species == "Barbatula barbatula" |
Species == "Gobio a Romanogobio sp." |
Species == "Gymnocephalus cernuus" |
Species == "Alburnus alburnus"
~ "Native wild",
TRUE ~ "Native stocked")) # fish pooled
# glimpse(nativeFishProp)
ctable(x = nativeFishProp$Origin,
y = nativeFishProp$Country,
prop = "c",
chisq = TRUE) # display results of Chi-square test of independence
## Cross-Tabulation, Column Proportions
## Origin * Country
## Data Frame: nativeFishProp
##
##
## ---------------- --------- -------------- -------------- --------------
## Country CZ DE Total
## Origin
## Native stocked 220 ( 69.4%) 166 ( 45.6%) 386 ( 56.7%)
## Native wild 78 ( 24.6%) 190 ( 52.2%) 268 ( 39.4%)
## Non-native 19 ( 6.0%) 8 ( 2.2%) 27 ( 4.0%)
## Total 317 (100.0%) 364 (100.0%) 681 (100.0%)
## ---------------- --------- -------------- -------------- --------------
##
## ----------------------------
## Chi.squared df p.value
## ------------- ---- ---------
## 55.8642 2 0
## ----------------------------
# mtcars %>%
# count(gear, cyl) %>%
# ggplot(aes(x = reorder(gear, n, sum), y = n, fill = cyl)) +
# geom_col() +
# coord_flip()
nativeFishProp %>%
ggplot(aes(x = Species, fill= Origin)) +
geom_bar(position = position_stack(reverse = TRUE)) +
facet_grid(. ~ Country)+
theme_classic() +
scale_y_continuous("Number of occurrences") +
scale_x_discrete("") +
labs(fill = "Species") +
#theme(axis.text.x = element_text(angle = 90)) +
coord_flip() +
labs(title = "Prey origin") +
theme(axis.text.x = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 15)) +
theme(axis.title.y = element_text(size = 15)) +
theme(plot.title = element_text(size = 15))
# fix this one by country
nativeFishProp %>%
ggplot(aes(x = Origin)) +
geom_bar(position = position_stack(reverse = TRUE)) +
facet_grid(. ~ Country)+
theme_classic() +
scale_y_continuous("Number of occurrences") +
scale_x_discrete("") +
#theme(axis.text.x = element_text(angle = 90)) +
coord_flip() +
labs(title = "Prey origin") +
theme(axis.text.x = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 15)) +
theme(axis.title.y = element_text(size = 15)) +
theme(plot.title = element_text(size = 15))
ctable(x = nativeFishProp$Origin,
y = nativeFishProp$Geography,
prop = "c",
chisq = TRUE) # display results of Chi-square test of independence
## Cross-Tabulation, Column Proportions
## Origin * Geography
## Data Frame: nativeFishProp
##
##
## ---------------- ----------- -------------- -------------- --------------
## Geography north south Total
## Origin
## Native stocked 211 ( 49.4%) 175 ( 68.9%) 386 ( 56.7%)
## Native wild 208 ( 48.7%) 60 ( 23.6%) 268 ( 39.4%)
## Non-native 8 ( 1.9%) 19 ( 7.5%) 27 ( 4.0%)
## Total 427 (100.0%) 254 (100.0%) 681 (100.0%)
## ---------------- ----------- -------------- -------------- --------------
##
## ----------------------------
## Chi.squared df p.value
## ------------- ---- ---------
## 48.7691 2 0
## ----------------------------
# mtcars %>%
# count(gear, cyl) %>%
# ggplot(aes(x = reorder(gear, n, sum), y = n, fill = cyl)) +
# geom_col() +
# coord_flip()
nativeFishProp %>%
ggplot(aes(x = Species, fill= Origin)) +
geom_bar(position = position_stack(reverse = TRUE)) +
facet_grid(. ~ Geography)+
theme_classic() +
scale_y_continuous("Number of occurrences") +
scale_x_discrete("") +
labs(fill = "Species") +
#theme(axis.text.x = element_text(angle = 90)) +
coord_flip() +
labs(title = "Prey origin") +
theme(axis.text.x = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 15)) +
theme(axis.title.y = element_text(size = 15)) +
theme(plot.title = element_text(size = 15))
# fix this one by country
nativeFishProp %>%
ggplot(aes(x = Origin)) +
geom_bar(position = position_stack(reverse = TRUE)) +
facet_grid(. ~ Geography)+
theme_classic() +
scale_y_continuous("Number of occurrences") +
scale_x_discrete("") +
#theme(axis.text.x = element_text(angle = 90)) +
coord_flip() +
labs(title = "Prey origin") +
theme(axis.text.x = element_text(size = 12)) +
theme(axis.title.x = element_text(size = 15)) +
theme(axis.title.y = element_text(size = 15)) +
theme(plot.title = element_text(size = 15))
# Determine geographic extent of our data
max.lat <- ceiling(max(tidyBridges$lat))
min.lat <- floor(min(tidyBridges$lat))
max.lon <- ceiling(max(tidyBridges$lon))
min.lon <- floor(min(tidyBridges$lon))
geoExtent <- extent(x = c(min.lon, max.lon, min.lat, max.lat))
#download climate data
# bioclimData <- getData(name = "worldclim",
# var = "bio",
# res = 0.5,
# lon=13,
# lat=51,
# path = "data/")
# altData <- getData(name = "worldclim",
# var = "alt",
# res = 0.5,
# lon=13,
# lat=51,
# path = "data/")
To get a better idea of the area we are interested in, lets look at where our points are using leaflet:
# Create a color palette
pal <- colorFactor("viridis", nativeFishProp$Origin)
# Prepare the text for the tooltip:
mytext <- paste(
"Species: ", nativeFishProp$Species, "<br/>",
"Stream: ", nativeFishProp$Stream, "<br/>",
"Longitude: ", nativeFishProp$lat, "<br/>",
"Latitude: ", nativeFishProp$lon, sep="") %>%
lapply(htmltools::HTML)
# Final Map
m <- leaflet(nativeFishProp) %>%
addTiles() %>%
setView( lat=50.7, lng=13.5 , zoom=8.5) %>%
addProviderTiles("Esri.WorldImagery") %>%
addCircleMarkers(~lat, ~lon,
fillColor = ~pal(nativeFishProp$Origin), fillOpacity = 0.7, color="white", radius=8, stroke=FALSE,
label = mytext,
labelOptions = labelOptions( style = list("font-weight" = "normal", padding = "3px 8px"), textsize = "13px", direction = "auto")
) %>%
addLegend(pal=pal, values=~nativeFishProp$Origin, opacity=0.9, title = "Origin", position = "bottomright" )
m
#
# pho <-gbif("Phoxinus phoxinus",geo=T, removeZeros=T, ext = c(50, 52, 12, 15) ) #retrieve all occurrences of Pho pho from Germany. By including geo=T in the previous command we retain only the georeferenced records.
#
# coordinates(phoXy) = c("lon", "lat") #Set coordinates to a Spatial data frame
#
# data(wrld_simpl)
#
# plot(wrld_simpl, axes=TRUE, col='light green', las=1) #plot points on a world map
#
# zoom(wrld_simpl, axes=TRUE, las=1, col='light green') #zoom to a specific region
#
# points(phoXy, col='orange', pch=20, cex=0.75)
# bridge abundance matrix
abundance.matrix <- tidyBridges[,5:40]
# store computed indices in a new data frame called 'indices'
indices <- tidyBridges[,c("Stream","Country","GPS","Geography")]
indices$Richness <- rowSums(abundance.matrix>0)
# raremax <- min(rowSums(abundance.matrix>0))
# indices$Rarefied <- rarefy(abundance.matrix, raremax)
Use boxplots to visualize differences in species diversity between countries and across streams.
par(mar=c(3,4,3,2), mfrow=c(2,2))
colors = terrain.colors(6)[5:1]
boxplot(Richness~Country, data=indices, boxwex=0.5, col=colors,
cex.axis=0.5, ylab="Richness")
boxplot(Richness~Geography, data=indices, boxwex=0.5, col=colors,
cex.axis=0.5, ylab="Richness") +
coord_flip()
## NULL
# boxplot(Rarefied~Stream, data=indices, boxwex=0.5, col=colors,
# cex.axis=0.5, ylab="Rarefied richness") +
# coord_flip()
# boxplot(Rarefied~Country, data=indices, boxwex=0.5, col=colors,
# cex.axis=0.5, ylab="Rarefied richness")
We can test for differences among streams statistically using a linear model, with Stream as a predictor of species diversity:
# fit linear models
mod.richness <- lm(Richness~Stream, data=indices)
# mod.rarefied <- lm(Rarefied~Stream, data=indices)
# ANOVA Richness
anova(mod.richness)
## Analysis of Variance Table
##
## Response: Richness
## Df Sum Sq Mean Sq F value Pr(>F)
## Stream 65 469.35 7.2207 1.5784 0.008705 **
## Residuals 203 928.68 4.5748
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mod.richness)
##
## Call:
## lm(formula = Richness ~ Stream, data = indices)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.000 -1.000 0.000 1.167 7.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.57143 0.80842 4.418 1.62e-05 ***
## StreamBiela 0.02857 1.25239 0.023 0.98182
## StreamBílá voda -0.07143 1.71491 -0.042 0.96682
## StreamBílina 1.42857 1.07789 1.325 0.18655
## StreamBílý potok -0.90476 1.47596 -0.613 0.54056
## StreamBobritzsch 1.32857 1.05405 1.260 0.20895
## StreamBourlivec 2.02857 1.25239 1.620 0.10684
## StreamBystrice -2.57143 1.47596 -1.742 0.08299 .
## StreamCerná -0.57143 1.71491 -0.333 0.73932
## StreamCerná voda 0.09524 1.47596 0.065 0.94861
## StreamChomutovka -0.77143 1.25239 -0.616 0.53861
## StreamCunnersdorfer Bach -0.90476 1.47596 -0.613 0.54056
## StreamDivoká Bystrice -1.57143 1.71491 -0.916 0.36058
## StreamFlájský potok -1.57143 1.47596 -1.065 0.28828
## StreamFlöha -0.02597 1.03413 -0.025 0.97999
## StreamFreiberger Mulde -0.77143 1.25239 -0.616 0.53861
## StreamFuchsbach -2.23810 1.47596 -1.516 0.13098
## StreamGimmlitz -1.57143 2.28655 -0.687 0.49271
## StreamGottleuba -0.82143 1.34061 -0.613 0.54074
## StreamHabartický potok -1.57143 2.28655 -0.687 0.49271
## StreamHacka -3.57143 2.28655 -1.562 0.11986
## StreamHajský potok 3.42857 2.28655 1.499 0.13531
## StreamHucivý potok -1.82143 1.34061 -1.359 0.17576
## StreamJílovecký 0.82857 1.05405 0.786 0.43273
## StreamJiretínský potok -2.57143 1.47596 -1.742 0.08299 .
## StreamKamenicka 0.42857 2.28655 0.187 0.85151
## StreamKaterinský potok -0.57143 2.28655 -0.250 0.80291
## StreamLibešický potok 0.42857 2.28655 0.187 0.85151
## StreamLockwitzbach -0.90476 1.47596 -0.613 0.54056
## StreamLoucenský potok -2.57143 2.28655 -1.125 0.26209
## StreamLoupnice 1.57143 1.14327 1.374 0.17080
## StreamLužec 0.42857 2.28655 0.187 0.85151
## StreamMalodolský potok 0.42857 1.71491 0.250 0.80291
## StreamModlanský potok 5.42857 1.71491 3.166 0.00179 **
## StreamMüglitz 0.26190 0.95273 0.275 0.78367
## StreamNacetínský potok -0.57143 2.28655 -0.250 0.80291
## StreamPodhorský potok 4.42857 2.28655 1.937 0.05416 .
## StreamPodkrušnohorský privadec 0.20635 1.07789 0.191 0.84837
## StreamPodmileský potok -2.57143 2.28655 -1.125 0.26209
## StreamPöhlbach -1.07143 1.71491 -0.625 0.53282
## StreamPohranicní potok -2.57143 2.28655 -1.125 0.26209
## StreamPolava -1.90476 1.47596 -1.291 0.19834
## StreamPoustevnický potok -2.57143 2.28655 -1.125 0.26209
## StreamPressnitz -1.17143 1.25239 -0.935 0.35072
## StreamPrísecnice 0.14286 1.14327 0.125 0.90068
## StreamPrunérovský potok 1.59524 1.18996 1.341 0.18155
## StreamRadcický potok -1.57143 2.28655 -0.687 0.49271
## StreamRote Weisseritz -1.27143 1.05405 -1.206 0.22913
## StreamRotes Wasser -0.57143 1.71491 -0.333 0.73932
## StreamRybný potok 0.17857 1.34061 0.133 0.89417
## StreamSchwarze Pockau -1.07143 1.34061 -0.799 0.42510
## StreamSeidewitz -1.68254 1.07789 -1.561 0.12009
## StreamŠramnický potok -1.23810 1.47596 -0.839 0.40255
## StreamSvídnice 2.09524 1.47596 1.420 0.15726
## StreamSviní potok -3.57143 2.28655 -1.562 0.11986
## StreamTelcský potok -0.57143 2.28655 -0.250 0.80291
## StreamTelnický potok -1.57143 1.47596 -1.065 0.28828
## StreamTrí pánu -1.57143 2.28655 -0.687 0.49271
## StreamTriebisch -1.97143 1.25239 -1.574 0.11702
## StreamVereinigte Weißeritz -0.07143 1.71491 -0.042 0.96682
## StreamWesenitz 0.56190 0.97904 0.574 0.56665
## StreamWilde Weißeritz -0.82143 1.10697 -0.742 0.45891
## StreamWilde Weisseritz -1.57143 2.28655 -0.687 0.49271
## StreamZalužanský potok -1.57143 2.28655 -0.687 0.49271
## StreamŽdírnický potok -0.07143 1.18996 -0.060 0.95219
## StreamZschopau -0.79365 1.07789 -0.736 0.46240
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.139 on 203 degrees of freedom
## Multiple R-squared: 0.3357, Adjusted R-squared: 0.123
## F-statistic: 1.578 on 65 and 203 DF, p-value: 0.008705
# anova(mod.rarefied)
# summary(mod.rarefied)
rich <- indices %>%
group_by(Country) %>%
shapiro_test(Richness)
rich
## # A tibble: 2 x 4
## Country variable statistic p
## <chr> <chr> <dbl> <dbl>
## 1 CZ Richness 0.913 0.000000493
## 2 DE Richness 0.936 0.00000494
# rar <-indices %>%
# group_by(Country) %>%
# shapiro_test(Rarefied)
richGeo <- indices %>%
group_by(Geography) %>%
shapiro_test(Richness)
richGeo
## # A tibble: 2 x 4
## Geography variable statistic p
## <chr> <chr> <dbl> <dbl>
## 1 north Richness 0.943 0.00000168
## 2 south Richness 0.912 0.0000107
# rar
They are not normally distributed
richPlot <- ggqqplot(indices, x = "Richness", facet.by = "Country")
richPlot
richGeoPlot <- ggqqplot(indices, x = "Richness", facet.by = "Geography")
richGeoPlot
# rarPlot <- ggqqplot(indices, x = "Rarefied", facet.by = "Country")
# rarPlot
mmm
This can be done using the Levene’s test. If the variances of groups are equal, the p-value should be greater than 0.05.
richLev <- indices %>%
levene_test(Richness ~ Country)
richLev
## # A tibble: 1 x 4
## df1 df2 statistic p
## <int> <int> <dbl> <dbl>
## 1 1 267 9.57 0.00219
richGeoLev <- indices %>%
levene_test(Richness ~ Geography)
richGeoLev
## # A tibble: 1 x 4
## df1 df2 statistic p
## <int> <int> <dbl> <dbl>
## 1 1 267 18.4 0.0000253
# rarLev <- indices %>%
# levene_test(Rarefied ~ Country)
# rarLev
Richness variances are not equal in the two groups.
We will use the Wilcoxon rank sum test, which is a non-parametric alternative to the independent two samples t-test for comparing two independent groups of samples, in the situation where the data are not normally distributed.
First, some summary statistics by groups: median and interquartile range (IQR).
richStats <- indices %>%
group_by(Country) %>%
get_summary_stats(Richness, type = "median_iqr")
richStats
## # A tibble: 2 x 5
## Country variable n median iqr
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 CZ Richness 127 3 3
## 2 DE Richness 142 3 2
richGeoStats <- indices %>%
group_by(Geography) %>%
get_summary_stats(Richness, type = "median_iqr")
richGeoStats
## # A tibble: 2 x 5
## Geography variable n median iqr
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 north Richness 176 3 2
## 2 south Richness 93 3 3
# rarStats <- indices %>%
# group_by(Country) %>%
# get_summary_stats(Rarefied, type = "median_iqr")
# rarStats
bxpRich <- ggboxplot(
indices, x = "Country", y = "Richness",
ylab = "Richness", xlab = "Country", add = "jitter"
)
bxpRich
bxpGeoRich <- ggboxplot(
indices, x = "Geography", y = "Richness",
ylab = "Richness", xlab = "Geography", add = "jitter"
)
bxpGeoRich
# bxpRar <- ggboxplot(
# indices, x = "Country", y = "Rarefied",
# ylab = "Rarefied richness", xlab = "Country", add = "jitter"
# )
# bxpRar
wilcoxRich <- indices %>%
rstatix::wilcox_test(Richness ~ Country) %>%
add_significance()
wilcoxRich
## # A tibble: 1 x 8
## .y. group1 group2 n1 n2 statistic p p.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <chr>
## 1 Richness CZ DE 127 142 9618. 0.341 ns
wilcoxRich <- indices %>%
rstatix::wilcox_test(Richness ~ Country) %>%
add_significance()
wilcoxRich
## # A tibble: 1 x 8
## .y. group1 group2 n1 n2 statistic p p.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <chr>
## 1 Richness CZ DE 127 142 9618. 0.341 ns
# wilcoxRar <- indices %>%
# rstatix::wilcox_test(Rarefied ~ Country) %>%
# add_significance()
# wilcoxRar
wilcoxGeoRich <- indices %>%
rstatix::wilcox_test(Richness ~ Geography) %>%
add_significance()
wilcoxGeoRich
## # A tibble: 1 x 8
## .y. group1 group2 n1 n2 statistic p p.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <chr>
## 1 Richness north south 176 93 7528. 0.275 ns
library(coin)
effRich <- indices %>% wilcox_effsize(Richness ~ Country)
effRich
## # A tibble: 1 x 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 Richness CZ DE 0.0581 127 142 small
effGeoRich <- indices %>% wilcox_effsize(Richness ~ Geography)
effGeoRich
## # A tibble: 1 x 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 Richness north south 0.0666 176 93 small
# effRar <- indices %>% wilcox_effsize(Rarefied ~ Country)
# effRar
The summary result paragraphs could be something like the ones below.
wilcoxRich <- wilcoxRich %>% add_xy_position(x = "Country")
bxpRich +
stat_pvalue_manual(wilcoxRich, tip.length = 0) +
labs(subtitle = get_test_label(wilcoxRich, detailed = TRUE))
wilcoxGeoRich <- wilcoxGeoRich %>% add_xy_position(x = "Geography")
bxpGeoRich +
stat_pvalue_manual(wilcoxGeoRich, tip.length = 0) +
labs(subtitle = get_test_label(wilcoxGeoRich, detailed = TRUE))
# wilcoxRar <- wilcoxRar %>% add_xy_position(x = "Country")
# bxpRar +
# stat_pvalue_manual(wilcoxRar, tip.length = 0) +
# labs(subtitle = get_test_label(wilcoxRar, detailed = TRUE))