This publication features an analysis of data collected during a pilot study of dead large wood (> 1 m in length and > 10 cm in diameter) at Greensboro’s North Buffalo Creek on October 23 and 24, 2020. Three different reaches (sites) of the creek were selected and studied: Latham Park, Audubon Preserve, and Lake Daniel Park (see map). Two hundred meters of each reach were surveyed following the Washington State Large Woody Debris Survey Protocol. Metrics were evaluated based on the following questions:
What is the frequency of wood at each site?
Are the frequencies of large wood at the sites significantly different from each other?
What is the volume of large wood at each site?
Is the volume of large wood at each site significantly different from each other?
Is the length of large wood at each site significantly different from each other?
Is the diameter of large wood at each site significantly different from each other?
We need to import the dataset first. The dataset can be downloaded from here. (If using Windows, hold the “Control” key and click “here”; the link will open in your default browser.)
# NOTE(review): setwd() with an absolute path ties the script to one machine;
# the relative path "Data_for_map.xlsx" below is resolved against this directory.
setwd("D:/Pilot_Study/Descriptive-statistics-Mann-Whitney-U-test-and-Chi-squared-test")
library(readxl)
# Read one worksheet per surveyed site from the study workbook.
LathamPark <- read_excel("D:/Pilot_Study/Data.xlsx",
sheet = "LathamPark")
LakeDanielPark <- read_excel("D:/Pilot_Study/Data.xlsx",
sheet = "LakeDanielPark")
Auduban <- read_excel("D:/Pilot_Study/Data.xlsx",
sheet = "Auduban")
# install.packages("tidyverse")
library(tidyverse)
# install.packages("devtools")
# devtools::install_github("thomasp85/patchwork")
library(patchwork)
# install.packages("gdata")
library(gdata)
# install.packages("flextable")
library(flextable)
#install.packages("leaflet")
library(leaflet)
library(readxl)
# Marker table for the site map; the calls below use its Long, lat and Name columns.
data <- read_excel("Data_for_map.xlsx")
# Interactive leaflet map: default tiles plus one marker per row of `data`,
# placed at (Long, lat) with Name shown as the popup.
m <- leaflet() %>%
addTiles() %>%
addMarkers( data = data, lng = ~Long, lat = ~lat,
popup = ~ Name)
m
We now convert all missing (NA) values to zeros using a function
# Replace every missing value (NA) in every column of a data frame with 0.
#
# x: a data frame or tibble.
# Returns: `x` with the same columns, each rewritten as
#   ifelse(is.na(column), 0, column).
#
# All columns are processed on purpose, not only numeric ones: a column that
# is entirely NA (and therefore not numeric) still has to become numeric 0.
# Note that ifelse() also drops column attributes and that a character column
# containing NA receives the string "0".
hybrd.ifelse <- function(x) {
  # across() supersedes the scoped verb mutate_all(); the result is the same.
  mutate(x, across(everything(), ~ ifelse(is.na(.x), 0, .x)))
}
Now we apply the function to each of our data frames
# Replace NA with 0 in each site's table, overwriting the table in place.
LathamPark <- hybrd.ifelse(LathamPark)
Auduban <- hybrd.ifelse(Auduban)
LakeDanielPark <- hybrd.ifelse(LakeDanielPark)
# Append per-piece length, diameter, radius and cylinder volume to a table.
#
# dataframe: data frame with one row per wood piece.
# Zone1, Zone2, Zone3, Zone4: numeric vectors (one value per row) holding the
#   part of each piece's length that lies in each zone; they are summed to get
#   the total length. Each defaults to the same-named column of `dataframe`,
#   so Volume_Calc(dataframe) is enough when the columns use these names.
# PieceD: numeric vector of piece diameters; divided by 100 to convert to
#   metres (i.e. presumably recorded in centimetres -- confirm against the
#   field sheet). Defaults to dataframe$PieceD.
#
# Returns: `dataframe` with four columns appended via cbind(): `length`,
#   `diameter_m`, `radius` and `volume` (length * pi * radius^2, i.e. each
#   piece is treated as a cylinder). The value is returned visibly; the
#   original ended in an assignment and so returned it invisibly.
Volume_Calc <- function(dataframe,
                        Zone1 = dataframe$Zone1,
                        Zone2 = dataframe$Zone2,
                        Zone3 = dataframe$Zone3,
                        Zone4 = dataframe$Zone4,
                        PieceD = dataframe$PieceD) {
  # Local name avoids shadowing base::length(); the column is still "length".
  piece_length <- Zone1 + Zone2 + Zone3 + Zone4
  diameter_m <- PieceD / 100
  radius <- diameter_m / 2
  volume <- piece_length * pi * radius^2

  cbind(
    dataframe,
    length = piece_length,
    diameter_m = diameter_m,
    radius = radius,
    volume = volume
  )
}
Applying the function to each of our data frames
# Add the length / diameter_m / radius / volume columns to each site's table,
# passing that site's own zone and diameter columns.
LathamPark <- Volume_Calc(
  dataframe = LathamPark,
  Zone1 = LathamPark$Zone1,
  Zone2 = LathamPark$Zone2,
  Zone3 = LathamPark$Zone3,
  Zone4 = LathamPark$Zone4,
  PieceD = LathamPark$PieceD
)
Auduban <- Volume_Calc(
  dataframe = Auduban,
  Zone1 = Auduban$Zone1,
  Zone2 = Auduban$Zone2,
  Zone3 = Auduban$Zone3,
  Zone4 = Auduban$Zone4,
  PieceD = Auduban$PieceD
)
LakeDanielPark <- Volume_Calc(
  dataframe = LakeDanielPark,
  Zone1 = LakeDanielPark$Zone1,
  Zone2 = LakeDanielPark$Zone2,
  Zone3 = LakeDanielPark$Zone3,
  Zone4 = LakeDanielPark$Zone4,
  PieceD = LakeDanielPark$PieceD
)
Selecting our volume column
# Keep only each site's volume column, renamed so the site is identifiable.
Auduban_V <- select(Auduban, "Auduban Volume" = volume)
LakeDanielPark_V <- select(LakeDanielPark, "Lake Daniel Park volume" = volume)
LathamPark_V <- select(LathamPark, "Latham Park volume" = volume)

# Put the three columns side by side (the sites have different numbers of
# pieces), reshape to one row per piece with the site in `Field`, and drop
# the rows that exist only as padding.
Volume <- gdata::cbindX(LakeDanielPark_V, Auduban_V, LathamPark_V) %>%
  pivot_longer(
    c("Lake Daniel Park volume", "Latham Park volume", "Auduban Volume"),
    names_to = "Field",
    values_to = "Volume"
  ) %>%
  drop_na()

# Box plot of per-piece volume by site; the red label sits next to the
# extreme value.
ggplot(Volume, aes(x = Field, y = Volume)) +
  geom_boxplot() +
  annotate(
    geom = "text", x = 1.3, y = 6.5,
    label = "Very Large tree", color = "red"
  ) +
  theme_bw()
# Build a histogram of piece volumes for a single site.
#
# dataframe: long-format table with a `Field` column (site label) and a
#   `Volume` column.
# Site: string; rows with Field == Site are plotted, and the same string is
#   used as the x-axis label and as the (single-level) fill mapping.
#
# Returns: the ggplot object, invisibly, so it must be assigned or printed
#   explicitly by the caller.
Volume_hist <- function(dataframe, Site) {
  site_rows <- filter(dataframe, Field == Site)
  site_plot <- ggplot(data = site_rows, mapping = aes(x = Volume, fill = Site)) +
    geom_histogram() +
    theme_bw() +
    theme(legend.position = "none") +
    labs(x = Site)
  invisible(site_plot)
}
# One volume histogram per site.
Auduban_V_ggplot <- Volume_hist(Volume, "Auduban Volume")
LathamPark_V_ggplot <- Volume_hist(Volume, "Latham Park volume")
LakeDanielPark_V_ggplot <- Volume_hist(Volume, "Lake Daniel Park volume")

# Histogram of all pieces pooled across sites. The former `fill = Volume`
# mapping was removed: a continuous fill cannot be kept through binning, so it
# never coloured the bars and only triggers a "dropped aesthetics" warning in
# newer ggplot2 releases.
All_site_hist <- ggplot(Volume, aes(x = Volume)) +
  geom_histogram() +
  labs(x = "All sites") +
  theme_bw() +
  theme(legend.position = "none")

# patchwork layout: two plots on top, two below.
(Auduban_V_ggplot | LathamPark_V_ggplot) /
  (LakeDanielPark_V_ggplot | All_site_hist)
The average, median and standard deviation of wood volume in Lake Daniel
# Mean, standard deviation and median of the per-piece volume at Lake Daniel
# Park (the `##` lines are the rendered output).
mean(LakeDanielPark$volume)
## [1] 0.1504215
sd(LakeDanielPark$volume)
## [1] 0.163793
median(LakeDanielPark$volume)
## [1] 0.07696902
The average, median and standard deviation of wood volume in Audubon
# Mean, standard deviation and median of the per-piece volume at the Audubon
# site (the `##` lines are the rendered output). The mean is far above the
# median, i.e. the distribution is strongly right-skewed.
mean(Auduban$volume)
## [1] 0.3286007
sd(Auduban$volume)
## [1] 1.04468
median(Auduban$volume)
## [1] 0.05677486
The average, median and standard deviation of wood volume in Latham Park
# Mean, standard deviation and median of the per-piece volume at Latham Park
# (the `##` lines are the rendered output).
mean(LathamPark$volume)
## [1] 0.08859561
sd(LathamPark$volume)
## [1] 0.08513687
median(LathamPark$volume)
## [1] 0.04846692
# Per-site summary of piece volume. Each reach surveyed was 200 m long, so
# the site total is halved to express it per 100 m.
a <- summarise(
  group_by(Volume, Field),
  "Volume Per 100 meters" = sum(Volume) / 2,
  mean = mean(Volume, na.rm = TRUE),
  sd = sd(Volume, na.rm = TRUE),
  median = median(Volume, na.rm = TRUE),
  IQR = IQR(Volume, na.rm = TRUE)
)
## `summarise()` ungrouping output (override with `.groups` argument)
# Render the summary as a flextable sized to its contents.
a <- flextable(a)
autofit(a, add_w = 0.001, add_h = 0.1)
Field | Volume Per 100 meters | mean | sd | median | IQR |
Auduban Volume | 5.7505120 | 0.32860068 | 1.04468045 | 0.05677486 | 0.2635415 |
Lake Daniel Park volume | 1.8802691 | 0.15042153 | 0.16379296 | 0.07696902 | 0.1369954 |
Latham Park volume | 0.9302539 | 0.08859561 | 0.08513687 | 0.04846692 | 0.1188700 |
(Jams are excluded from the volume calculation because the Washington Survey Guide does not include diameter and length measurements for jams.)
The Kruskal-Wallis test is a non-parametric alternative to ANOVA. This method is useful for testing whether samples originate from the same population. The reasons for using the test: • Volume is continuous data, unlike the total count of wood logs • The data are not normally distributed (very skewed)
# Base-graphics box plot of per-piece volume by site, as a visual check
# before the rank-based test.
boxplot(Volume~Field, data = Volume)
The result of the Kruskal-Wallis test is:
# Kruskal-Wallis rank sum test of per-piece volume across the three sites
# (the `##` lines are the rendered output; p = 0.493).
kruskal.test(Volume ~ Field, data = Volume)
##
## Kruskal-Wallis rank sum test
##
## data: Volume by Field
## Kruskal-Wallis chi-squared = 1.4147, df = 2, p-value = 0.493
There is no statistically significant difference between the volumes of individual pieces at Lake Daniel Park, Latham Park, and Audubon Preserve.
# Piece counts per site, plus the count per 100 m (each reach was 200 m).
Field <- c("Latham Park", "Lake Daniel" , "Auduban")
Values <- c(33, 25, 60)
Pieces <- data.frame(Field, Values, Per100 = Values / 2)

# Bar chart of pieces per 100 m by site.
ggplot(Pieces, aes(x = Field, y = Per100)) +
  geom_col(width = 0.5) +
  labs(x = "", y = "Pieces per 100 meters") +
  theme_bw()
H0: p1 = p2 = p3 (large wood is equally frequent at all three sites). Ha: at least one of the sites differs from the others.
# Chi-squared goodness-of-fit test of equal piece frequency across the three
# reaches (all the same surveyed length, so equal expected counts apply).
# The test has to be run on the raw piece counts: the statistic scales with
# the numbers supplied, so the halved per-100 m rates used previously halved
# X-squared (8.5508 instead of 17.102) and overstated the p-value.
data <- Pieces$Values
chisq.test(data)
##
## Chi-squared test for given probabilities
##
## data: data
## X-squared = 17.102, df = 2, p-value = 0.0001934
The difference in the frequency of Large Wood is statistically significant
# Number of pieces in each channel-orientation class, one table per site.
CountLP <- count(LathamPark, `Channel Orient`, name = "LathamPark")
CountAu <- count(Auduban, `Channel Orient`, name = "Auduban")
CountLD <- count(LakeDanielPark, `Channel Orient`, name = "LakeDanielPark")

# full_join keeps every orientation class seen at any site. The chained
# right_join used before kept only the classes present at Lake Daniel Park,
# which would silently drop a class that occurs only at another site.
# The join key is named explicitly, so no "Joining, by" message is printed.
a <- CountLP %>%
  full_join(CountAu, by = "Channel Orient") %>%
  full_join(CountLD, by = "Channel Orient")
flextable::flextable(a)
Channel Orient | LathamPark | Auduban | LakeDanielPark |
A | 8 | 11 | 11 |
B | 7 | 7 | 4 |
C | 6 | 14 | 9 |
D | NA | 3 | 1 |
# Mean and standard deviation of the `Piece Decay` column for each site
# (the `##` lines are the rendered output).
# NOTE(review): hybrd.ifelse() recoded every NA to 0 earlier in the script, so
# any piece with a missing decay class is counted as 0 here -- confirm the
# column has no gaps.
mean(Auduban$`Piece Decay`)
## [1] 2.028571
sd(Auduban$`Piece Decay`)
## [1] 0.8570028
mean(LathamPark$`Piece Decay`)
## [1] 2.142857
sd(LathamPark$`Piece Decay`)
## [1] 0.853564
mean(LakeDanielPark$`Piece Decay`)
## [1] 2.08
sd(LakeDanielPark$`Piece Decay`)
## [1] 0.8124038
# Number of log jams recorded at each site.
Sites <- c("Auduban Preserve", "Latham Park", "Lake Daniel Park")
`No. of Jams` <- c(2, 1, 0)
# check.names = FALSE keeps the header exactly as written; by default
# data.frame() rewrites it to the syntactic name "No..of.Jams".
Jams <- data.frame(Sites, `No. of Jams`, check.names = FALSE)
Jams
## Sites No. of Jams
## 1 Auduban Preserve 2
## 2 Latham Park 1
## 3 Lake Daniel Park 0
To find the actual length, we need to add all four zones together. The four zones are: zone 1, zone 2, zone 3, and zone 4. We can use a function to perform the task
# NOTE(review): this chunk repeats the import, NA replacement and Volume_Calc
# steps from the top of the script with the same inputs. Nothing in between
# reassigns the three site tables, so it appears to rebuild identical data --
# confirm whether it is still needed.
library(readxl)
LathamPark <- read_excel("D:/Pilot_Study/Data.xlsx",
sheet = "LathamPark")
LakeDanielPark <- read_excel("D:/Pilot_Study/Data.xlsx",
sheet = "LakeDanielPark")
Auduban <- read_excel("D:/Pilot_Study/Data.xlsx",
sheet = "Auduban")
# Replace NA with 0 in every column of each table.
LathamPark <- hybrd.ifelse(LathamPark)
Auduban <- hybrd.ifelse(Auduban)
LakeDanielPark <- hybrd.ifelse(LakeDanielPark)
# Append length (sum of the four zones), diameter_m, radius and volume.
LathamPark <- Volume_Calc(LathamPark, LathamPark$Zone1, LathamPark$Zone2,
LathamPark$Zone3, LathamPark$Zone4, LathamPark$PieceD)
Auduban <- Volume_Calc(Auduban, Auduban$Zone1, Auduban$Zone2,
Auduban$Zone3, Auduban$Zone4, Auduban$PieceD)
LakeDanielPark <- Volume_Calc(LakeDanielPark, LakeDanielPark$Zone1,
LakeDanielPark$Zone2,
LakeDanielPark$Zone3, LakeDanielPark$Zone4,
LakeDanielPark$PieceD)
Selecting our Length column
# Keep only each site's length column, renamed so the site is identifiable.
Auduban_Length <- select(Auduban, "Auduban length" = length)
LakeDanielPark_Lenght <- select(LakeDanielPark, "Lake Daniel Park length" = length)
LathamPark_Length <- select(LathamPark, "Latham Park length" = length)

# Side-by-side columns (sites differ in piece count), reshaped to one row per
# piece with the site in `Field`; padding rows are dropped.
Length <- gdata::cbindX(
  Auduban_Length, LakeDanielPark_Lenght, LathamPark_Length
) %>%
  pivot_longer(
    c("Lake Daniel Park length", "Latham Park length", "Auduban length"),
    names_to = "Field",
    values_to = "length"
  ) %>%
  drop_na()

# Box plot of piece length by site.
ggplot(Length, aes(x = Field, y = length)) +
  theme_bw() +
  geom_boxplot()

# Kruskal-Wallis rank sum test of piece length across the three sites
# (the `##` lines are the rendered output; p = 0.07146).
kruskal.test(length ~ Field, data = Length)
##
## Kruskal-Wallis rank sum test
##
## data: length by Field
## Kruskal-Wallis chi-squared = 5.2773, df = 2, p-value = 0.07146
# Keep only each site's diameter (in metres), renamed per site. These names
# become the facet labels below, so the earlier misspellings
# ("diamater_LathamPark", "diameter_Audobon") are corrected here.
Auduban_Diameter <- Auduban %>%
  select(diameter_m) %>%
  rename(diameter_Audubon = diameter_m)
LakeDanielPark_Diameter <- LakeDanielPark %>%
  select(diameter_m) %>%
  rename(diameter_LakeDanielPark = diameter_m)
LathamPark_Diameter <- LathamPark %>%
  select(diameter_m) %>%
  rename(diameter_LathamPark = diameter_m)

# Side-by-side columns (sites differ in piece count), reshaped to one row per
# piece with the site label in `Field`; padding rows are dropped.
Diameter <- gdata::cbindX(
  Auduban_Diameter, LakeDanielPark_Diameter, LathamPark_Diameter
)
Diameter <- pivot_longer(
  Diameter,
  c("diameter_Audubon", "diameter_LakeDanielPark", "diameter_LathamPark"),
  names_to = "Field",
  values_to = "diameter"
) %>%
  drop_na()

# Histogram of diameter per site. The colour is set as a constant on the geom:
# inside aes(), fill = "red" is treated as a one-level data value and drawn in
# the default palette colour rather than in red. A constant fill creates no
# legend, so the legend-hiding theme call is no longer needed.
ggplot(data = Diameter, aes(x = diameter)) +
  geom_histogram(fill = "red") +
  theme_bw() +
  facet_wrap(~Field)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Kruskal-Wallis rank sum test of piece diameter across the three sites
# (the `##` lines are the rendered output; p = 0.06833). Relabelling the
# groups does not change the statistic.
kruskal.test(diameter ~ Field, data = Diameter)
##
## Kruskal-Wallis rank sum test
##
## data: diameter by Field
## Kruskal-Wallis chi-squared = 5.3667, df = 2, p-value = 0.06833
Raj collecting dead large wood data
Feel free to contact me if you have any questions
Thanks