Analysis of North Buffalo Creek Study Data

Pilot Study

Rajesh Sigdel

2021-03-02


This publication features an analysis of data collected during a pilot study of dead large wood (> 1 m in length and > 10 cm in diameter) at Greensboro’s North Buffalo Creek on October 23 and 24, 2020. Three different reaches (sites) of the creek were selected and studied: Latham Park, Audubon Preserve, and Lake Daniel Park (see map). Two hundred meters of each reach were surveyed following the Washington State Large Woody Debris Survey Protocol. Metrics were evaluated based on the following questions:

Importing data

We need to import the dataset first. The dataset can be downloaded from here. (On Windows, hold the “Control” key and click the link; it will open in your default browser.)

# Set the working directory to the analysis folder.
setwd("D:/Pilot_Study/Descriptive-statistics-Mann-Whitney-U-test-and-Chi-squared-test")

library(readxl)

# Each study reach is stored on its own sheet of the same workbook.
LathamPark <- read_excel(
  path = "D:/Pilot_Study/Data.xlsx",
  sheet = "LathamPark"
)
LakeDanielPark <- read_excel(
  path = "D:/Pilot_Study/Data.xlsx",
  sheet = "LakeDanielPark"
)
Auduban <- read_excel(
  path = "D:/Pilot_Study/Data.xlsx",
  sheet = "Auduban"
)

Installing packages

# install.packages("tidyverse")
library(tidyverse)
# install.packages("devtools")
# devtools::install_github("thomasp85/patchwork")
library(patchwork)
# install.packages("gdata")
library(gdata)
# install.packages("flextable")
library(flextable)
#install.packages("leaflet")
library(leaflet)

Map of the Study site

library(readxl)
# Table with the columns used below for the markers: Long, lat and Name.
data <- read_excel(path = "Data_for_map.xlsx")

# Tile map with markers placed at the Long/lat columns; each popup shows Name.
m <- addMarkers(
  addTiles(leaflet()),
  data = data,
  lng = ~Long,
  lat = ~lat,
  popup = ~Name
)
m

We now convert all missing values (NA) into zeros using a function

# Replace every missing value (NA) in a data frame with 0.
#
# x: a data frame or tibble.
# Returns `x` with the same columns, where each NA entry has been replaced
# by 0 and every non-missing entry is kept.
hybrd.ifelse <- function(x) {
  # across() is the current dplyr idiom; mutate_all() is superseded.
  mutate(x, across(everything(), ~ ifelse(is.na(.x), 0, .x)))
}

Now, applying the function to our data frames

# Zero-fill the missing values in the data of each of the three sites.
LathamPark <- LathamPark %>% hybrd.ifelse()
Auduban <- Auduban %>% hybrd.ifelse()
LakeDanielPark <- LakeDanielPark %>% hybrd.ifelse()


The formula to calculate volume, treating each piece as a cylinder of length $L$ and radius $r$, is: $V = L \pi r^2$


# Append length, diameter, radius and volume columns to a data frame.
#
# Each row is treated as a cylinder: volume = length * pi * radius^2.
#
# dataframe: data frame the new columns are bound to.
# Zone1, Zone2, Zone3, Zone4: numeric vectors; their element-wise sum is
#   used as the length.
# PieceD: numeric vector of diameters; it is divided by 100 to convert the
#   diameter into metres.
# Returns `dataframe` with four extra columns named `length`, `diameter_m`,
#   `radius` and `volume`.
Volume_Calc <- function(dataframe,
                        Zone1, Zone2, Zone3, Zone4, PieceD) {
  piece_length <- Zone1 + Zone2 + Zone3 + Zone4
  diameter_m <- PieceD / 100 # converting diameter into metres
  radius <- diameter_m / 2
  volume <- piece_length * pi * radius^2 # L * pi * r^2
  # Final expression is the value itself (not an assignment), so the result
  # is returned visibly and prints when the function is called directly.
  cbind(
    dataframe,
    length = piece_length,
    diameter_m = diameter_m,
    radius = radius,
    volume = volume
  )
}

Applying the function to our data frames

# Add the length, diameter_m, radius and volume columns to each site's data.
LathamPark <- Volume_Calc(
  LathamPark,
  LathamPark[["Zone1"]], LathamPark[["Zone2"]],
  LathamPark[["Zone3"]], LathamPark[["Zone4"]],
  LathamPark[["PieceD"]]
)

Auduban <- Volume_Calc(
  Auduban,
  Auduban[["Zone1"]], Auduban[["Zone2"]],
  Auduban[["Zone3"]], Auduban[["Zone4"]],
  Auduban[["PieceD"]]
)

LakeDanielPark <- Volume_Calc(
  LakeDanielPark,
  LakeDanielPark[["Zone1"]], LakeDanielPark[["Zone2"]],
  LakeDanielPark[["Zone3"]], LakeDanielPark[["Zone4"]],
  LakeDanielPark[["PieceD"]]
)

Selecting our volume column

# Keep only the volume column of each site, renamed so it identifies the site.
Auduban_V <- Auduban %>%
  select("Auduban Volume" = volume)
LakeDanielPark_V <- LakeDanielPark %>%
  select("Lake Daniel Park volume" = volume)
LathamPark_V <- LathamPark %>%
  select("Latham Park volume" = volume)

# Bind the three columns side by side, reshape to one row per value with the
# site label in `Field`, and drop the NA rows left by the reshaping.
Volume <- gdata::cbindX(LakeDanielPark_V, Auduban_V, LathamPark_V) %>%
  pivot_longer(
    c("Lake Daniel Park volume", "Latham Park volume", "Auduban Volume"),
    names_to = "Field",
    values_to = "Volume"
  ) %>%
  drop_na()

# Box plot of volume by site, with a text label placed at (1.3, 6.5).
ggplot(data = Volume, mapping = aes(x = Field, y = Volume)) +
  geom_boxplot() +
  theme_bw() +
  annotate(
    geom = "text", x = 1.3, y = 6.5,
    label = "Very Large tree", color = "red"
  )

# Histogram of the `Volume` values belonging to one site.
#
# dataframe: data frame with a `Field` column (site label) and a `Volume`
#   column.
# Site: single string; only rows whose `Field` equals it are plotted, and it
#   is also used as the x-axis label.
# Returns a ggplot object.
Volume_hist <- function(dataframe, Site) {
  site_rows <- filter(dataframe, Field == Site)
  # The plot is the final expression (not an assignment), so it is returned
  # visibly and is drawn when the function is called without assigning.
  ggplot(site_rows, aes(x = Volume, fill = Site)) +
    geom_histogram() +
    labs(x = Site) +
    theme_bw() +
    theme(legend.position = "none")
}
# One histogram per site.
Auduban_V_ggplot <- Volume_hist(Volume, "Auduban Volume")
LathamPark_V_ggplot <- Volume_hist(Volume, "Latham Park volume")
LakeDanielPark_V_ggplot <- Volume_hist(Volume, "Lake Daniel Park volume")

# Pooled histogram of all sites. No `fill` mapping is used here: mapping fill
# to the continuous variable that is being binned is not constant within a
# bar, so the binning stat cannot carry it through to the drawn bars.
All_site_hist <- ggplot(Volume, aes(x = Volume)) +
  geom_histogram() +
  labs(x = "All sites") +
  theme_bw() +
  theme(legend.position = "none")

# Arrange the four plots in a 2 x 2 grid with patchwork operators.
(Auduban_V_ggplot | LathamPark_V_ggplot) /
  (LakeDanielPark_V_ggplot | All_site_hist)

The average, median and standard deviation of wood volume in Lake Daniel

# Mean, standard deviation and median of the volume column (Lake Daniel Park).
mean(LakeDanielPark[["volume"]])
## [1] 0.1504215
sd(LakeDanielPark[["volume"]])
## [1] 0.163793
median(LakeDanielPark[["volume"]])
## [1] 0.07696902

The average, median and standard deviation of wood volume in Audubon

# Mean, standard deviation and median of the volume column (Audubon).
mean(Auduban[["volume"]])
## [1] 0.3286007
sd(Auduban[["volume"]])
## [1] 1.04468
median(Auduban[["volume"]])
## [1] 0.05677486

The average, median and standard deviation of wood volume in Latham Park

# Mean, standard deviation and median of the volume column (Latham Park).
mean(LathamPark[["volume"]])
## [1] 0.08859561
sd(LathamPark[["volume"]])
## [1] 0.08513687
median(LathamPark[["volume"]])
## [1] 0.04846692

# Per-site summary table: total volume divided by 2 (reported per 100 m),
# plus mean, standard deviation, median and interquartile range.
a <- summarise(
  group_by(Volume, Field),
  "Volume Per 100 meters" = sum(Volume) / 2, # Per 100 meter
  mean = mean(Volume, na.rm = TRUE),
  sd = sd(Volume, na.rm = TRUE),
  median = median(Volume, na.rm = TRUE),
  IQR = IQR(Volume, na.rm = TRUE)
)
## `summarise()` ungrouping output (override with `.groups` argument)
a <- a %>% flextable()
a %>% autofit(add_w = 0.001, add_h = 0.1)

Field

Volume Per 100 meters

mean

sd

median

IQR

Auduban Volume

5.7505120

0.32860068

1.04468045

0.05677486

0.2635415

Lake Daniel Park volume

1.8802691

0.15042153

0.16379296

0.07696902

0.1369954

Latham Park volume

0.9302539

0.08859561

0.08513687

0.04846692

0.1188700

Jams are excluded from the volume calculation because the Washington Survey Guide does not include diameter and length measurements for jams.

Kruskal Wallis test

The Kruskal-Wallis test is a non-parametric alternative to ANOVA. This method is useful for testing whether samples originate from the same population. The reasons for the test: • Volume is continuous data, unlike the total count of wood logs • The dataset is not normal (very skewed data)

# Base-graphics box plot of volume grouped by site label.
boxplot(Volume ~ Field, data = Volume)

The result of the Kruskal-Wallis test is:

# Kruskal-Wallis rank sum test of Volume across the site labels in Field.
kruskal.test(Volume ~ Field, data = Volume)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Volume by Field
## Kruskal-Wallis chi-squared = 1.4147, df = 2, p-value = 0.493

There is no statistically significant difference between the volumes of individual pieces at Lake Daniel Park, Latham Park and Audubon Preserve.


Frequency of Large wood per 100 meters

# Piece counts per site and the same counts halved (per 100 meters).
Field <- c("Latham Park", "Lake Daniel", "Auduban")
Values <- c(33, 25, 60)
Pieces <- data.frame(Field, Values, Per100 = Values / 2)

# Bar chart of pieces per 100 meters for each site.
ggplot(data = Pieces, mapping = aes(x = Field, y = Per100)) +
  geom_col(width = 0.5) +
  theme_bw() +
  labs(x = "", y = "Pieces per 100 meters")

H0: p1 = p2 = p3. Ha: At least one of the sites is different from the others.

# Goodness-of-fit test of equal piece frequency across the three sites.
# chisq.test() must be given the observed counts: the statistic scales with
# the size of the counts, so testing the halved per-100 m rates halves
# X-squared and overstates the p-value. The raw counts in `Values` are used
# instead (they are the numbers that `Per100` was derived from).
chisq.test(Pieces$Values)
## 
##  Chi-squared test for given probabilities
## 
## data:  Pieces$Values
## X-squared = 17.102, df = 2, p-value = 0.0001934

The difference in the frequency of Large Wood is statistically significant


Channel Orientation

# Number of rows in each `Channel Orient` class, one count table per site.
CountLP <- LathamPark %>% count(`Channel Orient`, name = "LathamPark")
CountAu <- Auduban %>% count(`Channel Orient`, name = "Auduban")
CountLD <- LakeDanielPark %>% count(`Channel Orient`, name = "LakeDanielPark")

# Combine the three tables on the orientation class. full_join() keeps a
# class even when it is absent at one of the sites (its count shows as NA);
# a chain of right_join() calls would keep only the classes present in the
# last table. The join key is given explicitly.
a <- CountLP %>%
  full_join(CountAu, by = "Channel Orient") %>%
  full_join(CountLD, by = "Channel Orient")
flextable::flextable(a)

Channel Orient

LathamPark

Auduban

LakeDanielPark

A

8

11

11

B

7

7

4

C

6

14

9

D

NA

3

1


Degradation Class

# Mean and standard deviation of the `Piece Decay` column at each site.
mean(Auduban[["Piece Decay"]])
## [1] 2.028571
sd(Auduban[["Piece Decay"]])
## [1] 0.8570028
mean(LathamPark[["Piece Decay"]])
## [1] 2.142857
sd(LathamPark[["Piece Decay"]])
## [1] 0.853564
mean(LakeDanielPark[["Piece Decay"]])
## [1] 2.08
sd(LakeDanielPark[["Piece Decay"]])
## [1] 0.8124038

Jams

# Number of jams recorded at each site.
Sites <- c("Auduban Preserve", "Latham Park", "Lake Daniel Park")
`No. of Jams` <- c(2, 1, 0)

# check.names = FALSE keeps the column header exactly as written; by default
# data.frame() rewrites it to the syntactic name "No..of.Jams".
Jams <- data.frame(Sites, `No. of Jams`, check.names = FALSE)
Jams
##              Sites No. of Jams
## 1 Auduban Preserve           2
## 2      Latham Park           1
## 3 Lake Daniel Park           0

To find the actual length of a piece, we need to add the lengths recorded in all four zones (zone 1, zone 2, zone 3, and zone 4). We can use a function to perform the task.

library(readxl)

# Read the three site sheets from the workbook again.
LathamPark <- read_excel(
  path = "D:/Pilot_Study/Data.xlsx",
  sheet = "LathamPark"
)
LakeDanielPark <- read_excel(
  path = "D:/Pilot_Study/Data.xlsx",
  sheet = "LakeDanielPark"
)
Auduban <- read_excel(
  path = "D:/Pilot_Study/Data.xlsx",
  sheet = "Auduban"
)

# Zero-fill the missing values.
LathamPark <- LathamPark %>% hybrd.ifelse()
Auduban <- Auduban %>% hybrd.ifelse()
LakeDanielPark <- LakeDanielPark %>% hybrd.ifelse()

# Add the length, diameter_m, radius and volume columns to each site's data.
LathamPark <- Volume_Calc(
  LathamPark,
  LathamPark[["Zone1"]], LathamPark[["Zone2"]],
  LathamPark[["Zone3"]], LathamPark[["Zone4"]],
  LathamPark[["PieceD"]]
)

Auduban <- Volume_Calc(
  Auduban,
  Auduban[["Zone1"]], Auduban[["Zone2"]],
  Auduban[["Zone3"]], Auduban[["Zone4"]],
  Auduban[["PieceD"]]
)

LakeDanielPark <- Volume_Calc(
  LakeDanielPark,
  LakeDanielPark[["Zone1"]], LakeDanielPark[["Zone2"]],
  LakeDanielPark[["Zone3"]], LakeDanielPark[["Zone4"]],
  LakeDanielPark[["PieceD"]]
)

Selecting our Length column

# Keep only the length column of each site, renamed so it identifies the site.
Auduban_Length <- Auduban %>%
  select("Auduban length" = length)
LakeDanielPark_Lenght <- LakeDanielPark %>%
  select("Lake Daniel Park length" = length)
LathamPark_Length <- LathamPark %>%
  select("Latham Park length" = length)

# Bind the three columns side by side, reshape to one row per value with the
# site label in `Field`, and drop the NA rows left by the reshaping.
Length <- gdata::cbindX(
  Auduban_Length, LakeDanielPark_Lenght, LathamPark_Length
) %>%
  pivot_longer(
    c("Lake Daniel Park length", "Latham Park length", "Auduban length"),
    names_to = "Field",
    values_to = "length"
  ) %>%
  drop_na()

Performing Kruskal-Wallis Test on Length

# Box plot of length by site label.
ggplot(data = Length, mapping = aes(x = Field, y = length)) +
  geom_boxplot() +
  theme_bw()

# Kruskal-Wallis rank sum test of length across the site labels in Field.
kruskal.test(length ~ Field, data = Length)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  length by Field
## Kruskal-Wallis chi-squared = 5.2773, df = 2, p-value = 0.07146

Performing Kruskal-Wallis Test on Diameter

# Keep only the diameter_m column of each site, renamed so it identifies the
# site. These names become the facet labels of the histogram below.
Auduban_Diameter <- Auduban %>%
  select(diameter_Audobon = diameter_m)

LakeDanielPark_Diameter <- LakeDanielPark %>%
  select(diameter_LakeDanielPark = diameter_m)

# Label spelled "diameter" (was "diamater"), matching the other two sites.
LathamPark_Diameter <- LathamPark %>%
  select(diameter_LathamPark = diameter_m)

# Bind the three columns side by side, reshape to one row per value with the
# site label in `Field`, and drop the NA rows left by the reshaping.
Diameter <- gdata::cbindX(
  Auduban_Diameter, LakeDanielPark_Diameter, LathamPark_Diameter
) %>%
  pivot_longer(
    c("diameter_Audobon", "diameter_LakeDanielPark", "diameter_LathamPark"),
    names_to = "Field",
    values_to = "diameter"
  ) %>%
  drop_na()

# One histogram panel per site. The fill colour is set as a fixed parameter
# of the geom: inside aes(), "red" is treated as a data value mapped through
# the default colour scale, so the bars are not actually drawn in red.
ggplot(data = Diameter, aes(x = diameter)) +
  geom_histogram(fill = "red") +
  theme_bw() +
  facet_wrap(~Field)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Kruskal-Wallis rank sum test of diameter across the site labels in Field.
kruskal.test(diameter ~ Field, data = Diameter)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  diameter by Field
## Kruskal-Wallis chi-squared = 5.3667, df = 2, p-value = 0.06833


Raj collecting dead large wood data

Feel free to contact me if you have any questions

Thanks