Introduction

Data set, statistical analyses, graphics and inference concerning bird abundance, species richness in the presence/absence of Hydrilla.

Citation: R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL https://www.R-project.org/.

Distribution

This page is not yet public on rpubs: http://rpubs.com/puruckertom/lake_thurmond

The git repo holding this code is available at: https://github.com/puruckertom/bharam_lakerussell

Computational environment

Version and installed libraries.

verbose_output = TRUE

if(verbose_output){
  print(Sys.info()[4])
  R.Version()$version.string
}

##          nodename 
## "DZ2626UTPURUCKE"

## [1] "R version 3.3.2 (2016-10-31)"

#check to see if needed packages are installed
list.of.packages <- c("ggplot2", "dplyr")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
#load relevant packages
library(MASS)
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.3.3

library(BSDA)

## Warning: package 'BSDA' was built under R version 3.3.3

## Loading required package: e1071

## Warning: package 'e1071' was built under R version 3.3.3

## Loading required package: lattice

## 
## Attaching package: 'BSDA'

## The following object is masked from 'package:datasets':
## 
##     Orange

if(verbose_output){
  print("list of loaded packages: ")
  print((.packages()))
}

## [1] "list of loaded packages: "
##  [1] "BSDA"      "lattice"   "e1071"     "ggplot2"   "dplyr"    
##  [6] "MASS"      "stats"     "graphics"  "grDevices" "utils"    
## [11] "datasets"  "methods"   "base"

Observational data

Original file with observational data: https://github.com/puruckertom/bharam_lakerussell/blob/master/data_in/lakerussell_data.txt

If only x is given, or if both x and y are given and paired is TRUE, a Wilcoxon signed rank test of the null that the distribution of x (in the one sample case) or of x - y (in the paired two sample case) is symmetric about mu is performed.

##################
#the data sets
##################
#everything
file.exists(paste(lr_data,"lakerussell_data.csv",sep=""))

## [1] TRUE

lr_obs <- read.table(paste(lr_data,"lakerussell_data.csv",sep=""), header = TRUE, sep = ",")
str(lr_obs)

## 'data.frame':    112 obs. of  8 variables:
##  $ Site      : Factor w/ 16 levels "Amity","Big Hart",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ DOY       : int  311 317 333 338 339 348 354 311 317 333 ...
##  $ Hectare   : num  4.8 4.8 4.8 4.8 4.8 4.8 4.8 17.4 17.4 17.4 ...
##  $ Hydrilla  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Bird      : int  0 1 0 0 4 0 0 1 0 0 ...
##  $ Bird.ha   : num  0 0.208 0 0 0.833 ...
##  $ Species   : int  0 1 0 0 2 0 0 1 0 0 ...
##  $ Species.ha: num  0 0.208 0 0 0.417 ...

lr.nohydrilla <- lr_obs[which(lr_obs$Hydrilla==1),]
lr.hydrilla <- lr_obs[which(lr_obs$Hydrilla==2),]

Statistics

wilcox test as a generalization of the binomial test, bootstrapped test statistic by randomly sampling the hydrilla to pair with the 22 nonhydrilla samples for each realization, repeated 1000 times to create a distribution of the test statistic

the null is that the median of the differences of bird abundance and/or density (between paired hydrilla and nonhydrilla samples) is equal to zero, ties are therefore equal to zero and should not be discarded

wilcoxon rank rum (= mann-whitney u) can also handle unequal sample sizes

the sign.test does not reliably return a htest object as it is supposed to: http://r.789695.n4.nabble.com/SIGN-test-td4654434.html

Inference

pvalues.birddensity <- rep(NA, 1000)
for(i in 1:1000){
  nohydrilla.birddensity <- lr.nohydrilla$Bird.ha
  hydrilla.birddensity <- sample(lr.hydrilla$Bird.ha,size=22)
  delta.birddensity <- hydrilla.birddensity - nohydrilla.birddensity
  hydrilla_greater <- length(which(delta.birddensity>0))
  #exact suppresses error warnings
  #x <- wilcox.test(nohydrilla.birddensity, hydrilla.birddensity,paired=TRUE,exact=FALSE)$p.value
  #y <- SIGN.test(nohydrilla.birddensity, hydrilla.birddensity, md = 0, alternative = "two.sided", conf.level = 0.95)
  x <- binom.test(hydrilla_greater,22,p=0.5,alternative="two.sided",conf.level=0.95)
  pvalues.birddensity[i] <- x$p.value
}
hist(pvalues.birddensity)

paste("median = ", median(pvalues.birddensity))

## [1] "median =  0.000855445861816406"

pvalues.birdabundance <- rep(NA, 1000)
for(i in 1:1000){
  nohydrilla.birdabundance <- lr.nohydrilla$Species.ha
  hydrilla.birdabundance <- sample(lr.hydrilla$Species.ha,size=22)
  delta.birdabundance <- hydrilla.birdabundance - nohydrilla.birdabundance
  hydrilla_greater <- length(which(delta.birdabundance>0))
  #exact suppresses error warnings
  #z <- wilcox.test(nohydrilla.birdabundance, hydrilla.birdabundance,paired=TRUE,exact=FALSE)$p.value
  #y <- SIGN.test(nohydrilla.birdabundance, hydrilla.birdabundance, md = 0, alternative = "two.sided", conf.level = 0.95)
  x <- binom.test(hydrilla_greater,22,p=0.5,alternative="two.sided",conf.level=0.95)
  pvalues.birdabundance[i] <- x$p.value
}
hist(pvalues.birdabundance)

paste("median = ", median(pvalues.birdabundance))

## [1] "median =  0.00434350967407227"