This code is for my senior thesis and analyzes field and remotely sensed data. Generative AI (ChatGPT-3.5) assisted in writing this code. In this code I use the forest classification names dense and established interchangeably; both denote terra firma forest. Similarly, the riverine forest type is the same as the swamp or flooded forest type. This code is sequential: products from previous chunks are used in subsequent ones.
—- PRELIMINARIES AND DATA UPLOADS —- Uploading all data from the “DATA” folder.
Load libraries
# load necessary libraries
library(ape)
library(car)
## Loading required package: carData
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following object is masked from 'package:ape':
##
## where
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
library(dunn.test)
library(FSA)
## Registered S3 methods overwritten by 'FSA':
## method from
## confint.boot car
## hist.boot car
## ## FSA v0.9.5. See citation('FSA') if used in publication.
## ## Run fishR() for related website and fishR('IFAR') for related book.
##
## Attaching package: 'FSA'
## The following object is masked from 'package:car':
##
## bootCase
library(ForestGapR)
library(ForestTools)
library(foreach)
library(ggplot2)
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:ape':
##
## rotate
library(ggsignif)
library(lattice)
library(lidR)
library(multcompView)
library(raster)
## Loading required package: sp
##
## Attaching package: 'raster'
## The following objects are masked from 'package:lidR':
##
## projection, projection<-
## The following object is masked from 'package:dplyr':
##
## select
library(readr)
library(RColorBrewer)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
##
## Attaching package: 'sf'
## The following object is masked from 'package:lidR':
##
## st_concave_hull
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Registered S3 method overwritten by 'spdep':
## method from
## plot.mst ape
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:raster':
##
## extract
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::accumulate() masks foreach::accumulate()
## ✖ tidyr::extract() masks raster::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ raster::select() masks dplyr::select()
## ✖ purrr::some() masks car::some()
## ✖ purrr::when() masks foreach::when()
## ✖ dplyr::where() masks ape::where()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vegan)
## Loading required package: permute
## This is vegan 2.6-4
library(viridis)
## Loading required package: viridisLite
Upload MATLAB-processed data products and field data ready for analysis
setwd("/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/") # set working directory
# row expansion done in matlab
PW <- read_csv("plot_w_WD.csv") # read in data on all expanded rows organized with matlab data_work.m
## Rows: 12142 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Plots1, Plots2, Plots3, Plots4
## dbl (8): Plots5, Plots6, Plots7, Plots8, Plots9, Plots10, Plots11, Plots12
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(PW) # examine data
## # A tibble: 6 × 12
## Plots1 Plots2 Plots3 Plots4 Plots5 Plots6 Plots7 Plots8 Plots9 Plots10 Plots11
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 S7 savan… S7NW XAS 9 3 5 0 0 0 0.518
## 2 S7 savan… S7NW XAS 9 3 5 0 0 0 0.518
## 3 S7 savan… S7NW XAS 9 3 5 0 0 0 0.518
## 4 S7 savan… S7NW XAS 9 3 5 0 0 0 0.518
## 5 S7 savan… S7NW XAS 9 3 5 0 0 0 0.518
## 6 S7 savan… S7NW XAS 9 3 5 0 0 0 0.518
## # ℹ 1 more variable: Plots12 <dbl>
PL <- read_csv("plot_L.csv") # read in data on large expanded rows organized with matlab data_work.m
## Rows: 226 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Large_tal1, Large_tal2, Large_tal3, Large_tal4
## dbl (4): Large_tal5, Large_tal6, Large_tal7, Large_tal8
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(PL) # examine data
## # A tibble: 6 × 8
## Large_tal1 Large_tal2 Large_tal3 Large_tal4 Large_tal5 Large_tal6 Large_tal7
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 D1_1985 succession 1985NW ZCM 1 0 0.647
## 2 D1_1985 succession 1985NW XCA 2 0 0.799
## 3 D1_1985 succession 1985NW XCA 2 0 0.799
## 4 D1_1985 succession 1985NW ZLA 1 0 0.898
## 5 D1_1985 succession 1985NW ZMM 1 0 0.588
## 6 D1_1985 succession 1985SW ZRW 1 0 0.744
## # ℹ 1 more variable: Large_tal8 <dbl>
PM <- read_csv("plot_M.csv") # read in data on medium expanded rows organized with matlab data_work.m
## Rows: 3800 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Medium_tal1, Medium_tal2, Medium_tal3, Medium_tal4
## dbl (4): Medium_tal5, Medium_tal6, Medium_tal7, Medium_tal8
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(PM) # examine data
## # A tibble: 6 × 8
## Medium_tal1 Medium_tal2 Medium_tal3 Medium_tal4 Medium_tal5 Medium_tal6
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 S7 savanna S7NW XAS 5 0
## 2 S7 savanna S7NW XAS 5 0
## 3 S7 savanna S7NW XAS 5 0
## 4 S7 savanna S7NW XAS 5 0
## 5 S7 savanna S7NW XAS 5 0
## 6 S7 savanna S7NW XHA 2 4
## # ℹ 2 more variables: Medium_tal7 <dbl>, Medium_tal8 <dbl>
PS <- read_csv("plot_S.csv") # read in data on small expanded rows organized with matlab data_work.m
## Rows: 8116 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Small_tal1, Small_tal2, Small_tal3, Small_tal4
## dbl (4): Small_tal5, Small_tal6, Small_tal7, Small_tal8
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(PS) # examine data
## # A tibble: 6 × 8
## Small_tal1 Small_tal2 Small_tal3 Small_tal4 Small_tal5 Small_tal6 Small_tal7
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 S7 savanna S7NW XAS 9 3 0.518
## 2 S7 savanna S7NW XAS 9 3 0.518
## 3 S7 savanna S7NW XAS 9 3 0.518
## 4 S7 savanna S7NW XAS 9 3 0.518
## 5 S7 savanna S7NW XAS 9 3 0.518
## 6 S7 savanna S7NW XAS 9 3 0.518
## # ℹ 1 more variable: Small_tal8 <dbl>
FEPS <- read_csv("FEPS.csv") # read in data on non-expanded all rows
## New names:
## Rows: 1526 Columns: 11
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): Plot, Class, 10m_Subplot, Species dbl (6): C1, C2, C3, C4, C5, C6 lgl (1):
## ...11
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...11`
head(FEPS) # examine data
DBH <- read_csv("DBH.csv") # read in data of DBHs by plot
## Rows: 772 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Plot, Type, Sp
## dbl (1): DBH
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(DBH) # examine data
## # A tibble: 6 × 4
## Plot Type Sp DBH
## <chr> <chr> <chr> <dbl>
## 1 DU_1 succession ZRA 5.1
## 2 DU_1 succession ZPH 8.43
## 3 DU_1 succession KIE 8
## 4 DU_1 succession ZHU 44.5
## 5 DU_1 succession ZPH 5
## 6 DU_1 succession ZPC 7.1
MRes <- read_csv("MRes.csv") # read in non-spatial mar CHM data
## Rows: 417424 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Longitude, Latitude, CHM_Value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(MRes) # examine data
## # A tibble: 6 × 3
## Longitude Latitude CHM_Value
## <dbl> <dbl> <dbl>
## 1 527386. 85239. 26
## 2 527387. 85239. 25
## 3 527388. 85239. 26
## 4 527389. 85239. 27
## 5 527390. 85239. 27
## 6 527391. 85239. 28
RRes <- read_csv("RRes.csv") # read in non-spatial riv CHM data
## Rows: 416998 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Longitude, Latitude, CHM_Value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(RRes) # examine data
DRes <- read_csv("DRes.csv") # read in non-spatial den CHM data
## Rows: 417212 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Longitude, Latitude, CHM_Value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(DRes) # examine data
SPDWD <- read_csv("SPWDFF.csv") # read in wood density data
## Rows: 277 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Code, Name, HERB/WOODY, SAV/FOR-G/FOR-F/VILL/TM/BAI/PRIM/SEC/U, Dis...
## dbl (6): AVG_WOOD_D, VAR, STDV, RNG_LB, RNG_UB, N
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(SPDWD) # examine data
## # A tibble: 6 × 12
## Code Name `HERB/WOODY` SAV/FOR-G/FOR-F/VILL…¹ Disperser (EL, GOR, …²
## <chr> <chr> <chr> <chr> <chr>
## 1 XAS Annona seneg… WOODY SAV EL, GOR, MON, CH, BIRD
## 2 XHA Hymenocardia… WOODY SAV WIND
## 3 XBF Bridelia fer… WOODY SAV BIRD
## 4 XMA Maprounea af… WOODY SAV BIRD
## 5 XHD Hyparrhenia … HERB <NA> <NA>
## 6 XAC Andropogon s… HERB <NA> <NA>
## # ℹ abbreviated names: ¹`SAV/FOR-G/FOR-F/VILL/TM/BAI/PRIM/SEC/U`,
## # ²`Disperser (EL, GOR, MON, CH, HOG, BIRD, US, WIND, ALL, CAP, U)`
## # ℹ 7 more variables: AVG_WOOD_D <dbl>, LEVEL <chr>, VAR <dbl>, STDV <dbl>,
## # RNG_LB <dbl>, RNG_UB <dbl>, N <dbl>
SPDWDV <- read_csv("WDVar.csv") # read in wood density variance data
## New names:
## Rows: 277 Columns: 15
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): Code, Name, HERB/WOODY, LEVEL dbl (1): v lgl (10): ...6, ...7, ...8, ...9,
## ...10, ...11, ...12, ...13, ...14, ...15
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
head(SPDWDV) # examine data
## # A tibble: 6 × 15
## Code Name `HERB/WOODY` LEVEL v ...6 ...7 ...8 ...9 ...10 ...11 ...12
## <chr> <chr> <chr> <chr> <dbl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
## 1 XAS Anno… WOODY GE 0.74 NA NA NA NA NA NA NA
## 2 XHA Hyme… WOODY GE 0.74 NA NA NA NA NA NA NA
## 3 XBF Brid… WOODY GE 0.74 NA NA NA NA NA NA NA
## 4 XMA Mapr… WOODY GE 0.74 NA NA NA NA NA NA NA
## 5 XHD Hypa… HERB <NA> NA NA NA NA NA NA NA NA
## 6 XAC Andr… HERB <NA> NA NA NA NA NA NA NA NA
## # ℹ 3 more variables: ...13 <lgl>, ...14 <lgl>, ...15 <lgl>
subD <- read_csv("subset_DBH.csv") # read in DBH and connected density data
## Rows: 347 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Plot, Type, Sp
## dbl (2): DBH, WD
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(subD) # examine data
## # A tibble: 6 × 5
## Plot Type Sp DBH WD
## <chr> <chr> <chr> <dbl> <dbl>
## 1 R5 riverine YTR 8.8 0.475
## 2 R5 riverine RGO 13.9 0.690
## 3 R5 riverine RNP 8.8 0.642
## 4 R5 riverine RDG 8.5 0.583
## 5 R5 riverine RDG 6 0.583
## 6 R5 riverine RDG 17.7 0.583
subset_DBH <- subD
SDBH <- read_csv("SDBH.csv") # read in DBH and connected variance of density data
## Rows: 347 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Plot, Type, Sp
## dbl (5): DBH, WD, var, WD_ceil, WD_floor
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(SDBH) # examine data
## # A tibble: 6 × 8
## Plot Type Sp DBH WD var WD_ceil WD_floor
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 R5 riverine YTR 8.8 0.475 0.74 0.598 0.351
## 2 R5 riverine RGO 13.9 0.690 0.74 0.870 0.511
## 3 R5 riverine RNP 8.8 0.642 0.74 0.809 0.475
## 4 R5 riverine RDG 8.5 0.583 0.34 0.968 0.198
## 5 R5 riverine RDG 6 0.583 0.34 0.968 0.198
## 6 R5 riverine RDG 17.7 0.583 0.34 0.968 0.198
SDBH
## # A tibble: 347 × 8
## Plot Type Sp DBH WD var WD_ceil WD_floor
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 R5 riverine YTR 8.8 0.475 0.74 0.598 0.351
## 2 R5 riverine RGO 13.9 0.690 0.74 0.870 0.511
## 3 R5 riverine RNP 8.8 0.642 0.74 0.809 0.475
## 4 R5 riverine RDG 8.5 0.583 0.34 0.968 0.198
## 5 R5 riverine RDG 6 0.583 0.34 0.968 0.198
## 6 R5 riverine RDG 17.7 0.583 0.34 0.968 0.198
## 7 R5 riverine RNP 25.5 0.642 0.74 0.809 0.475
## 8 R5 riverine RDG 7.3 0.583 0.34 0.968 0.198
## 9 R5 riverine RPM 25.4 0.585 0.34 0.971 0.199
## 10 R5 riverine RBL 34.2 0.714 0.74 0.900 0.529
## # ℹ 337 more rows
Upload raster data
setwd("/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/") # set working directory
#### CHM ####
tif_path <- "CHMs.tif" # path
CHM <- raster(tif_path) # read in
#print(CHM) # basic information
#plot(CHM) # plot
#### LiDAR LAND CLASSIFICATION ####
tif_path <- "LiDAR_B_LC.tif" # path
LidLC <- raster(tif_path) # read in
crs(LidLC) <- st_crs(CHM) # assign the CHM's CRS (no reprojection)
#print(LidLC) # basic information
#plot(LidLC) # plot
#### INTEGRATED GROUND CONTROL POINT AND LiDAR LAND CLASSIFICATION ####
tif_path <- "GCP_B_LC.tif" # path
GCPLC <- raster(tif_path) # read in
crs(GCPLC) <- st_crs(CHM) # assign the CHM's CRS (no reprojection)
#print(GCPLC) # basic information
#plot(GCPLC) # plot
#### SPATIAL CLIPPED CHM FROM CLASSIFIED SECTIONS OF LiDAR ####
## Mar
CHM_MARs <- raster("CHM_clipped_mar_sub.tif") # read in
#print(CHM_MARs) # basic information
#plot(CHM_MARs) # plot
## Riv
CHM_RIVs <- raster("CHM_clipped_riv_sub.tif") # read in
#print(CHM_RIVs) # basic information
#plot(CHM_RIVs) # plot
## Den
CHM_DENs <- raster("CHM_clipped_den_sub.tif") # read in
#print(CHM_DENs) # basic information
#plot(CHM_DENs) # plot
#### LANDSAT MOSAIC RASTER + PRODUCTS AND BANDS OF INTEREST ####
# Load the mosaic raster in
RastMos <- raster("RastMosa.tif")
# Check the structure and information of the raster
#print(RastMos)
# Load the ndvi raster in
ndv <- raster("ndv.tif")
# Check the structure and information of the raster
#print(ndv)
# Load the band 1 raster in
B1 <- raster("Band1.tif")
# Check the structure and information of the raster
#print(B1)
# Load the band 2 raster in
B2 <- raster("Band2.tif")
# Check the structure and information of the raster
#print(B2)
# Load the band 3 raster in
B3 <- raster("Band3.tif")
# Check the structure and information of the raster
#print(B3)
# Load the band 4 raster in
B4 <- raster("Band4.tif")
# Check the structure and information of the raster
#print(B4)
# Load the band 5 raster in
B5 <- raster("Band5.tif")
# Check the structure and information of the raster
#print(B5)
# Load the band 6 raster in
B6 <- raster("Band6.tif")
# Check the structure and information of the raster
#print(B6)
# Load the band 7 raster in
B7 <- raster("Band7.tif")
# Check the structure and information of the raster
#print(B7)
# Load the band 8 raster in
B8 <- raster("Band8.tif")
# Check the structure and information of the raster
#print(B8)
# Load the DTM elevation raster in
elev <- raster("Elev.tif")
# Check the structure and information of the raster
#print(elev)
# Load a lower-resolution CHM raster in, reclassified for canopy gaps and averaged over 10 m x 10 m pixels to match the LANDSAT mosaic's resolution
TENmresGap <- raster("TenMrescgap.tif") # intermediate raster -- not strictly necessary
# Check the structure and information of the raster
#print(TENmresGap)
# Load the C prediction raster multiplied by land classification number in
TimesLC <- raster("TimesLC.tif") # intermediate raster -- not strictly necessary
# Check the structure and information of the raster
#print(TimesLC)
# Load the raster of only marantaceae class C predictions in
C_onlyMAR <- raster("trempR1.tif")
# Check the structure and information of the raster
#print(C_onlyMAR)
# Load the raster of only marantaceae class C predictions in
C_onlyMARC <- raster("tempMC.tif")
# Check the structure and information of the raster
#print(C_onlyMARC)
# Load the raster of only marantaceae class C predictions in
C_onlyMARF <- raster("tempMF.tif")
# Check the structure and information of the raster
#print(C_onlyMARF)
#### CLIPPED RANDOM POLYGONS OF INTEREST ####
# Load the buffered points in
bufpts <- st_read("bps.shp")
## Reading layer `bps' from data source
## `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/bps.shp' using driver `ESRI Shapefile'
## Simple feature collection with 14 features and 3 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 479804.6 ymin: 56113.92 xmax: 528985.2 ymax: 84417.4
## Projected CRS: Undefined Cartesian SRS with unknown unit
# Print information about the loaded dataset
#print(bufpts)
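Because later steps overlay these rasters, a quick CRS sanity check can catch mismatches early. A minimal sketch using raster::compareCRS, added for illustration rather than part of the original pipeline:
stopifnot(compareCRS(CHM, LidLC)) # errors only if the assigned CRSs diverge
stopifnot(compareCRS(CHM, GCPLC)) # same check for the GCP-integrated classification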
Upload polygon and point data
setwd("/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/") # set working directory
#### GROUND SAMPLING PLOT POINTS ####
# spatial points data frame for the point coordinates collected and plotted in QGIS
F100 <- data.frame(x = 492806.13198, y = 66476.26328)
F110 <- data.frame(x = 492805.8004 , y = 66666.5964)
F200 <- data.frame(x = 479814.5850, y = 56123.9184)
F300 <- data.frame(x = 485718.2253, y = 58310.0686)
F330 <- data.frame(x = 485838.1697, y = 58052.0881)
M1 <- data.frame(x = 528655, y = 84129)
M2 <- data.frame(x = 528975.2202, y = 84294.3450)
M3 <- data.frame(x = 528565.3114, y = 84356.5495)
M4 <- data.frame(x = 528950.0673, y = 84405.4275)
M5 <- data.frame(x = 528417.1, y = 84407.4)
R20 <- data.frame(x = 487629.0835, y = 64719.7980)
R30 <- data.frame(x = 489559.1121, y = 68289.3393)
R4 <- data.frame(x = 489191.9280, y = 68426.5140)
R5 <- data.frame(x = 487602.9289, y = 64402.2447)
# specify each spatial point
points <- lapply(list(F100, F110, F200, F300, F330, M1, M2, M3, M4, M5, R20, R30, R4, R5), function(df) {
coords <- df[, c("x", "y")] # coordinates
spatial_points <- SpatialPoints(coords)
proj4string(spatial_points) <- CRS(proj4string(CHM)) # same coordinate reference system as CHM
return(spatial_points) # return
})
# combine to spatial points dataframe
all_points <- rbind(F100, F110, F200, F300, F330, M1, M2, M3, M4, M5, R20, R30, R4, R5)
points_sp <- SpatialPointsDataFrame(coords = all_points[, c("x", "y")], data = all_points) # spatial coordinates
#print(points_sp) # basic information
#plot(points_sp) # plot
# to export the points
#sf_points <- st_as_sf(points_sp)
#shapefile_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/points_sp.shp"
#st_write(sf_points, dsn = shapefile_path)
#### BUFFERED 20 x 20 M FIELD PLOTS ####
shp_path <- "BufferFieldPlots.geojson" #path
Bfield <- st_read(shp_path) # read
## Reading layer `BufferFieldPlots' from data source
## `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/BufferFieldPlots.geojson'
## using driver `GeoJSON'
## Simple feature collection with 14 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 479804.6 ymin: 56113.92 xmax: 528985.2 ymax: 84417.4
## Projected CRS: WGS 84 / UTM zone 33N
Bfield <- st_transform(Bfield, crs = crs(CHM)) # convert CRS
#print(Bfield) # basic information
#plot(Bfield$geometry) # plot
#### IMBALANGA POLYGON ####
shp_path <- "polyI.gpkg" #path
clp <- st_read(shp_path) # read
## Reading layer `polyimb' from data source
## `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/polyI.gpkg' using driver `GPKG'
## Simple feature collection with 1 feature and 1 field
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 526769.9 ymin: 83475.07 xmax: 529577.5 ymax: 86230.74
## Projected CRS: WGS 84 / UTM zone 33N
clp <- st_transform(clp, crs = crs(CHM)) # convert CRS
#print(clp) # basic information
#plot(clp) # plot
#### CHM POLYGON FOOTPRINT ####
shp_path <- "FOOTP.gpkg" # path
FOOTP <- st_read(shp_path) # read
## Reading layer `footrpint' from data source
## `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/FOOTP.gpkg' using driver `GPKG'
## Simple feature collection with 3 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 518521.2 ymin: 66491.91 xmax: 534650.9 ymax: 101275.4
## Projected CRS: WGS 84 / UTM zone 33N
FOOTP <- st_transform(FOOTP, crs = crs(CHM)) # convert CRS
#print(FOOTP) # info
#plot(FOOTP) # plot
#### SAMPLE POLYGONS ####
shp_path <- "PlotsRandom.gpkg" # path
Pltz <- st_read(shp_path) # read
## Reading layer `buffered' from data source
## `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/PlotsRandom.gpkg'
## using driver `GPKG'
## Simple feature collection with 3000 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 518507.9 ymin: 66488.79 xmax: 534669.3 ymax: 101275.4
## Projected CRS: WGS 84 / UTM zone 33N
Pltz <- st_transform(Pltz, crs = crs(CHM)) # convert CRS
#print(Pltz) # info
#plot(Pltz) # plot
#### LAND CLASSIFICATION SELECTED POLYGONS ####
## Riv
RivSub <- st_read("RivSub.geojson") # read in
## Reading layer `RivSub' from data source
## `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/RivSub.geojson'
## using driver `GeoJSON'
## Simple feature collection with 849 features and 3 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 518858.8 ymin: 66662.76 xmax: 534661.8 ymax: 101269.8
## Geodetic CRS: WGS 84
#print(RivSub) # basic information
#plot(RivSub) # plot
## Mar
MarSub <- st_read("MarSub.geojson") # read in
## Reading layer `MarSub' from data source
## `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/MarSub.geojson'
## using driver `GeoJSON'
## Simple feature collection with 849 features and 3 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 518664.6 ymin: 66522.39 xmax: 534652.3 ymax: 100187.2
## Geodetic CRS: WGS 84
#print(MarSub) # basic information
#plot(MarSub) # plot
## Den
DenSub <- st_read("DenSub.geojson") # read in
## Reading layer `DenSub' from data source
## `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/DenSub.geojson'
## using driver `GeoJSON'
## Simple feature collection with 849 features and 3 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 518512 ymin: 66489.62 xmax: 534503.6 ymax: 101250.4
## Geodetic CRS: WGS 84
#print(DenSub) # basic information
#plot(DenSub) # plot
Info on color scheme and land classification number correspondence. !DO NOT RUN!
# values, coloring, and classification all listed here
'darkseagreen2' # marantaceae = 1
## [1] "darkseagreen2"
'plum3' # dense = 9
## [1] "plum3"
'tan' # bai = 10
## [1] "tan"
'midnightblue' # water = 90
## [1] "midnightblue"
'chartreuse4' # riverine = 100
## [1] "chartreuse4"
'chocolate4' # savanna = 1000
## [1] "chocolate4"
'gray9' # urban = 10000
## [1] "gray9"
—- FIELD DATA ANALYSIS —-
Rarefaction
#### GET DATA ####
# calculations done in the MATLAB alpha.m program for species richness -- values are cumulative counts of unique species per habitat, each plot adding its new species to the previous plots' total
dense <- c(36, 67, 89, 100, 105)
marantaceae <- c(11, 23, 27, 29, 30)
riverine <- c(24, 51, 69, 84, 0) # the fifth riverine value does not exist; a placeholder is required for cbind() below, is removed later in the code, and is excluded from all analyses
# combine all data into a single matrix
all_data <- cbind(dense, marantaceae, riverine)
# create a vector of colors for lines and points for eventual plotting
all_line_colors <- c('plum3', 'darkseagreen2', 'chartreuse4')
all_point_colors <- c('plum3', 'darkseagreen2', 'chartreuse4')
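# as a cross-check, cumulative richness vectors like these could be rebuilt in R from
# the raw FEPS table; a minimal sketch assuming FEPS's Plot/Class/Species columns and
# an illustrative plot order (the MATLAB alpha.m output above remains authoritative)
cum_richness <- function(df, plots) { # running count of unique species across plots
seen <- character(0) # species observed so far
sapply(plots, function(p) {
seen <<- union(seen, df$Species[df$Plot == p]) # add this plot's new species
length(seen) # cumulative richness after this plot
})
}
#cum_richness(subset(FEPS, Class == "established"), c("F100", "F110", "F200", "F300", "F330")) # example call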
#### FIT LOG GROWTH FUNCTION ####
# logistic growth function to fit to data points
# a = asymptote (species-richness plateau), b = growth rate, c = inflection point
logistic_growth <- function(x, a, b, c) {
a / (1 + exp(-b * (x - c)))
}
# fit logistic growth model function and print results
fit_logistic_growth <- function(x_data, y_data, dataset_name, plot_color) {
fit <- nls(y_data ~ logistic_growth(x_data, a, b, c),
start = list(a = max(y_data), b = 1, c = mean(x_data)))
coefficients <- coef(fit) # get coefficients
cat("Fitted Coefficients for", dataset_name, ":\n", coefficients, "\n")
#### PLOTTING INFORMATION ####
# generate points for the fitted curve
x_fit <- seq(min(x_data), 15, length.out = 100)
y_fit <- predict(fit, newdata = data.frame(x_data = x_fit))
# plot data and fitted logistic growth curve with an expanded x-axis range
lines(x_fit, y_fit, col = plot_color, lty = 1)
# plot data points
points(x_data, y_data, pch = 16, col = plot_color)
# compute a pseudo R-squared for the fit (reported with cat() below)
rsquared <- 1 - (sum(residuals(fit)^2) / sum((y_data - mean(y_data))^2))
# calculate 1 SD of residuals
sd_residuals <- sd(residuals(fit))
# get information on the 1 standard deviation bounds
asymptote <- coefficients["a"] # asymptote
# get upper and lower bounds based on sd residuals
upper_bound <- asymptote + sd_residuals
lower_bound <- asymptote - sd_residuals
#print(asymptote) # view
#print(sd_residuals) # view
#print(upper_bound) # view
#print(lower_bound) # view
abline(h = asymptote, col = plot_color, lty = 2) # line at asymptote
cat("R-squared value for", dataset_name, ":", rsquared, "\n")
cat("Asymptote for", dataset_name, ":", asymptote, "\n\n")
}
#### PLOT ####
# x data for plotting
x_data <- c(1, 2, 3, 4, 5)
riverine <- c(24, 51, 69, 84) # riverine data without the placeholder fifth value
# Set the file path where you want to save the PDF
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/raref.pdf"
# Open a PDF device
#pdf(pdf_path)
# Create a new plot
plot(x_data, dense, pch = 16, main = "Rarefaction", xlab = "Plot number", ylab = "Species richness", xlim = c(min(x_data), 15), ylim = c(0, 120))
fit_logistic_growth(x_data, dense, "dense", "plum3")
## Fitted Coefficients for dense :
## 106.4216 1.159431 1.565719
## R-squared value for dense : 0.9996046
## Asymptote for dense : 106.4216
fit_logistic_growth(x_data, marantaceae, "marantaceae", "darkseagreen2")
## Fitted Coefficients for marantaceae :
## 29.43998 1.672137 1.293561
## R-squared value for marantaceae : 0.9943621
## Asymptote for marantaceae : 29.43998
# Use the original x_data for riverine
x_data_riverine <- c(1, 2, 3, 4)
fit_logistic_growth(x_data_riverine, riverine, "riverine", "chartreuse4")
## Fitted Coefficients for riverine :
## 90.41239 1.129562 1.849361
## R-squared value for riverine : 0.995032
## Asymptote for riverine : 90.41239
# add a vertical line at x = 4: sampling cutoff
abline(v = 4, col = "red", lty = 1)
# add legend with points and asymptotes, organized by forest type with bold subheadings and title
legend("right", legend = c(expression(bold("Dense forest")), " Collected data points", " Fitted curve", " Asymptote", expression(bold("Marantaceae forest")), " Collected data points", expression(" Fitted curve"), " Asymptote", expression(bold("Riverine forest")), " Collected data points", " Fitted curve", " Asymptote", expression(bold("Sampling cutoff"))),
col = c(NA, "plum3", "plum3", "plum3", NA, "darkseagreen2", "darkseagreen2", "darkseagreen2", NA, "chartreuse4", "chartreuse4", "chartreuse4", "red"),
lty = c(NA, NA, 1, 2, NA, NA, 1, 2, NA, NA, 1, 2, 1),
pch = c(16, 16, NA, NA, 16, 16, NA, NA, 16, 16, NA, NA, NA),
title = expression(bold("Legend")),
lwd = c(NA, NA, 1, 1, NA, NA, 1, 1, NA, NA, 1, 1, 1),
text.col = c("black", rep("black", 12)),
cex = 0.8)
text(x = 9, y = 5, labels = "All R^2 values > 0.95", cex = 0.8, font = 2, col = "black") # R^2 informative text
#dev.off() #for saving
Alpha diversity species richness per plot
#### GET DATA ####
# calculations done in MATLAB plotalpha program
# create vectors of alpha diversity
established <- c(36, 29, 37, 48, 44)
marantaceae <- c(11, 2, 20, 3, 13)
riverine <- c(24, 29, 34, 36)
#### ANOVA ASSUMPTIONS ####
## test for normality
# Shapiro-Wilk test for normality dense
shapiro_test_resultDENSE <- shapiro.test(established)
# display the result
shapiro_test_resultDENSE
##
## Shapiro-Wilk normality test
##
## data: established
## W = 0.96876, p-value = 0.8673
# Shapiro-Wilk test for normality mar
shapiro_test_resultMAR <- shapiro.test(marantaceae)
# display the result
shapiro_test_resultMAR
##
## Shapiro-Wilk normality test
##
## data: marantaceae
## W = 0.92774, p-value = 0.5811
# Shapiro-Wilk test for normality riv
shapiro_test_resultRIV <- shapiro.test(riverine)
# display the result
shapiro_test_resultRIV
##
## Shapiro-Wilk normality test
##
## data: riverine
## W = 0.95015, p-value = 0.7171
## ALL p values > 0.05 so normality is assumed
## test variance between groups
# combine all groups into a single vector
all_data <- c(established, marantaceae, riverine)
# create a grouping variable
group <- rep(c("established", "marantaceae", "riverine"),
times = c(length(established), length(marantaceae), length(riverine)))
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(all_data, group)
# display the result
bartlett_test_result
##
## Bartlett test of homogeneity of variances
##
## data: all_data and group
## Bartlett's K-squared = 0.36474, df = 2, p-value = 0.8333
## ALL p values > 0.05 so homogeneity of variances is assumed
#### PLOT SPECIES RICHNESS ####
# Set the file path where you want to save the PDF
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/alpha.pdf"
# Open a PDF device
#pdf(pdf_path)
# combine data into a list
data_list <- list(established, marantaceae, riverine)
# create boxplots
boxplot(data_list, names = c('dense', 'marantaceae', 'riverine'),
col = c('plum3', 'darkseagreen2', 'chartreuse4'),
ylab = 'Species richness', ylab.cex = 1, cex.lab = 1, cex.axis = 1,
main = 'Species richness by plot')
# add a legend
legend('bottomleft', legend = c('dense', 'marantaceae', 'riverine'),
col = c('plum3', 'darkseagreen2', 'chartreuse4'), pch = 15, cex = 1)
# Create a grouping variable
group <- rep(c("established", "marantaceae", "riverine"),
times = c(length(established), length(marantaceae), length(riverine)))
#### ANOVA ####
# perform ANOVA
anova_result <- aov(all_data ~ group)
# display the result
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 2221.4 1111 23.12 0.000115 ***
## Residuals 11 528.3 48
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# conduct Tukey's HSD post hoc test
tukey_result <- TukeyHSD(anova_result)
# display the result
print(tukey_result)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = all_data ~ group)
##
## $group
## diff lwr upr p adj
## marantaceae-established -29.00 -40.838483 -17.161517 0.0001028
## riverine-established -8.05 -20.606608 4.506608 0.2373471
## riverine-marantaceae 20.95 8.393392 33.506608 0.0023539
# extract the letters
letters <- multcompLetters(tukey_result$group[, "p adj"])
# display the letters
#print(letters)
# marantaceae riverine established
# "a" "b" "b"
# add letters to box plots
text(x = 1, y = 40, labels = "b", cex = .9, font = 2, col = "black")
text(x = 2, y = 8, labels = "a", cex = .9, font = 2, col = "black")
text(x = 3, y = 29, labels = "b", cex = .9, font = 2, col = "black")
#dev.off() #for saving
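The significance letters above are placed by hand; a small sketch, not the thesis's method, that instead derives label positions from the letters object and each group's median:
lab <- letters$Letters[c("established", "marantaceae", "riverine")] # grouping letters in box order
meds <- sapply(data_list, median) # one median per box
#text(x = 1:3, y = meds + 3, labels = lab, font = 2) # draw each letter above its box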
Alpha autocorrelation calculations
#### FORMAT DATA FOR FIRST INPUT OF MORANS I ####
# Code inspired by: https://stats.oarc.ucla.edu/r/faq/how-can-i-calculate-morans-i-in-r/
# spatial points data frame for each point
points <- lapply(list(F100, F110, F200, F300, F330, M1, M2, M3, M4, M5, R20, R30, R4, R5), function(df) {
coords <- df[, c("x", "y")] # x and y coordinates from the data
spatial_points <- SpatialPoints(coords)
proj4string(spatial_points) <- CRS(proj4string(CHM)) # assign the same CRS as CHM
return(spatial_points)
})
all_points <- rbind(F100, F110, F200, F300, F330, M1, M2, M3, M4, M5, R20, R30, R4, R5) # combine spatial points
points_sp <- SpatialPointsDataFrame(coords = all_points[, c("x", "y")], data = all_points) # x/y mapping
#plot(points_sp) # plot
#### RUN MORANS I ####
# For mar: P < 0.05 AUTOCORR*
MAR <- all_points[6:10,] # subset for mar
MAR.dists <- as.matrix(dist(cbind(MAR$x, MAR$y))) # distance matrix
MAR.dists.inv <- 1/MAR.dists # inverse distance matrix
diag(MAR.dists.inv) <- 0 # diagonals are all 0
print(MAR.dists.inv) # view
## 1 2 3 4 5
## 1 0.000000000 0.002774783 0.004088524 0.002473272 0.002730743
## 2 0.002774783 0.000000000 0.002411953 0.008780044 0.001756063
## 3 0.004088524 0.002411953 0.000000000 0.002578329 0.006381946
## 4 0.002473272 0.008780044 0.002578329 0.000000000 0.001876275
## 5 0.002730743 0.001756063 0.006381946 0.001876275 0.000000000
Moran.I(marantaceae, MAR.dists.inv, scaled = TRUE) # run Moran's I
## $observed
## [1] 0.1427277
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.1979179
##
## $p.value
## [1] 0.04722291
# For riv: NO AUTOCORR
RIV <- all_points[11:14,] # subset for riv
RIV.dists <- as.matrix(dist(cbind(RIV$x, RIV$y))) # distance matrix
RIV.dists.inv <- 1/RIV.dists # inverse distance matrix
diag(RIV.dists.inv) <- 0 # diagonals are all 0
print(RIV.dists.inv) # view
## 1 2 3 4
## 1 0.0000000000 0.0002464323 0.0002485884 0.0031384506
## 2 0.0002464323 0.0000000000 0.0025512108 0.0002298021
## 3 0.0002485884 0.0025512108 0.0000000000 0.0002311272
## 4 0.0031384506 0.0002298021 0.0002311272 0.0000000000
Moran.I(riverine, RIV.dists.inv, scaled = TRUE) # run Moran's I
## $observed
## [1] -0.8243662
##
## $expected
## [1] -0.3333333
##
## $sd
## [1] 0.5577275
##
## $p.value
## [1] 0.3786333
# For dense: NO AUTOCORR
DEN <- all_points[1:5,] # subset for den
DEN.dists <- as.matrix(dist(cbind(DEN$x, DEN$y)))
DEN.dists.inv <- 1/DEN.dists # inverse distance matrix
diag(DEN.dists.inv) <- 0 # diagonals are all 0
print(DEN.dists.inv) # view
## 1 2 3 4 5
## 1 0.000000e+00 5.253938e-03 6.019823e-05 9.247962e-05 9.147051e-05
## 2 5.253938e-03 0.000000e+00 5.977000e-05 9.126227e-05 9.025593e-05
## 3 6.019823e-05 5.977000e-05 0.000000e+00 1.588458e-04 1.581111e-04
## 4 9.247962e-05 9.126227e-05 1.588458e-04 0.000000e+00 3.514931e-03
## 5 9.147051e-05 9.025593e-05 1.581111e-04 3.514931e-03 0.000000e+00
Moran.I(established, DEN.dists.inv, scaled = TRUE) # run Moran's I
## $observed
## [1] 0.5724514
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.4725076
##
## $p.value
## [1] 0.08175199
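For reference, the observed statistic reported by Moran.I can be computed by hand. A minimal sketch of the classic (unscaled) Moran's I for an inverse-distance weight matrix; note the runs above used scaled = TRUE, which additionally rescales the observed value:
moran_obs <- function(x, w) { # observed (unscaled) Moran's I for weight matrix w
z <- x - mean(x) # deviations from the mean
(length(x) / sum(w)) * sum(w * outer(z, z)) / sum(z^2)
}
#moran_obs(marantaceae, MAR.dists.inv) # example call, unscaled counterpart of the run above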
Wood density
#### STANDARDIZE NAMING ####
# change column names for PS (small trees)
colnames(PS) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "WD", "SUMS")
# change column names for PM (medium trees)
colnames(PM) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "WD", "SUMS")
# change column names for PL (large trees)
colnames(PL) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "WD", "SUMS")
PE <- PW # copy the all-rows table
colnames(PE) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "H3", "H4", "H5","H6", "WD", "SUMS")
R <- list() # initialize
D <- list() # initialize
M <- list() # initialize
#### WOOD DENSITY BY HABITAT ####
subset_list <- list(PE, PL, PM, PS) # subset
SUBP <- list() # initialize new list
SaveNames <- c("/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/WD.pdf","/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/WDS.pdf","/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/WDM.pdf","/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/WDL.pdf") # for figure saving
for (i in seq_along(subset_list)) { # loop through subsets of sizes
P <- subset_list[[i]]
indices <- which(P$hab %in% c("established", "marantaceae", "riverine"))
# subset PS using the true indices
subset_P <- P[indices, ]
# mar subset for auto cor test
subset_by_habitatM <- subset(subset_P, hab == "marantaceae")
subset_by_habitatM1 <- subset(subset_by_habitatM, plot == "M1")
avg_densityM1 <- mean(subset_by_habitatM1$WD)
subset_by_habitatM2 <- subset(subset_by_habitatM, plot == "M2")
avg_densityM2 <- mean(subset_by_habitatM2$WD)
subset_by_habitatM3 <- subset(subset_by_habitatM, plot == "M3")
avg_densityM3 <- mean(subset_by_habitatM3$WD)
subset_by_habitatM4 <- subset(subset_by_habitatM, plot == "M4")
avg_densityM4 <- mean(subset_by_habitatM4$WD)
subset_by_habitatM5 <- subset(subset_by_habitatM, plot == "M5")
avg_densityM5 <- mean(subset_by_habitatM5$WD)
marantaceae <- c(avg_densityM1,avg_densityM2,avg_densityM3,avg_densityM4,avg_densityM5)
M[[i]] <- marantaceae
# den subset for auto cor test
subset_by_habitatD <- subset(subset_P, hab == "established")
subset_by_habitatDF100 <- subset(subset_by_habitatD, plot == "F100")
avg_densityDF100 <- mean(subset_by_habitatDF100$WD)
subset_by_habitatDF110 <- subset(subset_by_habitatD, plot == "F110")
avg_densityDF110 <- mean(subset_by_habitatDF110$WD)
subset_by_habitatDF200 <- subset(subset_by_habitatD, plot == "F200")
avg_densityDF200 <- mean(subset_by_habitatDF200$WD)
subset_by_habitatDF300 <- subset(subset_by_habitatD, plot == "F300")
avg_densityDF300 <- mean(subset_by_habitatDF300$WD)
subset_by_habitatDF330 <- subset(subset_by_habitatD, plot == "F330")
avg_densityDF330 <- mean(subset_by_habitatDF330$WD)
established <- c(avg_densityDF100,avg_densityDF110,avg_densityDF200,avg_densityDF300,avg_densityDF330)
D[[i]] <- established
# riv subset for auto cor test
subset_by_habitatR <- subset(subset_P, hab == "riverine")
subset_by_habitatR20 <- subset(subset_by_habitatR, plot == "R20")
avg_densityR20 <- mean(subset_by_habitatR20$WD)
subset_by_habitatR30 <- subset(subset_by_habitatR, plot == "R30")
avg_densityR30 <- mean(subset_by_habitatR30$WD)
subset_by_habitatR4 <- subset(subset_by_habitatR, plot == "R4")
avg_densityR4 <- mean(subset_by_habitatR4$WD)
subset_by_habitatR5 <- subset(subset_by_habitatR, plot == "R5")
avg_densityR5 <- mean(subset_by_habitatR5$WD)
riverine <- c(avg_densityR20,avg_densityR30,avg_densityR4,avg_densityR5) # combine
R[[i]] <- riverine
#pdf_path <- SaveNames[i]
# Open a PDF device
#pdf(pdf_path)
# create a boxplot
P_viz <- ggplot(subset_P, aes(x = hab, y = WD, fill = hab)) +
geom_boxplot() +
labs(x = 'Habitat type', y = 'Wood density', title = 'Tree wood density variations by habitat') +
labs(fill = 'Habitat Type') +
scale_fill_manual(values = c('plum3', 'darkseagreen2', 'chartreuse4')) +
theme(text = element_text(size = 20)) +
coord_cartesian(ylim = c(0, 1))
plot(P_viz) # plot
#dev.off() #for saving
#### ANOVA ASSUMPTIONS ####
## test for normality
# Shapiro-Wilk test for normality
subset_riverine <- subset(subset_P, hab == "riverine")
shapiro_test_resultRIV <- shapiro.test(subset_riverine$WD)
# display the result
print(shapiro_test_resultRIV)
subset_den <- subset(subset_P, hab == "established")
shapiro_test_resultDEN <- shapiro.test(subset_den$WD)
# display the result
print(shapiro_test_resultDEN)
subset_mar <- subset(subset_P, hab == "marantaceae")
shapiro_test_resultMAR <- shapiro.test(subset_mar$WD)
# display the result
print(shapiro_test_resultMAR)
## AT LEAST 1 p value < 0.05 so normality is NOT assumed
# and transformation does not make the data normal
## test variance between groups
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(subset_P$WD, subset_P$hab)
# display the result
print(bartlett_test_result)
## AT LEAST 1 p value < 0.05 so homogeneity of variance differences is NOT assumed
#### KRUSKAL-WALLIS TEST ####
# Kruskal-Wallis Test
result <- kruskal.test(WD ~ hab, data = subset_P)
print(result) # view
# Dunn test for posthoc analysis
posthoc_result <- dunn.test(subset_P$WD, subset_P$hab, method = "bonferroni")
# significant differences in WD among established, marantaceae, and riverine for most size subsets -- see each printed Dunn result
SUBP[[i]] <- subset_P
}
##
## Shapiro-Wilk normality test
##
## data: subset_riverine$WD
## W = 0.93137, p-value < 2.2e-16
##
##
## Shapiro-Wilk normality test
##
## data: subset_den$WD
## W = 0.94687, p-value < 2.2e-16
##
##
## Shapiro-Wilk normality test
##
## data: subset_mar$WD
## W = 0.91345, p-value = 1.034e-11
##
##
## Bartlett test of homogeneity of variances
##
## data: subset_P$WD and subset_P$hab
## Bartlett's K-squared = 56.133, df = 2, p-value = 6.468e-13
##
##
## Kruskal-Wallis rank sum test
##
## data: WD by hab
## Kruskal-Wallis chi-squared = 377.75, df = 2, p-value < 2.2e-16
##
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 377.7518, df = 2, p-value = 0
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | establis marantac
## ---------+----------------------
## marantac | 16.28680
## | 0.0000*
## |
## riverine | 13.05307 -9.749851
## | 0.0000* 0.0000*
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
##
## Shapiro-Wilk normality test
##
## data: subset_riverine$WD
## W = 0.93693, p-value = 0.009263
##
##
## Shapiro-Wilk normality test
##
## data: subset_den$WD
## W = 0.93268, p-value = 0.0003053
##
##
## Shapiro-Wilk normality test
##
## data: subset_mar$WD
## W = 0.92634, p-value = 0.1483
##
##
## Bartlett test of homogeneity of variances
##
## data: subset_P$WD and subset_P$hab
## Bartlett's K-squared = 24.363, df = 2, p-value = 5.123e-06
##
##
## Kruskal-Wallis rank sum test
##
## data: WD by hab
## Kruskal-Wallis chi-squared = 6.9189, df = 2, p-value = 0.03145
##
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 6.9189, df = 2, p-value = 0.03
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | establis marantac
## ---------+----------------------
## marantac | 2.626357
## | 0.0129*
## |
## riverine | 0.839537 -1.929388
## | 0.6018 0.0805
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
##
## Shapiro-Wilk normality test
##
## data: subset_riverine$WD
## W = 0.91741, p-value < 2.2e-16
##
##
## Shapiro-Wilk normality test
##
## data: subset_den$WD
## W = 0.93977, p-value < 2.2e-16
##
##
## Shapiro-Wilk normality test
##
## data: subset_mar$WD
## W = 0.77253, p-value = 6.168e-06
##
##
## Bartlett test of homogeneity of variances
##
## data: subset_P$WD and subset_P$hab
## Bartlett's K-squared = 21.491, df = 2, p-value = 2.155e-05
##
##
## Kruskal-Wallis rank sum test
##
## data: WD by hab
## Kruskal-Wallis chi-squared = 135.93, df = 2, p-value < 2.2e-16
##
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 135.9259, df = 2, p-value = 0
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | establis marantac
## ---------+----------------------
## marantac | 10.47243
## | 0.0000*
## |
## riverine | 6.564840 -8.520621
## | 0.0000* 0.0000*
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
##
## Shapiro-Wilk normality test
##
## data: subset_riverine$WD
## W = 0.92614, p-value < 2.2e-16
##
##
## Shapiro-Wilk normality test
##
## data: subset_den$WD
## W = 0.92969, p-value < 2.2e-16
##
##
## Shapiro-Wilk normality test
##
## data: subset_mar$WD
## W = 0.89989, p-value = 3.134e-11
##
##
## Bartlett test of homogeneity of variances
##
## data: subset_P$WD and subset_P$hab
## Bartlett's K-squared = 67.294, df = 2, p-value = 2.44e-15
##
##
## Kruskal-Wallis rank sum test
##
## data: WD by hab
## Kruskal-Wallis chi-squared = 323.64, df = 2, p-value < 2.2e-16
##
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 323.6446, df = 2, p-value = 0
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | establis marantac
## ---------+----------------------
## marantac | 13.53082
## | 0.0000*
## |
## riverine | 13.71990 -5.638251
## | 0.0000* 0.0000*
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
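# note: the per-plot means assembled plot by plot above could be computed more
# compactly; a sketch equivalent under the assumption that tapply's alphabetical
# plot ordering matches the order used above (it does for these plot names)
plot_means <- function(P, habitat) { # mean WD per plot within one habitat
sub <- subset(P, hab == habitat)
tapply(sub$WD, sub$plot, mean) # named vector of per-plot mean wood densities
}
#plot_means(subset_list[[1]], "marantaceae") # e.g. reproduces M[[1]] for the all-rows table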
#### WOOD DENSITY BY TREE HEIGHT ####
habtypes <- list("established", "marantaceae", "riverine") # list types of habitat
SaveNames <- c("/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/estWD.pdf","/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/marWD.pdf","/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/rivWD.pdf")
y_limits <- c(0, .9)
for (i in seq_along(habtypes)) { # loop through habitat types
# subset for S, M, and L (variable names say 'riverine' but hold whichever habitat is current)
subset_riverineS <- subset(SUBP[[4]], hab == habtypes[[i]])
subset_riverineM <- subset(SUBP[[3]], hab == habtypes[[i]])
subset_riverineL <- subset(SUBP[[2]], hab == habtypes[[i]])
# rename
names(subset_riverineS)[7] <- "WD"
names(subset_riverineM)[7] <- "WD"
names(subset_riverineL)[7] <- "WD"
# combine the subsetted data
combined_data <- bind_rows(
mutate(subset_riverineS, Size = "Small"),
mutate(subset_riverineM, Size = "Medium"),
mutate(subset_riverineL, Size = "Large")
)
#### CHECK ANOVA ASSUMPTIONS ####
# Shapiro-Wilk test for normality
subset_SM <- subset(combined_data, Size == "Small")
shapiro_test_resultSM <- shapiro.test(subset_SM$WD)
print(shapiro_test_resultSM)
# Shapiro-Wilk test for normality
subset_MM <- subset(combined_data, Size == "Medium")
shapiro_test_resultMM <- shapiro.test(subset_MM$WD)
print(shapiro_test_resultMM)
# Shapiro-Wilk test for normality
subset_LM <- subset(combined_data, Size == "Large")
shapiro_test_resultLM <- shapiro.test(subset_LM$WD)
print(shapiro_test_resultLM)
# p values generally < 0.05, so normality is not assumed (see printed results per habitat)
## test variance between groups
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(WD ~ Size, data = combined_data)
# display the result
print(bartlett_test_result)
## when the Bartlett p value < 0.05, homogeneity of variances is NOT assumed... so use the KW test
#### KRUSKAL-WALLIS TEST ####
result <- kruskal.test(WD ~ Size, data = combined_data)
print(result) # view
# Dunn test for posthoc
posthoc_result <- dunn.test(combined_data$WD, combined_data$Size, method = "bonferroni")
# which size classes differ varies by habitat; see the printed Dunn results below
print(posthoc_result)
#pdf_path <- SaveNames[i]
# Open a PDF device
#pdf(pdf_path)
#### PLOT ####
boxplot <- ggplot(combined_data, aes(x = Size, y = WD, fill = Size)) + # boxplot
geom_boxplot() +
labs(x = 'Size', y = 'Wood density', title = paste('Wood Density for', habtypes[[i]])) +
scale_fill_manual(values = c('Small' = 'white', 'Medium' = 'orange2', 'Large' = 'tomato3')) +
theme(text = element_text(size = 14))+
ylim(y_limits) # Set y-axis limits
print(boxplot)
#dev.off() #for saving
}
##
## Shapiro-Wilk normality test
##
## data: subset_SM$WD
## W = 0.92969, p-value < 2.2e-16
##
##
## Shapiro-Wilk normality test
##
## data: subset_MM$WD
## W = 0.93977, p-value < 2.2e-16
##
##
## Shapiro-Wilk normality test
##
## data: subset_LM$WD
## W = 0.93268, p-value = 0.0003053
##
##
## Bartlett test of homogeneity of variances
##
## data: WD by Size
## Bartlett's K-squared = 5.6458, df = 2, p-value = 0.05943
##
##
## Kruskal-Wallis rank sum test
##
## data: WD by Size
## Kruskal-Wallis chi-squared = 4.498, df = 2, p-value = 0.1055
##
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 4.498, df = 2, p-value = 0.11
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | Large Medium
## ---------+----------------------
## Medium | -2.089527
## | 0.0550
## |
## Small | -1.830495 0.911858
## | 0.1008 0.5428
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
## $chi2
## [1] 4.498037
##
## $Z
## [1] -2.0895271 -1.8304954 0.9118588
##
## $P
## [1] 0.01833015 0.03358795 0.18092153
##
## $P.adjusted
## [1] 0.05499045 0.10076384 0.54276459
##
## $comparisons
## [1] "Large - Medium" "Large - Small" "Medium - Small"
## Warning: Removed 87 rows containing non-finite values (`stat_boxplot()`).
##
## Shapiro-Wilk normality test
##
## data: subset_SM$WD
## W = 0.89989, p-value = 3.134e-11
##
##
## Shapiro-Wilk normality test
##
## data: subset_MM$WD
## W = 0.77253, p-value = 6.168e-06
##
##
## Shapiro-Wilk normality test
##
## data: subset_LM$WD
## W = 0.92634, p-value = 0.1483
##
##
## Bartlett test of homogeneity of variances
##
## data: WD by Size
## Bartlett's K-squared = 14.687, df = 2, p-value = 0.0006467
##
##
## Kruskal-Wallis rank sum test
##
## data: WD by Size
## Kruskal-Wallis chi-squared = 31.926, df = 2, p-value = 1.168e-07
##
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 31.9257, df = 2, p-value = 0
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | Large Medium
## ---------+----------------------
## Medium | 3.279640
## | 0.0016*
## |
## Small | -0.378235 -5.647023
## | 1.0000 0.0000*
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
## $chi2
## [1] 31.92568
##
## $Z
## [1] 3.2796409 -0.3782358 -5.6470235
##
## $P
## [1] 5.196964e-04 3.526277e-01 8.162475e-09
##
## $P.adjusted
## [1] 1.559089e-03 1.000000e+00 2.448743e-08
##
## $comparisons
## [1] "Large - Medium" "Large - Small" "Medium - Small"
## Warning: Removed 3 rows containing non-finite values (`stat_boxplot()`).
##
## Shapiro-Wilk normality test
##
## data: subset_SM$WD
## W = 0.92614, p-value < 2.2e-16
##
##
## Shapiro-Wilk normality test
##
## data: subset_MM$WD
## W = 0.91741, p-value < 2.2e-16
##
##
## Shapiro-Wilk normality test
##
## data: subset_LM$WD
## W = 0.93693, p-value = 0.009263
##
##
## Bartlett test of homogeneity of variances
##
## data: WD by Size
## Bartlett's K-squared = 69.981, df = 2, p-value = 6.365e-16
##
##
## Kruskal-Wallis rank sum test
##
## data: WD by Size
## Kruskal-Wallis chi-squared = 80.499, df = 2, p-value < 2.2e-16
##
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 80.4987, df = 2, p-value = 0
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | Large Medium
## ---------+----------------------
## Medium | -1.790757
## | 0.1100
## |
## Small | 1.246783 8.971701
## | 0.3187 0.0000*
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
## $chi2
## [1] 80.49868
##
## $Z
## [1] -1.790757 1.246784 8.971702
##
## $P
## [1] 3.666614e-02 1.062384e-01 1.459839e-19
##
## $P.adjusted
## [1] 1.099984e-01 3.187152e-01 4.379518e-19
##
## $comparisons
## [1] "Large - Medium" "Large - Small" "Medium - Small"
## Warning: Removed 25 rows containing non-finite values (`stat_boxplot()`).
#### AUTOCORRELATION ####
# no autocorrelation found in any:
# small
Moran.I(R[[1]], RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.8225437
##
## $expected
## [1] -0.3333333
##
## $sd
## [1] 0.4753479
##
## $p.value
## [1] 0.3034031
Moran.I(M[[1]], MAR.dists.inv, scaled = TRUE)
## $observed
## [1] 0.006988331
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.2194327
##
## $p.value
## [1] 0.2415391
Moran.I(D[[1]], DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.03501003
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.4437589
##
## $p.value
## [1] 0.628049
# medium
Moran.I(R[[2]], RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.5243141
##
## $expected
## [1] -0.3333333
##
## $sd
## [1] 0.5006785
##
## $p.value
## [1] 0.7028738
Moran.I(M[[2]], MAR.dists.inv, scaled = TRUE)
## $observed
## [1] 0.02121841
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.1574666
##
## $p.value
## [1] 0.08499954
Moran.I(D[[2]], DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.55921
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.5461904
##
## $p.value
## [1] 0.5713114
# large
Moran.I(R[[3]], RIV.dists.inv, scaled = TRUE)
## $observed
## [1] 0.3135492
##
## $expected
## [1] -0.3333333
##
## $sd
## [1] 0.4842379
##
## $p.value
## [1] 0.1815893
Moran.I(M[[3]], MAR.dists.inv, scaled = TRUE)
## $observed
## [1] -0.3999048
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.2138138
##
## $p.value
## [1] 0.4832409
Moran.I(D[[3]], DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.07588092
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.4310849
##
## $p.value
## [1] 0.6862796
# all
Moran.I(R[[4]], RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.625936
##
## $expected
## [1] -0.3333333
##
## $sd
## [1] 0.3669714
##
## $p.value
## [1] 0.4252509
Moran.I(M[[4]], MAR.dists.inv, scaled = TRUE)
## $observed
## [1] -0.08207075
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.213484
##
## $p.value
## [1] 0.4315085
Moran.I(D[[4]], DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.1395928
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.3458645
##
## $p.value
## [1] 0.749559
Wood density variance/accuracy calculations
#### BY HEIGHT CLASS TREES VARIANCE ####
### ESTABLISHED
## small established
PS$var <- NA # initialize variance column
for (i in 1:nrow(PS)) { # loop through small trees rows
hab_class <- PS[i,2]
if (hab_class == "established") {
RowCode <- PS[i,4] # get species code
SPoi <- RowCode$SP # get sp subset
row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
VVal <- SPDWDV$v[row_placement] # get the v value at the row number
PS$var[i] <- VVal # add it in the PS dataframe
}
}
MSv <-mean(PS$var[PS[,2] == "established"], na.rm = TRUE) # find mean variance
print(MSv)
## [1] 0.7301031
PP <- (PS$var[PS[,2] == "established"])
SSv <- sum(PP, na.rm = TRUE) # sum of variances for later calculation
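# the same lookup can be vectorized with match(); a sketch equivalent to the loop
# above, assuming each species code appears at most once in SPDWDV
lookup_var <- function(P, habitat) { # per-row wood density variance for one habitat
idx <- P$hab == habitat
replace(P$var, idx, SPDWDV$v[match(P$SP[idx], SPDWDV$Code)])
}
#PS$var <- lookup_var(PS, "established") # example call matching the loop's result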
## medium established
PM$var <- NA # initialize variance column
for (i in 1:nrow(PM)) { # loop through medium trees rows
hab_class <- PM[i,2]
if (hab_class == "established") {
RowCode <- PM[i,4] # get the code from the row
SPoi <- RowCode$SP # get sp subset
row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
VVal <- SPDWDV$v[row_placement] # get the v value at the row number
PM$var[i] <- VVal # add it to the var column of the PM dataframe
}
}
MMv <-mean(PM$var[PM[,2] == "established"], na.rm = TRUE) # find mean variance
print(MMv)
## [1] 0.7292308
PP <- (PM$var[PM[,2] == "established"]) # select for type
SMv <- sum(PP, na.rm = TRUE) # sum of variances for later calculation
## large established
PL$var <- NA # initialize variance column
for (i in 1:nrow(PL)) { # loop through large trees rows
hab_class <- PL[i,2]
if (hab_class == "established") {
RowCode <- PL[i,4] # get the code from the row
SPoi <- RowCode$SP # get sp subset
row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
VVal <- SPDWDV$v[row_placement] # get the v value at the row number
PL$var[i] <- VVal # add it to the var column of the PL dataframe
}
}
MLv <- mean(PL$var[PL[,2] == "established"], na.rm = TRUE) # find mean variance
print(MLv)
## [1] 0.7633169
PP <- (PL$var[PL[,2] == "established"]) # select for type
SLv <- sum(PP, na.rm = TRUE) # sum of variances for later calculation
## for all established height classes
all <- c(PM$var[PM[,2] == "established"], PL$var[PL[,2] == "established"], PS$var[PS[,2] == "established"]) # combine
Estall <- mean(all, na.rm = TRUE) # mean
print(Estall) # view
## [1] 0.730695
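# interpretive note, not from the original pipeline: summing per-stem variances as in
# SSv, SMv, and SLv anticipates error propagation -- assuming independent stems,
# Var(x1 + ... + xn) = Var(x1) + ... + Var(xn), so sqrt of a stored sum estimates the
# standard deviation of the corresponding summed wood density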
### RIVERINE
## small riverine
for (i in 1:nrow(PS)) { # loop through small trees rows
hab_class <- PS[i,2]
if (hab_class == "riverine") {
RowCode <- PS[i,4] # get species code
SPoi <- RowCode$SP # get sp subset
row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
VVal <- SPDWDV$v[row_placement] # get the v value at the row number
PS$var[i] <- VVal # add it in the PS dataframe
}
}
MSv <- mean(PS$var[PS[,2] == "riverine"], na.rm = TRUE) # find mean variance
print(MSv) # view
## [1] 0.6780073
PP <- (PS$var[PS[,2] == "riverine"]) # subset by type
SSr <- sum(PP, na.rm = TRUE) # sum of variances for later calculation
## medium riverine
for (i in 1:nrow(PM)) { # loop through medium trees rows
hab_class <- PM[i,2]
if (hab_class == "riverine") {
RowCode <- PM[i,4] # get the code from the row
SPoi <- RowCode$SP # get sp subset
row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
VVal <- SPDWDV$v[row_placement] # get the v value at the row number
PM$var[i] <- VVal # add it to the var column in the PM dataframe
}
}
MMv <- mean(PM$var[PM[,2] == "riverine"], na.rm = TRUE) # find mean variance
print(MMv)
## [1] 0.6596248
PP <- (PM$var[PM[,2] == "riverine"]) # subset by type
SMr <- sum(PP, na.rm = TRUE) # sum of variances for later calculation
## large riverine
for (i in 1:nrow(PL)) { # loop through large trees rows
hab_class <- PL[i,2]
if (hab_class == "riverine") {
RowCode <- PL[i,4] # get the code from the row
SPoi <- RowCode$SP # get sp subset
row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
VVal <- SPDWDV$v[row_placement] # get the v value at the row number
PL$var[i] <- VVal # add it to the var column in the PL dataframe
}
}
MLv <- mean(PL$var[PL[,2] == "riverine"], na.rm = TRUE) # find mean variance
print(MLv)
## [1] 0.6807627
PP <- (PL$var[PL[,2] == "riverine"]) # subset by type
SLr <- sum(PP, na.rm = TRUE) # sum of variances for later calculation
## for all riverine height classes
all <- c(PM$var[PM[,2] == "riverine"], PL$var[PL[,2] == "riverine"], PS$var[PS[,2] == "riverine"]) # combine
Estall <- mean(all, na.rm = TRUE) # mean
print(Estall) # view
## [1] 0.6703066
### MAR
## small marantaceae
for (i in 1:nrow(PS)) { # loop through small trees rows
hab_class <- PS[i,2]
if (hab_class == "marantaceae") {
RowCode <- PS[i,4] # get species code
SPoi <- RowCode$SP # get sp subset
row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
VVal <- SPDWDV$v[row_placement] # get the v value at the row number
PS$var[i] <- VVal # add it in the PS dataframe
}
}
MSv <- mean(PS$var[PS[,2] == "marantaceae"], na.rm = TRUE) # find mean variance
print(MSv) # view
## [1] 0.8254952
PP <- (PS$var[PS[,2] == "marantaceae"]) # subset by type
SSm <- sum(PP, na.rm = TRUE) # sum of variances for later calculation
## medium marantaceae
#PM$var <- NA # initialize variance column
for (i in 1:nrow(PM)) { # loop through medium trees rows
hab_class <- PM[i,2]
if (hab_class == "marantaceae") {
RowCode <- PM[i,4] # get the code from the row
SPoi <- RowCode$SP # get sp subset
row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
VVal <- SPDWDV$v[row_placement] # get the v value at the row number
PM$var[i] <- VVal # add it to the var column in the PM dataframe
}
}
MMv <- mean(PM$var[PM[,2] == "marantaceae"], na.rm = TRUE) # find mean variance
print(MMv)
## [1] 0.7443657
PP <- (PM$var[PM[,2] == "marantaceae"]) # subset by type
SMm <- sum(PP, na.rm = TRUE) # sum of variances for later calculation
## large marantaceae
#PL$var <- NA # initialize variance column
for (i in 1:nrow(PL)) { # loop through large trees rows
hab_class <- PL[i,2]
if (hab_class == "marantaceae") {
RowCode <- PL[i,4] # get the code from the row
SPoi <- RowCode$SP # get sp subset
row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
VVal <- SPDWDV$v[row_placement] # get the v value at the row number
PL$var[i] <- VVal # add it to the var column in the PL dataframe
}
}
MLv <- mean(PL$var[PL[,2] == "marantaceae"], na.rm = TRUE) # find mean variance
print(MLv)
## [1] 0.8331158
PP <- (PL$var[PL[,2] == "marantaceae"]) # subset by type
SLm <- sum(PP, na.rm = TRUE) # sum of variances for later calculation
## for all marantaceae
all <- c(PM$var[PM[,2] == "marantaceae"], PL$var[PL[,2] == "marantaceae"], PS$var[PS[,2] == "marantaceae"]) # combine
Estall <- mean(all, na.rm = TRUE) # mean
print(Estall) # view
## [1] 0.8159731
#### ALL HEIGHT CLASS TREES VARIANCE ####
## all small
small <- c(PS$var[PS[,2] == "marantaceae"], PS$var[PS[,2] == "established"], PS$var[PS[,2] == "riverine"]) # combine
Msmall <- mean(small, na.rm = TRUE) # mean
print(Msmall) # view
## [1] 0.7226849
## all medium
mid <- c(PM$var[PM[,2] == "marantaceae"], PM$var[PM[,2] == "established"], PM$var[PM[,2] == "riverine"]) # combine
Mmid <- mean(mid, na.rm = TRUE) # mean
print(Mmid) # view
## [1] 0.696713
# all large
lar <- c(PL$var[PL[,2] == "marantaceae"], PL$var[PL[,2] == "established"], PL$var[PL[,2] == "riverine"]) # combine
Mlar <- mean(lar, na.rm = TRUE) # mean
print(Mlar) # view
## [1] 0.7444667
## all habitats, all height classes
all <- c(PM$var[PM[,2] == "established"], PL$var[PL[,2] == "established"], PS$var[PS[,2] == "established"], PM$var[PM[,2] == "marantaceae"], PL$var[PL[,2] == "marantaceae"], PS$var[PS[,2] == "marantaceae"], PM$var[PM[,2] == "riverine"], PL$var[PL[,2] == "riverine"], PS$var[PS[,2] == "riverine"]) # combine
ALLall <- mean(all, na.rm = TRUE) # mean
print(ALLall) # view
## [1] 0.7158947
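The per-row lookup loops above could be replaced by a single vectorized lookup per dataframe; a minimal sketch, assuming (as in the loops) that column SP holds the species code matched against SPDWDV$Code:
# equivalent vectorized lookup, all habitat classes at once
PS$var <- SPDWDV$v[match(PS$SP, SPDWDV$Code)]
PM$var <- SPDWDV$v[match(PM$SP, SPDWDV$Code)]
PL$var <- SPDWDV$v[match(PL$SP, SPDWDV$Code)]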
Beta diversity: NMDS
#### GET DATA ####
selected_habitats <- c("marantaceae", "riverine", "established")
# create a subset based on the selected habitats for all trees
subset_FEPS <- subset(FEPS, Class %in% selected_habitats)
PL[, c(5, 6)] <- lapply(PL[, c(5, 6)], function(x) ifelse(x >= 1, 1, x)) # recode counts >= 1 to 1 in columns 5 and 6
# create a subset based on the selected habitats for large trees
subset_FEPSL <- subset(PL, hab %in% selected_habitats)
PM[, c(5, 6)] <- lapply(PM[, c(5, 6)], function(x) ifelse(x >= 1, 1, x)) # recode counts >= 1 to 1 in columns 5 and 6
# create a subset based on the selected habitats for medium trees
subset_FEPSM <- subset(PM, hab %in% selected_habitats)
PS[, c(5, 6)] <- lapply(PS[, c(5, 6)], function(x) ifelse(x >= 1, 1, x)) # recode counts >= 1 to 1 in columns 5 and 6
# create a subset based on the selected habitats for small trees
subset_FEPSS <- subset(PS, hab %in% selected_habitats)
pdf_paths <- c(
"/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/nmds1.pdf",
"/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/nmds2.pdf",
"/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/nmds3.pdf",
"/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/nmds4.pdf"
) # paths to save in loop
#### CONVERT HEIGHT CLASS DATA ####
# sum across rows for columns 5 - 10
subset_FEPS$...11 <- rowSums(subset_FEPS[, 5:10], na.rm = TRUE)
# sum tally by unique species in plot
sum_by_plot_species <- aggregate(subset_FEPS[, 5:10], by = list(plotType = subset_FEPS$Plot, species = subset_FEPS$Species, habitat_type = subset_FEPS$Class), sum, na.rm = TRUE)
sum_by_plot_species$Total_C <- rowSums(sum_by_plot_species[, 4:9])
# pivot the data so each unique species becomes a column, with one plot per row and its habitat label retained
pivoted_data <- sum_by_plot_species %>%
pivot_wider(
id_cols = c("plotType", "habitat_type"),
names_from = "species",
values_from = "Total_C",
values_fill = 0
)
pivoted_data$plotType <- NULL
Singles_pivoted_data <- pivoted_data
#### REMOVE SINGLETONS ####
converted_data <- pivoted_data # to not overwrite pivoted data
# convert all numbers greater than 0 to 1 in columns 2 to the end
converted_data <- mutate_all(converted_data[, 2:ncol(converted_data)], ~ifelse(. > 0, 1, .))
# sum the columns
column_sums <- colSums(converted_data)
# identify columns to keep (where sum is greater than 1)
columns_to_keep <- names(column_sums[column_sums > 1])
pivoted_dataA <- pivoted_data[, c("habitat_type", columns_to_keep)]
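To make the singleton filter concrete, a toy illustration on hypothetical counts (species recorded in only one plot are dropped):
# hypothetical plot-by-species counts
toy <- data.frame(habitat_type = c("a", "b", "c"),
                  sp1 = c(3, 0, 0), # present in one plot only -> singleton, dropped
                  sp2 = c(2, 1, 0), # present in two plots -> kept
                  sp3 = c(1, 1, 1)) # present in all plots -> kept
occ <- colSums(toy[, -1] > 0) # number of plots each species occupies
toy[, c("habitat_type", names(occ)[occ > 1])]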
#### STANDARDIZE NAMING ####
# if earlier chunks were skipped, column names can be re-standardized here; not needed in a sequential run
# Change column names for subset_FEPSL
#colnames(subset_FEPSL) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "WD", "SUMS")
# Change column names for subset_FEPSM
#colnames(subset_FEPSM) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "WD", "SUMS")
# Change column names for subset_FEPSS
#colnames(subset_FEPSS) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "WD", "SUMS")
#### CONVERT S M L DATA ####
# create a list of subsets
subset_list <- list(subset_FEPSL, subset_FEPSM, subset_FEPSS)
# initialize empty list to store the results
pivoted_data_list <- list()
# loop through each subset
for (i in seq_along(subset_list)) {
# perform the operations on each subset
subset <- subset_list[[i]]
subset$SUMS <- rowSums(subset[, 5:6], na.rm = TRUE)
sum_by_plot_species <- aggregate(subset[, 5:6], by = list(plotType = subset$plot, species = subset$SP, habitat_type = subset$hab), sum, na.rm = TRUE)
sum_by_plot_species$Total_C <- rowSums(sum_by_plot_species[, 4:5])
pivoted_data <- sum_by_plot_species %>%
pivot_wider(
id_cols = c("plotType", "habitat_type"),
names_from = "species",
values_from = "Total_C",
values_fill = 0
)
converted_data <- pivoted_data
# convert all numbers greater than 0 to 1 in columns 2 to the end
converted_data <- mutate_all(converted_data[, 2:ncol(converted_data)], ~ifelse(. > 0, 1, .))
# sum the columns
column_sums <- colSums(converted_data)
# identify columns to keep (where sum is greater than 1)
columns_to_keep <- names(column_sums[column_sums > 1])
pivoted_data <- pivoted_data[, c(columns_to_keep)]
# store the pivoted data in the list
pivoted_data_list[[i]] <- pivoted_data
}
# assign each pivoted dataset to separate variables
pivoted_dataL <- pivoted_data_list[[1]]
pivoted_dataM <- pivoted_data_list[[2]]
pivoted_dataS <- pivoted_data_list[[3]]
#### DISTANCE MATRIX ####
# list of data frames
pivoted_data_list <- list(pivoted_dataA, pivoted_dataL, pivoted_dataM, pivoted_dataS)
# list to store distance matrices
DMAt_list <- list()
# loop through the list
for (i in seq_along(pivoted_data_list)) {
# consolidate data
consolidated_data <- pivoted_data_list[[i]] %>%
group_by(habitat_type) %>%
summarise_all(sum)
# create distance matrix
DMAt <- vegdist(consolidated_data[, -1], method = "bray")
# store distance matrix in the list
DMAt_list[[i]] <- as.matrix(DMAt)
}
# access distance matrices from the list
DMAt <- DMAt_list[[1]] # for pivoted_data
DMAtL <- DMAt_list[[2]] # for pivoted_dataL
DMAtM <- DMAt_list[[3]] # for pivoted_dataM
DMAtS <- DMAt_list[[4]] # for pivoted_dataS
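As a quick sanity check on the Bray-Curtis distances computed above, the index for two hypothetical plots works out by hand:
# Bray-Curtis = sum|x_i - y_i| / sum(x_i + y_i)
toy <- rbind(plot1 = c(10, 0, 5), plot2 = c(4, 6, 5))
vegdist(toy, method = "bray") # (6 + 6 + 0) / (15 + 15) = 0.4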
#### LOOP FOR STRESS TEST, perMANOVA, AND PLOT ####
# run on each DMAt and pivoted_data; results are printed on each iteration
NMDS_list <- list()
MANOVARES_list <- list()
for (i in 1:4) {
pivoted_data <- pivoted_data_list[[i]]
DMAt <- DMAt_list[[i]]
#### STRESS TEST ####
set.seed(1) # set seed for replicability
goeveg::dimcheckMDS(pivoted_data[ ,-1], k = 3) # k = 3: stress drops below the red dashed threshold, so it converges
goeveg::dimcheckMDS(pivoted_data[ ,-1], k = 2) # k = 2: stress also drops below the threshold, so it converges
goeveg::dimcheckMDS(pivoted_data[ ,-1], k = 1) # k = 1: stress stays too high, so it does not converge
# k = 1 does not work, so k = 2 is used for all of the pivoted data types
MMDS <- metaMDS(pivoted_data[ , -1], k = 2) # run the NMDS
MMDS # view the solution (stress ~0.08 for the all-trees matrix)
stressplot(MMDS) # Shepard plot; non-metric R^2 > 0.9 indicates the ordination models the data well
#### perMANOVA ####
perMANOVA <- adonis2(as.matrix(pivoted_data[ ,-1]) ~ as.matrix(pivoted_data[ ,1])) # run perMANOVA using as.matrix to make data work with adonis2
perMANOVA # view test
summary(perMANOVA) # view summary specifics
# a p-value < 0.05 indicates statistically significant separation of habitat types in NMDS space
MANOVARES_list[[i]] <- perMANOVA
NMDS_list[[i]] <- MMDS
#### PLOT NMDS ####
# Plotting the NMDS solution
plot(MMDS, type = "n") # Create an empty plot
text(MMDS, display = "species", cex = 0.7) # Add species labels to the plot
# MMDS is NMDS output, splitting by habitat type
habitat_data <- data.frame(x = MMDS$points[, 1], y = MMDS$points[, 2], habitat_type = pivoted_data$habitat_type)
# matrix of NMDS coordinates
habitat_matrix <- subset(habitat_data, select = c("x", "y"))
# calculate euclidean distances
euclidean_distances <- dist(habitat_matrix)
# convert the distance object to a matrix
euclidean_matrix <- as.matrix(euclidean_distances)
# set row and column names
rownames(euclidean_matrix) <- habitat_data$habitat_type
colnames(euclidean_matrix) <- habitat_data$habitat_type
# plot heatmap
heatmap(euclidean_matrix,
Rowv = NA, Colv = NA, # Turn off clustering
col = colorRampPalette(c("white", "rosybrown1", "indianred", "brown", "firebrick4"))(20), # color palette
main = "Habitat Heatmap - Euclidean Distance")
plot(MMDS, type = "n") # create an empty plot
text(MMDS, display = "species", cex = 0.7) # add species labels to the plot
# plot species
ordiplot(MMDS, type = "n") # create an empty plot
# add species labels to the plot
orditorp(MMDS, display = "species", col = "red", air = 0.01)
# add site labels to the plot with increased size
orditorp(MMDS, display = "sites", cex = 1.25, air = 0.01)
#pdf_path <- pdf_paths[i]
# Open a PDF device
#pdf(pdf_path)
# plot species and boundaries
ordiplot(MMDS, type = "n") # create an empty plot
# add species labels to the plot in red
orditorp(MMDS, display = "species", col = "black", air = 0.01)
# add site labels to the plot with increased size and color code by treatment
orditorp(MMDS, display = "sites", cex = 2, air = 0.03,
col = ifelse(habitat_data$habitat_type == "riverine", "chartreuse4",
ifelse(habitat_data$habitat_type == "marantaceae", "darkseagreen3", "plum4")))
ordiellipse(MMDS, groups = habitat_data$habitat_type, kind = "se", conf = 0.95, col = c('plum3', 'darkseagreen2', 'chartreuse4')) # 95% ellipse
legend("topright", legend = c("dense", "marantaceae", "riverine"),
fill = c('plum3', 'darkseagreen2', 'chartreuse4'), title = "Habitat Type", cex = 0.8)
title("NMDS of tree species composition by habitat")
round(MMDS$stress, 4) # view stress
#dev.off() #for saving
}
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1943328
## Run 1 stress 0.3524536
## Run 2 stress 0.3929939
## Run 3 stress 0.3840533
## Run 4 stress 0.1956176
## Run 5 stress 0.3772697
## Run 6 stress 0.1927954
## ... New best solution
## ... Procrustes: rmse 0.01524285 max resid 0.04367903
## Run 7 stress 0.3252388
## Run 8 stress 0.1943328
## Run 9 stress 0.1943328
## Run 10 stress 0.3113665
## Run 11 stress 0.3930091
## Run 12 stress 0.2464218
## Run 13 stress 0.3776397
## Run 14 stress 0.4592865
## Run 15 stress 0.3650915
## Run 16 stress 0.277236
## Run 17 stress 0.2678002
## Run 18 stress 0.2047404
## Run 19 stress 0.350759
## Run 20 stress 0.4564972
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: scale factor of the gradient < sfgrmin
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1124106
## Run 1 stress 0.1112681
## ... New best solution
## ... Procrustes: rmse 0.1158367 max resid 0.2738696
## Run 2 stress 0.07798028
## ... New best solution
## ... Procrustes: rmse 0.09716641 max resid 0.1875646
## Run 3 stress 0.1124105
## Run 4 stress 0.07798012
## ... New best solution
## ... Procrustes: rmse 0.0005749526 max resid 0.001726034
## ... Similar to previous best
## Run 5 stress 0.07798006
## ... New best solution
## ... Procrustes: rmse 0.000263677 max resid 0.0007657745
## ... Similar to previous best
## Run 6 stress 0.07798033
## ... Procrustes: rmse 0.0004538146 max resid 0.001362611
## ... Similar to previous best
## Run 7 stress 0.1143732
## Run 8 stress 0.1112681
## Run 9 stress 0.1096314
## Run 10 stress 0.07798025
## ... Procrustes: rmse 0.0003067676 max resid 0.0008992422
## ... Similar to previous best
## Run 11 stress 0.1088575
## Run 12 stress 0.07798006
## ... Procrustes: rmse 0.0001170406 max resid 0.0003145423
## ... Similar to previous best
## Run 13 stress 0.07798015
## ... Procrustes: rmse 0.0002206639 max resid 0.000577346
## ... Similar to previous best
## Run 14 stress 0.07798031
## ... Procrustes: rmse 0.0003581765 max resid 0.001011074
## ... Similar to previous best
## Run 15 stress 0.07798009
## ... Procrustes: rmse 0.0001404872 max resid 0.0003738382
## ... Similar to previous best
## Run 16 stress 0.1763144
## Run 17 stress 0.1096313
## Run 18 stress 0.1143729
## Run 19 stress 0.1769718
## Run 20 stress 0.07798005
## ... New best solution
## ... Procrustes: rmse 0.0001325533 max resid 0.0002572146
## ... Similar to previous best
## *** Best solution repeated 1 times
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.04100057
## Run 1 stress 0.04099468
## ... New best solution
## ... Procrustes: rmse 0.001852491 max resid 0.003446237
## ... Similar to previous best
## Run 2 stress 0.04099474
## ... Procrustes: rmse 6.576772e-05 max resid 0.0001243949
## ... Similar to previous best
## Run 3 stress 0.0409948
## ... Procrustes: rmse 0.001095018 max resid 0.002111007
## ... Similar to previous best
## Run 4 stress 0.04099464
## ... New best solution
## ... Procrustes: rmse 2.487539e-05 max resid 4.875293e-05
## ... Similar to previous best
## Run 5 stress 0.05971032
## Run 6 stress 0.04099477
## ... Procrustes: rmse 0.001057967 max resid 0.002039177
## ... Similar to previous best
## Run 7 stress 0.04099478
## ... Procrustes: rmse 0.0009725434 max resid 0.001874584
## ... Similar to previous best
## Run 8 stress 0.04099497
## ... Procrustes: rmse 0.001182615 max resid 0.00227601
## ... Similar to previous best
## Run 9 stress 0.06210231
## Run 10 stress 0.04099456
## ... New best solution
## ... Procrustes: rmse 0.0008879197 max resid 0.001704026
## ... Similar to previous best
## Run 11 stress 0.04688513
## Run 12 stress 0.05971075
## Run 13 stress 0.04099456
## ... Procrustes: rmse 5.290214e-05 max resid 0.0001465044
## ... Similar to previous best
## Run 14 stress 0.05971014
## Run 15 stress 0.04360502
## Run 16 stress 0.04099766
## ... Procrustes: rmse 0.001812851 max resid 0.003552028
## ... Similar to previous best
## Run 17 stress 0.04099454
## ... New best solution
## ... Procrustes: rmse 0.0004380337 max resid 0.0008285491
## ... Similar to previous best
## Run 18 stress 0.04099468
## ... Procrustes: rmse 0.0005272132 max resid 0.0009471409
## ... Similar to previous best
## Run 19 stress 0.04100388
## ... Procrustes: rmse 0.002706279 max resid 0.004962037
## ... Similar to previous best
## Run 20 stress 0.04103947
## ... Procrustes: rmse 0.005039201 max resid 0.008993202
## *** Best solution repeated 3 times
## [1] 0.19279544 0.07798005 0.04099454
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1943328
## Run 1 stress 0.413639
## Run 2 stress 0.2900389
## Run 3 stress 0.3245001
## Run 4 stress 0.2755258
## Run 5 stress 0.3728355
## Run 6 stress 0.3594308
## Run 7 stress 0.4538464
## Run 8 stress 0.2067003
## Run 9 stress 0.1943328
## ... New best solution
## ... Procrustes: rmse 2.390165e-07 max resid 5.178136e-07
## ... Similar to previous best
## Run 10 stress 0.4259043
## Run 11 stress 0.1943328
## ... Procrustes: rmse 1.919467e-07 max resid 4.291512e-07
## ... Similar to previous best
## Run 12 stress 0.1950975
## Run 13 stress 0.2077259
## Run 14 stress 0.1950975
## Run 15 stress 0.1942141
## ... New best solution
## ... Procrustes: rmse 0.01688855 max resid 0.03546575
## Run 16 stress 0.2125097
## Run 17 stress 0.3740071
## Run 18 stress 0.3536908
## Run 19 stress 0.1942141
## ... Procrustes: rmse 5.554985e-07 max resid 1.308264e-06
## ... Similar to previous best
## Run 20 stress 0.3969587
## *** Best solution repeated 1 times
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1124106
## Run 1 stress 0.07798037
## ... New best solution
## ... Procrustes: rmse 0.1287562 max resid 0.277331
## Run 2 stress 0.07798009
## ... New best solution
## ... Procrustes: rmse 0.000262116 max resid 0.00077837
## ... Similar to previous best
## Run 3 stress 0.1155293
## Run 4 stress 0.1155292
## Run 5 stress 0.114373
## Run 6 stress 0.1096313
## Run 7 stress 0.07798037
## ... Procrustes: rmse 0.0005842807 max resid 0.001751259
## ... Similar to previous best
## Run 8 stress 0.1183322
## Run 9 stress 0.07798006
## ... New best solution
## ... Procrustes: rmse 0.0002678849 max resid 0.000789668
## ... Similar to previous best
## Run 10 stress 0.07798013
## ... Procrustes: rmse 0.000144215 max resid 0.0004079904
## ... Similar to previous best
## Run 11 stress 0.1669167
## Run 12 stress 0.07798005
## ... New best solution
## ... Procrustes: rmse 4.568267e-05 max resid 0.0001215872
## ... Similar to previous best
## Run 13 stress 0.1088575
## Run 14 stress 0.1096314
## Run 15 stress 0.0779804
## ... Procrustes: rmse 0.0005210257 max resid 0.001566148
## ... Similar to previous best
## Run 16 stress 0.07798029
## ... Procrustes: rmse 0.0002778744 max resid 0.0008061927
## ... Similar to previous best
## Run 17 stress 0.1143729
## Run 18 stress 0.07798024
## ... Procrustes: rmse 0.0003987837 max resid 0.001197606
## ... Similar to previous best
## Run 19 stress 0.07798033
## ... Procrustes: rmse 0.0003307509 max resid 0.0009848813
## ... Similar to previous best
## Run 20 stress 0.1143729
## *** Best solution repeated 5 times
## [1] 0.19421410 0.07798005
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1943328
## Run 1 stress 0.4404568
## Run 2 stress 0.5255438
## Run 3 stress 0.3668296
## Run 4 stress 0.2055386
## Run 5 stress 0.480681
## Run 6 stress 0.3414128
## Run 7 stress 0.3324098
## Run 8 stress 0.4754485
## Run 9 stress 0.1950975
## Run 10 stress 0.2529896
## Run 11 stress 0.3658251
## Run 12 stress 0.3717855
## Run 13 stress 0.4750728
## Run 14 stress 0.3001204
## Run 15 stress 0.2749941
## Run 16 stress 0.356021
## Run 17 stress 0.3899074
## Run 18 stress 0.2910376
## Run 19 stress 0.2730635
## Run 20 stress 0.4445379
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: scale factor of the gradient < sfgrmin
## [1] 0.1943328
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1124106
## Run 1 stress 0.07798018
## ... New best solution
## ... Procrustes: rmse 0.1287681 max resid 0.2774338
## Run 2 stress 0.1669167
## Run 3 stress 0.1155291
## Run 4 stress 0.07798004
## ... New best solution
## ... Procrustes: rmse 0.0003062804 max resid 0.0008895488
## ... Similar to previous best
## Run 5 stress 0.1088575
## Run 6 stress 0.114373
## Run 7 stress 0.0779802
## ... Procrustes: rmse 0.0002706103 max resid 0.0008052747
## ... Similar to previous best
## Run 8 stress 0.114373
## Run 9 stress 0.07798023
## ... Procrustes: rmse 0.0003447398 max resid 0.001007648
## ... Similar to previous best
## Run 10 stress 0.1143729
## Run 11 stress 0.2305539
## Run 12 stress 0.07798029
## ... Procrustes: rmse 0.0003814663 max resid 0.001150712
## ... Similar to previous best
## Run 13 stress 0.07798037
## ... Procrustes: rmse 0.0004255913 max resid 0.001261226
## ... Similar to previous best
## Run 14 stress 0.1088575
## Run 15 stress 0.1155292
## Run 16 stress 0.1143729
## Run 17 stress 0.114373
## Run 18 stress 0.1124105
## Run 19 stress 0.0779802
## ... Procrustes: rmse 0.000286567 max resid 0.000864704
## ... Similar to previous best
## Run 20 stress 0.07798025
## ... Procrustes: rmse 0.0003257858 max resid 0.000975932
## ... Similar to previous best
## *** Best solution repeated 7 times
## Wisconsin double standardization
## Run 0 stress 0.1260786
## Run 1 stress 0.1615005
## Run 2 stress 0.1038977
## ... New best solution
## ... Procrustes: rmse 0.1879968 max resid 0.4002379
## Run 3 stress 0.1233545
## Run 4 stress 0.1504371
## Run 5 stress 0.105237
## Run 6 stress 0.1198666
## Run 7 stress 0.2173669
## Run 8 stress 0.105883
## Run 9 stress 0.1187794
## Run 10 stress 0.1679179
## Run 11 stress 0.1170822
## Run 12 stress 0.1159565
## Run 13 stress 0.1177415
## Run 14 stress 0.1708731
## Run 15 stress 0.1319013
## Run 16 stress 0.1728781
## Run 17 stress 0.1846129
## Run 18 stress 0.1018311
## ... New best solution
## ... Procrustes: rmse 0.1559028 max resid 0.2845061
## Run 19 stress 0.2002936
## Run 20 stress 0.1169588
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 6: stress ratio > sratmax
## 14: scale factor of the gradient < sfgrmin
## Wisconsin double standardization
## Run 0 stress 0.04638449
## Run 1 stress 0.04647212
## ... Procrustes: rmse 0.1455698 max resid 0.3689025
## Run 2 stress 0.05525682
## Run 3 stress 0.02883845
## ... New best solution
## ... Procrustes: rmse 0.1666909 max resid 0.2844353
## Run 4 stress 0.05359211
## Run 5 stress 0.0331679
## Run 6 stress 0.02437162
## ... New best solution
## ... Procrustes: rmse 0.1838275 max resid 0.3439847
## Run 7 stress 0.06154179
## Run 8 stress 9.176149e-05
## ... New best solution
## ... Procrustes: rmse 0.1938413 max resid 0.4466419
## Run 9 stress 0.06354182
## Run 10 stress 0.02421754
## Run 11 stress 0.04343092
## Run 12 stress 0.03841728
## Run 13 stress 0.03421767
## Run 14 stress 0.05551083
## Run 15 stress 0.05034388
## Run 16 stress 0.05466926
## Run 17 stress 0.04216925
## Run 18 stress 9.407029e-05
## ... Procrustes: rmse 0.02903055 max resid 0.05612763
## Run 19 stress 0.0317483
## Run 20 stress 0.0453951
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 8: no. of iterations >= maxit
## 2: stress < smin
## 10: stress ratio > sratmax
## Warning in metaMDS(matrix, distance = distance, k = i, trymax = trymax, :
## stress is (nearly) zero: you may have insufficient data
## Wisconsin double standardization
## Run 0 stress 6.813294e-05
## Run 1 stress 9.896408e-05
## ... Procrustes: rmse 0.1637981 max resid 0.261745
## Run 2 stress 8.862762e-05
## ... Procrustes: rmse 0.1901985 max resid 0.3565592
## Run 3 stress 9.452926e-05
## ... Procrustes: rmse 0.2049385 max resid 0.3757502
## Run 4 stress 9.394402e-05
## ... Procrustes: rmse 0.184007 max resid 0.3149623
## Run 5 stress 8.086832e-05
## ... Procrustes: rmse 0.1848313 max resid 0.3283084
## Run 6 stress 8.35827e-05
## ... Procrustes: rmse 0.1828036 max resid 0.3145629
## Run 7 stress 9.528138e-05
## ... Procrustes: rmse 0.1987372 max resid 0.4399993
## Run 8 stress 0.02473764
## Run 9 stress 9.435425e-05
## ... Procrustes: rmse 0.1528865 max resid 0.3130526
## Run 10 stress 0.03405059
## Run 11 stress 0.02230655
## Run 12 stress 7.500288e-05
## ... Procrustes: rmse 0.1824924 max resid 0.2831157
## Run 13 stress 9.78121e-05
## ... Procrustes: rmse 0.190123 max resid 0.4110007
## Run 14 stress 0.000114756
## ... Procrustes: rmse 0.1816033 max resid 0.2467262
## Run 15 stress 9.79947e-05
## ... Procrustes: rmse 0.1799444 max resid 0.3476988
## Run 16 stress 8.86299e-05
## ... Procrustes: rmse 0.1873537 max resid 0.3561717
## Run 17 stress 0.0285121
## Run 18 stress 9.501063e-05
## ... Procrustes: rmse 0.1480927 max resid 0.3431173
## Run 19 stress 0.01566551
## Run 20 stress 9.629843e-05
## ... Procrustes: rmse 0.1843928 max resid 0.3230523
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 6: no. of iterations >= maxit
## 14: stress < smin
## Warning in metaMDS(matrix, distance = distance, k = i, trymax = trymax, :
## stress is (nearly) zero: you may have insufficient data
## [1] 1.018311e-01 9.176149e-05 6.813294e-05
## Wisconsin double standardization
## Run 0 stress 0.1260786
## Run 1 stress 0.1007245
## ... New best solution
## ... Procrustes: rmse 0.145201 max resid 0.2970199
## Run 2 stress 0.1295787
## Run 3 stress 0.1721973
## Run 4 stress 0.1519206
## Run 5 stress 0.1731893
## Run 6 stress 0.1357672
## Run 7 stress 0.152639
## Run 8 stress 0.1237353
## Run 9 stress 0.1130331
## Run 10 stress 0.1311712
## Run 11 stress 0.1832615
## Run 12 stress 0.1264218
## Run 13 stress 0.103457
## Run 14 stress 0.1080866
## Run 15 stress 0.1407359
## Run 16 stress 0.1805179
## Run 17 stress 0.1127234
## Run 18 stress 0.1557465
## Run 19 stress 0.1287269
## Run 20 stress 0.112385
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 11: stress ratio > sratmax
## 9: scale factor of the gradient < sfgrmin
## Wisconsin double standardization
## Run 0 stress 0.04638449
## Run 1 stress 0.05477751
## Run 2 stress 0.02055709
## ... New best solution
## ... Procrustes: rmse 0.1676508 max resid 0.3173443
## Run 3 stress 0.03542245
## Run 4 stress 0.06339609
## Run 5 stress 0.06354552
## Run 6 stress 0.03434067
## Run 7 stress 0.04904367
## Run 8 stress 0.02427022
## Run 9 stress 0.04943973
## Run 10 stress 0.07217386
## Run 11 stress 0.06614757
## Run 12 stress 0.01402032
## ... New best solution
## ... Procrustes: rmse 0.1466311 max resid 0.3708759
## Run 13 stress 9.758908e-05
## ... New best solution
## ... Procrustes: rmse 0.1142528 max resid 0.1897005
## Run 14 stress 0.02570813
## Run 15 stress 0.08477442
## Run 16 stress 0.05823125
## Run 17 stress 0.04232931
## Run 18 stress 0.07039224
## Run 19 stress 0.03858976
## Run 20 stress 0.04583004
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 10: no. of iterations >= maxit
## 1: stress < smin
## 9: stress ratio > sratmax
## Warning in metaMDS(matrix, distance = distance, k = i, trymax = trymax, :
## stress is (nearly) zero: you may have insufficient data
## [1] 1.007245e-01 9.758908e-05
## Wisconsin double standardization
## Run 0 stress 0.1260786
## Run 1 stress 0.203343
## Run 2 stress 0.1388219
## Run 3 stress 0.1214044
## ... New best solution
## ... Procrustes: rmse 0.2570448 max resid 0.4814823
## Run 4 stress 0.1350864
## Run 5 stress 0.1820451
## Run 6 stress 0.1188754
## ... New best solution
## ... Procrustes: rmse 0.2330044 max resid 0.4846903
## Run 7 stress 0.1586272
## Run 8 stress 0.1369796
## Run 9 stress 0.1141689
## ... New best solution
## ... Procrustes: rmse 0.2623281 max resid 0.4492254
## Run 10 stress 0.1506308
## Run 11 stress 0.1162774
## Run 12 stress 0.1342414
## Run 13 stress 0.1556626
## Run 14 stress 0.1720211
## Run 15 stress 0.1431112
## Run 16 stress 0.1525006
## Run 17 stress 0.1312591
## Run 18 stress 0.162248
## Run 19 stress 0.1646557
## Run 20 stress 0.1904715
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 10: stress ratio > sratmax
## 10: scale factor of the gradient < sfgrmin
## [1] 0.1141689
## Wisconsin double standardization
## Run 0 stress 0.04638449
## Run 1 stress 0.04644459
## ... Procrustes: rmse 0.2056431 max resid 0.3306612
## Run 2 stress 0.05161009
## Run 3 stress 0.06929451
## Run 4 stress 0.03852015
## ... New best solution
## ... Procrustes: rmse 0.1781842 max resid 0.3840539
## Run 5 stress 0.02432429
## ... New best solution
## ... Procrustes: rmse 0.1581082 max resid 0.2960934
## Run 6 stress 0.03481109
## Run 7 stress 0.0367112
## Run 8 stress 0.03387116
## Run 9 stress 0.05687228
## Run 10 stress 0.05563853
## Run 11 stress 0.02441238
## ... Procrustes: rmse 0.08048097 max resid 0.2393151
## Run 12 stress 0.05035522
## Run 13 stress 0.03009459
## Run 14 stress 0.06921252
## Run 15 stress 8.93295e-05
## ... New best solution
## ... Procrustes: rmse 0.1347681 max resid 0.3516269
## Run 16 stress 0.02054638
## Run 17 stress 0.02660606
## Run 18 stress 0.05745513
## Run 19 stress 9.311642e-05
## ... Procrustes: rmse 0.1064271 max resid 0.1929925
## Run 20 stress 0.03848708
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 11: no. of iterations >= maxit
## 2: stress < smin
## 7: stress ratio > sratmax
## Warning in metaMDS(pivoted_data[, -1], k = 2): stress is (nearly) zero: you may
## have insufficient data
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.107638
## Run 1 stress 0.195674
## Run 2 stress 0.1113015
## Run 3 stress 0.09468634
## ... New best solution
## ... Procrustes: rmse 0.1036155 max resid 0.2466268
## Run 4 stress 0.09758271
## Run 5 stress 0.09691804
## Run 6 stress 0.08986221
## ... New best solution
## ... Procrustes: rmse 0.05151907 max resid 0.1107482
## Run 7 stress 0.2106312
## Run 8 stress 0.1311567
## Run 9 stress 0.1430587
## Run 10 stress 0.2238203
## Run 11 stress 0.1733678
## Run 12 stress 0.2075548
## Run 13 stress 0.09273886
## Run 14 stress 0.105575
## Run 15 stress 0.1260755
## Run 16 stress 0.1308612
## Run 17 stress 0.095987
## Run 18 stress 0.1015187
## Run 19 stress 0.2049858
## Run 20 stress 0.1026213
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 12: stress ratio > sratmax
## 8: scale factor of the gradient < sfgrmin
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.03287211
## Run 1 stress 0.02706117
## ... New best solution
## ... Procrustes: rmse 0.1025107 max resid 0.1758561
## Run 2 stress 0.05732119
## Run 3 stress 0.03000261
## Run 4 stress 0.0295881
## Run 5 stress 0.02644087
## ... New best solution
## ... Procrustes: rmse 0.0359606 max resid 0.09495919
## Run 6 stress 0.04956967
## Run 7 stress 0.04956518
## Run 8 stress 0.06205091
## Run 9 stress 0.02801466
## Run 10 stress 0.06000092
## Run 11 stress 0.06726644
## Run 12 stress 0.02936456
## Run 13 stress 0.03064426
## Run 14 stress 0.03642475
## Run 15 stress 0.03341343
## Run 16 stress 0.02697461
## Run 17 stress 0.02876596
## Run 18 stress 0.02889734
## Run 19 stress 0.02966154
## Run 20 stress 0.02847138
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 17: no. of iterations >= maxit
## 3: stress ratio > sratmax
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.01422236
## Run 1 stress 0.0160425
## Run 2 stress 0.01184598
## ... New best solution
## ... Procrustes: rmse 0.05937423 max resid 0.108933
## Run 3 stress 0.01969966
## Run 4 stress 0.04573324
## Run 5 stress 0.0123984
## Run 6 stress 0.01201006
## ... Procrustes: rmse 0.1164808 max resid 0.2582575
## Run 7 stress 0.01154864
## ... New best solution
## ... Procrustes: rmse 0.1362879 max resid 0.2768663
## Run 8 stress 0.01276063
## Run 9 stress 0.01248348
## Run 10 stress 0.01201496
## ... Procrustes: rmse 0.07089972 max resid 0.1165253
## Run 11 stress 0.01484572
## Run 12 stress 0.01522506
## Run 13 stress 0.01481153
## Run 14 stress 0.0116678
## ... Procrustes: rmse 0.06632489 max resid 0.1605576
## Run 15 stress 0.05173728
## Run 16 stress 0.01325485
## Run 17 stress 0.01233749
## Run 18 stress 0.01558809
## Run 19 stress 0.01179447
## ... Procrustes: rmse 0.1010405 max resid 0.2421525
## Run 20 stress 0.0303378
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: no. of iterations >= maxit
## [1] 0.08986221 0.02644087 0.01154864
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.107638
## Run 1 stress 0.1498635
## Run 2 stress 0.1913051
## Run 3 stress 0.09253764
## ... New best solution
## ... Procrustes: rmse 0.1387214 max resid 0.304312
## Run 4 stress 0.1820671
## Run 5 stress 0.09392184
## Run 6 stress 0.2160058
## Run 7 stress 0.1911391
## Run 8 stress 0.1366461
## Run 9 stress 0.1597226
## Run 10 stress 0.1255343
## Run 11 stress 0.09459803
## Run 12 stress 0.0921741
## ... New best solution
## ... Procrustes: rmse 0.1287216 max resid 0.3800495
## Run 13 stress 0.1178513
## Run 14 stress 0.1103657
## Run 15 stress 0.1054541
## Run 16 stress 0.1288656
## Run 17 stress 0.1005663
## Run 18 stress 0.1019884
## Run 19 stress 0.09930534
## Run 20 stress 0.1037488
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 14: stress ratio > sratmax
## 6: scale factor of the gradient < sfgrmin
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.03287211
## Run 1 stress 0.05302126
## Run 2 stress 0.04963663
## Run 3 stress 0.02931672
## ... New best solution
## ... Procrustes: rmse 0.1355068 max resid 0.2518441
## Run 4 stress 0.04962142
## Run 5 stress 0.03662982
## Run 6 stress 0.05260794
## Run 7 stress 0.03670526
## Run 8 stress 0.0347045
## Run 9 stress 0.05302348
## Run 10 stress 0.04163794
## Run 11 stress 0.05732297
## Run 12 stress 0.02918001
## ... New best solution
## ... Procrustes: rmse 0.1595063 max resid 0.2450427
## Run 13 stress 0.02684856
## ... New best solution
## ... Procrustes: rmse 0.1633937 max resid 0.2934725
## Run 14 stress 0.03044057
## Run 15 stress 0.05805467
## Run 16 stress 0.04962124
## Run 17 stress 0.05755521
## Run 18 stress 0.02944696
## Run 19 stress 0.05374777
## Run 20 stress 0.05732111
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 13: no. of iterations >= maxit
## 7: stress ratio > sratmax
## [1] 0.09217410 0.02684856
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.107638
## Run 1 stress 0.09495336
## ... New best solution
## ... Procrustes: rmse 0.08284395 max resid 0.151722
## Run 2 stress 0.1013772
## Run 3 stress 0.1409984
## Run 4 stress 0.0964131
## Run 5 stress 0.09409631
## ... New best solution
## ... Procrustes: rmse 0.04198279 max resid 0.09182676
## Run 6 stress 0.1092166
## Run 7 stress 0.2059908
## Run 8 stress 0.1428996
## Run 9 stress 0.08433692
## ... New best solution
## ... Procrustes: rmse 0.1095314 max resid 0.3287707
## Run 10 stress 0.09379732
## Run 11 stress 0.1100792
## Run 12 stress 0.09042387
## Run 13 stress 0.08936437
## Run 14 stress 0.09345407
## Run 15 stress 0.1122311
## Run 16 stress 0.09341088
## Run 17 stress 0.1885429
## Run 18 stress 0.2051392
## Run 19 stress 0.1665594
## Run 20 stress 0.09994648
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 14: stress ratio > sratmax
## 6: scale factor of the gradient < sfgrmin
## [1] 0.08433692
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.03287211
## Run 1 stress 0.0495648
## Run 2 stress 0.0283978
## ... New best solution
## ... Procrustes: rmse 0.1506162 max resid 0.4143981
## Run 3 stress 0.04399708
## Run 4 stress 0.02746676
## ... New best solution
## ... Procrustes: rmse 0.1866391 max resid 0.5099999
## Run 5 stress 0.02644227
## ... New best solution
## ... Procrustes: rmse 0.1349 max resid 0.295937
## Run 6 stress 0.03485698
## Run 7 stress 0.03001853
## Run 8 stress 0.02681662
## ... Procrustes: rmse 0.1186172 max resid 0.2612048
## Run 9 stress 0.04406046
## Run 10 stress 0.06000121
## Run 11 stress 0.05302191
## Run 12 stress 0.02764933
## Run 13 stress 0.03554706
## Run 14 stress 0.04956439
## Run 15 stress 0.04956494
## Run 16 stress 0.02674716
## ... Procrustes: rmse 0.1196372 max resid 0.2753668
## Run 17 stress 0.04956518
## Run 18 stress 0.03686226
## Run 19 stress 0.02876901
## Run 20 stress 0.02876359
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 15: no. of iterations >= maxit
## 5: stress ratio > sratmax
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2432883
## Run 1 stress 0.2005041
## ... New best solution
## ... Procrustes: rmse 0.1797811 max resid 0.4073555
## Run 2 stress 0.1492792
## ... New best solution
## ... Procrustes: rmse 0.09066636 max resid 0.2187461
## Run 3 stress 0.1982219
## Run 4 stress 0.275381
## Run 5 stress 0.2736742
## Run 6 stress 0.2763352
## Run 7 stress 0.2014731
## Run 8 stress 0.2099927
## Run 9 stress 0.2563287
## Run 10 stress 0.287374
## Run 11 stress 0.1414606
## ... New best solution
## ... Procrustes: rmse 0.02879065 max resid 0.05849106
## Run 12 stress 0.2872084
## Run 13 stress 0.2777393
## Run 14 stress 0.2013436
## Run 15 stress 0.1444795
## Run 16 stress 0.1875126
## Run 17 stress 0.184394
## Run 18 stress 0.217751
## Run 19 stress 0.1955774
## Run 20 stress 0.2097698
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: scale factor of the gradient < sfgrmin
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.06870377
## Run 1 stress 0.06870354
## ... New best solution
## ... Procrustes: rmse 0.0001440208 max resid 0.0004140217
## ... Similar to previous best
## Run 2 stress 0.06596665
## ... New best solution
## ... Procrustes: rmse 0.02617494 max resid 0.06023465
## Run 3 stress 0.07348368
## Run 4 stress 0.06870362
## Run 5 stress 0.06870361
## Run 6 stress 0.1069169
## Run 7 stress 0.06596584
## ... New best solution
## ... Procrustes: rmse 0.0006891548 max resid 0.002076878
## ... Similar to previous best
## Run 8 stress 0.07990749
## Run 9 stress 0.07990756
## Run 10 stress 0.07990764
## Run 11 stress 0.06596685
## ... Procrustes: rmse 0.002113794 max resid 0.006364691
## ... Similar to previous best
## Run 12 stress 0.06870334
## Run 13 stress 0.07348402
## Run 14 stress 0.0734835
## Run 15 stress 0.07990781
## Run 16 stress 0.104344
## Run 17 stress 0.07990768
## Run 18 stress 0.06596699
## ... Procrustes: rmse 0.0008321829 max resid 0.002505762
## ... Similar to previous best
## Run 19 stress 0.09258065
## Run 20 stress 0.1570495
## *** Best solution repeated 3 times
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.02275913
## Run 1 stress 0.02212176
## ... New best solution
## ... Procrustes: rmse 0.1038378 max resid 0.2945243
## Run 2 stress 0.02221842
## ... Procrustes: rmse 0.09055185 max resid 0.2602679
## Run 3 stress 0.02178296
## ... New best solution
## ... Procrustes: rmse 0.05550052 max resid 0.1569568
## Run 4 stress 0.02185097
## ... Procrustes: rmse 0.01155222 max resid 0.03275711
## Run 5 stress 0.02360378
## Run 6 stress 0.02181676
## ... Procrustes: rmse 0.005035596 max resid 0.01386529
## Run 7 stress 0.02197213
## ... Procrustes: rmse 0.03701802 max resid 0.1039729
## Run 8 stress 0.02176694
## ... New best solution
## ... Procrustes: rmse 0.004928824 max resid 0.0138589
## Run 9 stress 0.02202521
## ... Procrustes: rmse 0.0396489 max resid 0.1124044
## Run 10 stress 0.02375199
## Run 11 stress 0.02198844
## ... Procrustes: rmse 0.03684882 max resid 0.104643
## Run 12 stress 0.02212597
## ... Procrustes: rmse 0.05217235 max resid 0.1496327
## Run 13 stress 0.02399484
## Run 14 stress 0.02405759
## Run 15 stress 0.02251671
## Run 16 stress 0.02442058
## Run 17 stress 0.02177125
## ... Procrustes: rmse 0.005619633 max resid 0.01568863
## Run 18 stress 0.02196946
## ... Procrustes: rmse 0.03984433 max resid 0.1133911
## Run 19 stress 0.0231586
## Run 20 stress 0.02259384
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: no. of iterations >= maxit
## [1] 0.14146056 0.06596584 0.02176694
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2432883
## Run 1 stress 0.1444243
## ... New best solution
## ... Procrustes: rmse 0.1433319 max resid 0.4074722
## Run 2 stress 0.1911551
## Run 3 stress 0.1986083
## Run 4 stress 0.1444243
## ... Procrustes: rmse 1.150888e-06 max resid 2.916415e-06
## ... Similar to previous best
## Run 5 stress 0.292168
## Run 6 stress 0.1414606
## ... New best solution
## ... Procrustes: rmse 0.02249834 max resid 0.05662958
## Run 7 stress 0.2915288
## Run 8 stress 0.2593832
## Run 9 stress 0.2322329
## Run 10 stress 0.2535363
## Run 11 stress 0.3076013
## Run 12 stress 0.2098618
## Run 13 stress 0.2961442
## Run 14 stress 0.3030775
## Run 15 stress 0.1451452
## Run 16 stress 0.4792608
## Run 17 stress 0.1420164
## Run 18 stress 0.290279
## Run 19 stress 0.2904474
## Run 20 stress 0.2234517
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: scale factor of the gradient < sfgrmin
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.06870377
## Run 1 stress 0.06870312
## ... New best solution
## ... Procrustes: rmse 0.0004465134 max resid 0.001306033
## ... Similar to previous best
## Run 2 stress 0.09477481
## Run 3 stress 0.1083853
## Run 4 stress 0.1083853
## Run 5 stress 0.1607804
## Run 6 stress 0.1021777
## Run 7 stress 0.07990651
## Run 8 stress 0.1043544
## Run 9 stress 0.06596676
## ... New best solution
## ... Procrustes: rmse 0.02612557 max resid 0.06033822
## Run 10 stress 0.07348395
## Run 11 stress 0.07990727
## Run 12 stress 0.1045591
## Run 13 stress 0.07348373
## Run 14 stress 0.07990686
## Run 15 stress 0.06596657
## ... New best solution
## ... Procrustes: rmse 0.002663905 max resid 0.00802223
## ... Similar to previous best
## Run 16 stress 0.06596657
## ... New best solution
## ... Procrustes: rmse 6.033653e-06 max resid 1.788869e-05
## ... Similar to previous best
## Run 17 stress 0.1570494
## Run 18 stress 0.07348288
## Run 19 stress 0.1043544
## Run 20 stress 0.06596669
## ... Procrustes: rmse 0.0001388569 max resid 0.0004061346
## ... Similar to previous best
## *** Best solution repeated 2 times
## [1] 0.14146056 0.06596657
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2432883
## Run 1 stress 0.2533715
## Run 2 stress 0.5149356
## Run 3 stress 0.2576789
## Run 4 stress 0.2943772
## Run 5 stress 0.2008392
## ... New best solution
## ... Procrustes: rmse 0.1852718 max resid 0.3966593
## Run 6 stress 0.2259715
## Run 7 stress 0.4019818
## Run 8 stress 0.4106443
## Run 9 stress 0.3086709
## Run 10 stress 0.1745059
## ... New best solution
## ... Procrustes: rmse 0.1009503 max resid 0.2123004
## Run 11 stress 0.2515758
## Run 12 stress 0.2005041
## Run 13 stress 0.4735085
## Run 14 stress 0.2767366
## Run 15 stress 0.1444243
## ... New best solution
## ... Procrustes: rmse 0.04424901 max resid 0.1330738
## Run 16 stress 0.2728852
## Run 17 stress 0.1444243
## ... New best solution
## ... Procrustes: rmse 1.842742e-07 max resid 4.889771e-07
## ... Similar to previous best
## Run 18 stress 0.1444243
## ... Procrustes: rmse 1.002658e-06 max resid 2.108346e-06
## ... Similar to previous best
## Run 19 stress 0.1639899
## Run 20 stress 0.2844137
## *** Best solution repeated 2 times
## [1] 0.1444243
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.06870377
## Run 1 stress 0.06596635
## ... New best solution
## ... Procrustes: rmse 0.02622634 max resid 0.06013882
## Run 2 stress 0.06596725
## ... Procrustes: rmse 0.0005417882 max resid 0.001633643
## ... Similar to previous best
## Run 3 stress 0.07990656
## Run 4 stress 0.1575941
## Run 5 stress 0.06870363
## Run 6 stress 0.06870464
## Run 7 stress 0.06596642
## ... Procrustes: rmse 0.002336689 max resid 0.007037219
## ... Similar to previous best
## Run 8 stress 0.0799073
## Run 9 stress 0.06874223
## Run 10 stress 0.09477508
## Run 11 stress 0.06596641
## ... Procrustes: rmse 3.240541e-05 max resid 8.997533e-05
## ... Similar to previous best
## Run 12 stress 0.0687033
## Run 13 stress 0.07348406
## Run 14 stress 0.07990703
## Run 15 stress 0.06596706
## ... Procrustes: rmse 0.00272763 max resid 0.008213281
## ... Similar to previous best
## Run 16 stress 0.07990725
## Run 17 stress 0.07990697
## Run 18 stress 0.06596672
## ... Procrustes: rmse 0.0002444276 max resid 0.0007368619
## ... Similar to previous best
## Run 19 stress 0.09258103
## Run 20 stress 0.0734832
## *** Best solution repeated 5 times
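For reference when reading the stress values above: by the common rule of thumb, stress below about 0.05 is an excellent fit, below 0.1 good, and below 0.2 usable, while the "stress is (nearly) zero" warnings flag that some height-class matrices have too few plots for a stable two-dimensional solution.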
#### PRINT NMDS RESULTS ####
MMDS <- NMDS_list[[1]]
print(MMDS)
##
## Call:
## metaMDS(comm = pivoted_data[, -1], k = 2)
##
## global Multidimensional Scaling using monoMDS
##
## Data: wisconsin(sqrt(pivoted_data[, -1]))
## Distance: bray
##
## Dimensions: 2
## Stress: 0.07798004
## Stress type 1, weak ties
## Best solution was repeated 7 times in 20 tries
## The best solution was from try 4 (random start)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'wisconsin(sqrt(pivoted_data[, -1]))'
MMDSL <- NMDS_list[[2]]
print(MMDSL)
##
## Call:
## metaMDS(comm = pivoted_data[, -1], k = 2)
##
## global Multidimensional Scaling using monoMDS
##
## Data: wisconsin(pivoted_data[, -1])
## Distance: bray
##
## Dimensions: 2
## Stress: 8.93295e-05
## Stress type 1, weak ties
## Best solution was not repeated after 20 tries
## The best solution was from try 15 (random start)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'wisconsin(pivoted_data[, -1])'
MMDSM <- NMDS_list[[3]]
print(MMDSM)
##
## Call:
## metaMDS(comm = pivoted_data[, -1], k = 2)
##
## global Multidimensional Scaling using monoMDS
##
## Data: wisconsin(sqrt(pivoted_data[, -1]))
## Distance: bray
##
## Dimensions: 2
## Stress: 0.02644227
## Stress type 1, weak ties
## Best solution was not repeated after 20 tries
## The best solution was from try 5 (random start)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'wisconsin(sqrt(pivoted_data[, -1]))'
MMDSS <- NMDS_list[[4]]
print(MMDSS)
##
## Call:
## metaMDS(comm = pivoted_data[, -1], k = 2)
##
## global Multidimensional Scaling using monoMDS
##
## Data: wisconsin(sqrt(pivoted_data[, -1]))
## Distance: bray
##
## Dimensions: 2
## Stress: 0.06596635
## Stress type 1, weak ties
## Best solution was repeated 5 times in 20 tries
## The best solution was from try 1 (random start)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'wisconsin(sqrt(pivoted_data[, -1]))'
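For custom plotting outside the loop, site and species coordinates can be pulled from any stored solution; a minimal sketch on the all-trees NMDS:
# extract NMDS coordinates for use in ggplot2 or other plotting code
site_scores <- as.data.frame(scores(MMDS, display = "sites"))
spp_scores <- as.data.frame(scores(MMDS, display = "species"))
head(site_scores) # NMDS1/NMDS2 per plot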
Diversity indices
#### SIMPSON INDEX ####
Singles_pivoted_data # view data including singletons
## # A tibble: 14 × 155
## habitat_type HDP HEU HLL KAO KAS KDA KDT KGC KIE KLC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 established 6 2 1 2 0 1 0 5 29 0
## 2 established 0 3 0 2 0 3 1 6 17 0
## 3 established 0 0 0 2 0 0 0 2 21 1
## 4 established 0 0 0 1 0 11 0 1 5 0
## 5 established 0 0 0 0 8 0 0 1 14 0
## 6 marantaceae 3 0 0 0 0 1 0 0 0 0
## 7 marantaceae 0 0 0 0 0 1 0 0 3 0
## 8 marantaceae 0 0 0 0 0 0 0 0 0 0
## 9 marantaceae 0 0 0 0 0 0 0 0 0 0
## 10 marantaceae 0 0 0 0 0 0 0 0 0 0
## 11 riverine 2 0 0 0 0 0 0 0 8 0
## 12 riverine 1 0 2 0 11 0 0 0 1 0
## 13 riverine 0 0 0 0 1 0 0 0 0 0
## 14 riverine 0 0 0 0 0 0 0 0 0 0
## # ℹ 144 more variables: KOS <dbl>, KPE <dbl>, KPS <dbl>, KUG <dbl>, KXS <dbl>,
## # OBB <dbl>, OBC <dbl>, OBH <dbl>, OBR <dbl>, OCD <dbl>, OCE <dbl>,
## # OCL <dbl>, OCP <dbl>, OCZ <dbl>, ODC <dbl>, ODD <dbl>, ODE <dbl>,
## # ODO <dbl>, ODW <dbl>, ODY <dbl>, OEE <dbl>, OHF <dbl>, OIL <dbl>,
## # OIT <dbl>, OKA <dbl>, OLL <dbl>, OLP <dbl>, OMA <dbl>, OML <dbl>,
## # OMT <dbl>, OMU <dbl>, OMY <dbl>, ONA <dbl>, OOA <dbl>, OOO <dbl>,
## # OPA <dbl>, OPE <dbl>, OPN <dbl>, OPP <dbl>, OPY <dbl>, ORY <dbl>, …
simpson_index <- diversity(Singles_pivoted_data[,2:155], index = "simpson") # Simpson index over all species columns
simpson_index # view
## [1] 0.7659042 0.8188579 0.8809521 0.8876395 0.7836887 0.8751486 0.7898439
## [8] 0.5553086 0.4444444 0.5000000 0.7701952 0.8061702 0.7114419 0.9018444
# create a new data frame with the Simpson index data
simpson_df <- data.frame(simpson_index)
# add new data frame as a new column
Simpson_Indexes <- cbind(Singles_pivoted_data[1], simpson_df)
# plot
ggplot(data = Simpson_Indexes, aes(x = habitat_type, y = simpson_index, fill = habitat_type)) +
geom_boxplot(color = "black") +
scale_fill_manual(values = c('plum3', 'darkseagreen2', 'chartreuse4')) +
labs(title = "Box Plot of Simpson Index by Habitat Type",
x = "Habitat Type",
y = "Simpson Index")
#### SHANNON INDEX ####
Singles_pivoted_data # view data including singletons
## # A tibble: 14 × 155
## habitat_type HDP HEU HLL KAO KAS KDA KDT KGC KIE KLC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 established 6 2 1 2 0 1 0 5 29 0
## 2 established 0 3 0 2 0 3 1 6 17 0
## 3 established 0 0 0 2 0 0 0 2 21 1
## 4 established 0 0 0 1 0 11 0 1 5 0
## 5 established 0 0 0 0 8 0 0 1 14 0
## 6 marantaceae 3 0 0 0 0 1 0 0 0 0
## 7 marantaceae 0 0 0 0 0 1 0 0 3 0
## 8 marantaceae 0 0 0 0 0 0 0 0 0 0
## 9 marantaceae 0 0 0 0 0 0 0 0 0 0
## 10 marantaceae 0 0 0 0 0 0 0 0 0 0
## 11 riverine 2 0 0 0 0 0 0 0 8 0
## 12 riverine 1 0 2 0 11 0 0 0 1 0
## 13 riverine 0 0 0 0 1 0 0 0 0 0
## 14 riverine 0 0 0 0 0 0 0 0 0 0
## # ℹ 144 more variables: KOS <dbl>, KPE <dbl>, KPS <dbl>, KUG <dbl>, KXS <dbl>,
## # OBB <dbl>, OBC <dbl>, OBH <dbl>, OBR <dbl>, OCD <dbl>, OCE <dbl>,
## # OCL <dbl>, OCP <dbl>, OCZ <dbl>, ODC <dbl>, ODD <dbl>, ODE <dbl>,
## # ODO <dbl>, ODW <dbl>, ODY <dbl>, OEE <dbl>, OHF <dbl>, OIL <dbl>,
## # OIT <dbl>, OKA <dbl>, OLL <dbl>, OLP <dbl>, OMA <dbl>, OML <dbl>,
## # OMT <dbl>, OMU <dbl>, OMY <dbl>, ONA <dbl>, OOA <dbl>, OOO <dbl>,
## # OPA <dbl>, OPE <dbl>, OPN <dbl>, OPP <dbl>, OPY <dbl>, ORY <dbl>, …
shannon_index <- diversity(Singles_pivoted_data[,2:155], index = "shannon") # Shannon index over all species columns
shannon_index # view
## [1] 2.1687089 2.3953766 2.6167537 2.7484724 2.1668122 2.3029725 2.0961954
## [8] 1.2115170 0.6365142 0.8675632 2.0959856 2.1820347 1.7718222 2.7729609
# new data frame with the Shannon index data
shannon_df <- data.frame(shannon_index)
# add new data frame as a new column
Shannon_Indexes <- cbind(Singles_pivoted_data[1], shannon_df)
# Set the file path where you want to save the PDF
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/shannon.pdf"
# Open a PDF device
#pdf(pdf_path)
# plot
ggplot(data = Shannon_Indexes, aes(x = habitat_type, y = shannon_index, fill = habitat_type)) +
geom_boxplot(color = "black") +
scale_fill_manual(values = c('plum3', 'darkseagreen2', 'chartreuse4')) +
labs(title = "Box Plot of Shannon Index by Habitat Type",
x = "Habitat Type",
y = "Shannon Index")
#dev.off() #for saving
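Likewise, the "shannon" index is H' = -sum(p_i * ln(p_i)); a minimal check on the same hypothetical counts:
counts <- c(6, 2, 1, 2) # hypothetical species counts
p <- counts / sum(counts) # relative abundances
-sum(p * log(p)) # Shannon H' with vegan's default natural log
diversity(counts, index = "shannon") # matches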
#### AUTOCORRELATION ####
# no significant spatial autocorrelation for the Simpson index
established <- Simpson_Indexes[1:5,2]
riverine <- Simpson_Indexes[11:14,2]
marantaceae <- Simpson_Indexes[6:10,2]
Moran.I(riverine, RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.3671881
##
## $expected
## [1] -0.3333333
##
## $sd
## [1] 0.4176077
##
## $p.value
## [1] 0.9353877
Moran.I(marantaceae, MAR.dists.inv, scaled = TRUE)
## $observed
## [1] -0.3346759
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.2153944
##
## $p.value
## [1] 0.6942307
Moran.I(established, DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.3293823
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.5419306
##
## $p.value
## [1] 0.883542
# no significant spatial autocorrelation for the Shannon index
established <- Shannon_Indexes[1:5,2]
riverine <- Shannon_Indexes[11:14,2]
marantaceae <- Shannon_Indexes[6:10,2]
Moran.I(riverine, RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.2288976
##
## $expected
## [1] -0.3333333
##
## $sd
## [1] 0.3476527
##
## $p.value
## [1] 0.7638702
Moran.I(marantaceae, MAR.dists.inv, scaled = TRUE)
## $observed
## [1] -0.3581802
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.219035
##
## $p.value
## [1] 0.6213805
Moran.I(established, DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.5202688
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.5279465
##
## $p.value
## [1] 0.6087038
#### ANOVA ASSUMPTIONS ####
# established
shapiro_test_resultEST <- shapiro.test(established)
# display the result
print(shapiro_test_resultEST)
##
## Shapiro-Wilk normality test
##
## data: established
## W = 0.89179, p-value = 0.3662
# riv
shapiro_test_resultRIV <- shapiro.test(riverine)
# display the result
print(shapiro_test_resultRIV)
##
## Shapiro-Wilk normality test
##
## data: riverine
## W = 0.94389, p-value = 0.6782
# mar
shapiro_test_resultMAR <- shapiro.test(marantaceae)
# display the result
print(shapiro_test_resultMAR)
##
## Shapiro-Wilk normality test
##
## data: marantaceae
## W = 0.89645, p-value = 0.3906
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(shannon_index ~ habitat_type, data = Shannon_Indexes)
# display the result
print(bartlett_test_result)
##
## Bartlett test of homogeneity of variances
##
## data: shannon_index by habitat_type
## Bartlett's K-squared = 3.6012, df = 2, p-value = 0.1652
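The three Shapiro-Wilk tests above can equivalently be run in one pass by splitting the index by habitat; a minimal sketch:
# Shapiro-Wilk per habitat in one call
lapply(split(Shannon_Indexes$shannon_index, Shannon_Indexes$habitat_type),
       shapiro.test)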
#### KW TEST ####
kw_result <- kruskal.test(simpson_index ~ habitat_type, data = Simpson_Indexes)
# display test results
print(kw_result)
##
## Kruskal-Wallis rank sum test
##
## data: simpson_index by habitat_type
## Kruskal-Wallis chi-squared = 2.9457, df = 2, p-value = 0.2293
# post hoc with Dunn's
dunn_result <- dunn.test(Simpson_Indexes$simpson_index, g = Simpson_Indexes$habitat_type, method = "bonferroni")
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 2.9457, df = 2, p-value = 0.23
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | establis marantac
## ---------+----------------------
## marantac | 1.663043
## | 0.1445
## |
## riverine | 0.409800 -1.158132
## | 1.0000 0.3702
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
# results
print(dunn_result)
## $chi2
## [1] 2.945714
##
## $Z
## [1] 1.6630437 0.4098006 -1.1581320
##
## $P
## [1] 0.04815185 0.34097612 0.12340508
##
## $P.adjusted
## [1] 0.1444555 1.0000000 0.3702152
##
## $comparisons
## [1] "established - marantaceae" "established - riverine"
## [3] "marantaceae - riverine"
# KW test
kw_result <- kruskal.test(shannon_index ~ habitat_type, data = Shannon_Indexes)
# display test results
print(kw_result)
##
## Kruskal-Wallis rank sum test
##
## data: shannon_index by habitat_type
## Kruskal-Wallis chi-squared = 4.8857, df = 2, p-value = 0.08691
# post hoc with Dunn's
dunn_result <- dunn.test(Shannon_Indexes$shannon_index, g = Shannon_Indexes$habitat_type, method = "bonferroni")
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 4.8857, df = 2, p-value = 0.09
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | establis marantac
## ---------+----------------------
## marantac | 2.192193
## | 0.0425
## |
## riverine | 0.783966 -1.282853
## | 0.6496 0.2993
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
# display
print(dunn_result)
## $chi2
## [1] 4.885714
##
## $Z
## [1] 2.1921939 0.7839663 -1.2828540
##
## $P
## [1] 0.01418275 0.21652994 0.09977162
##
## $P.adjusted
## [1] 0.04254826 0.64958982 0.29931487
##
## $comparisons
## [1] "established - marantaceae" "established - riverine"
## [3] "marantaceae - riverine"
DBH
#### GET DATA ####
# habitats
selected_habitats <- c("marantaceae", "riverine", "established")
# subset based on the habitats
subset_DBH <- subset(DBH, Type %in% selected_habitats)
#### HISTOGRAMS OF DATA ####
# color scheme
habitat_colors <- c('plum3', 'darkseagreen2', 'chartreuse4')
## standardized x-axis, free (per-facet) y-axis
# histograms of DBH for each plot, color coding by habitat
ggplot(subset_DBH, aes(x = DBH, fill = Type)) +
geom_histogram(binwidth = 5, position = "identity", alpha = 0.7) +
facet_wrap(~Plot, scales = "free_y") +
labs(title = "Histograms of DBH by Habitat",
x = "DBH",
y = "Frequency") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_y_continuous(labels = scales::comma) +
coord_cartesian(xlim = c(0, 100)) +
scale_fill_manual(values = habitat_colors)
## standardized x- and y-axes
# histograms of DBH for each plot, color coding by habitat
ggplot(subset_DBH, aes(x = DBH, fill = Type)) +
geom_histogram(binwidth = 5, position = "identity", alpha = 0.7) +
facet_wrap(~Plot, scales = "free_y") +
labs(title = "Histograms of DBH by Habitat",
x = "DBH",
y = "Frequency") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_y_continuous(labels = scales::comma, limits = c(0, 30)) +
coord_cartesian(xlim = c(0, 100)) +
scale_fill_manual(values = habitat_colors)
## Warning: Removed 1 rows containing missing values (`geom_bar()`).
## combined histograms
# 3 histograms of DBH for each habitat type
# file path to save
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/DBHHists.pdf"
#pdf(pdf_path)
ggplot(subset_DBH, aes(x = DBH, fill = Type)) +
geom_histogram(binwidth = 5, position = "identity", alpha = 0.7) +
facet_grid(Type ~ ., scales = "free_y", space = "free_y") +
labs(title = "Histograms of DBH by Habitat",
x = "DBH",
y = "Frequency") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_y_continuous(labels = scales::comma) +
coord_cartesian(xlim = c(0, 100), ylim = c(0, 30)) +
scale_fill_manual(values = habitat_colors)
#dev.off() #for saving
#### KS TESTS BETWEEN HABITATS ####
# for each pair of habitat type distributions
habitat_types <- unique(subset_DBH$Type)
ks_test_results <- matrix(NA, nrow = length(habitat_types), ncol = length(habitat_types), dimnames = list(habitat_types, habitat_types))
ks_test_resultsSTATS <- matrix(NA, nrow = length(habitat_types), ncol = length(habitat_types), dimnames = list(habitat_types, habitat_types))
for (i in 1:(length(habitat_types) - 1)) {
for (j in (i + 1):length(habitat_types)) {
ks_test_results[i, j] <- ks.test(subset_DBH$DBH[subset_DBH$Type == habitat_types[i]],subset_DBH$DBH[subset_DBH$Type == habitat_types[j]])$p.value
ks_test_results[j, i] <- ks_test_results[i, j]
ks_test_resultsSTATS[i, j] <- ks.test(subset_DBH$DBH[subset_DBH$Type == habitat_types[i]],subset_DBH$DBH[subset_DBH$Type == habitat_types[j]])$statistic
ks_test_resultsSTATS[j, i] <- ks_test_resultsSTATS[i, j]
}
}
## Warning in ks.test.default(subset_DBH$DBH[subset_DBH$Type == habitat_types[i]],
## : p-value will be approximate in the presence of ties
## Warning in ks.test.default(subset_DBH$DBH[subset_DBH$Type == habitat_types[i]],
## : p-value will be approximate in the presence of ties
# print the results
print(ks_test_results)
## riverine established marantaceae
## riverine NA 0.0064858967 2.205816e-06
## established 6.485897e-03 NA 5.583663e-04
## marantaceae 2.205816e-06 0.0005583663 NA
# all p-values less than 0.05: distributions significantly different
# print the test statistic results
print(ks_test_resultsSTATS)
## riverine established marantaceae
## riverine NA 0.1888528 0.5364286
## established 0.1888528 NA 0.4202597
## marantaceae 0.5364286 0.4202597 NA
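An equivalent, more compact version of the pairwise loop above (a sketch; it reruns the same tests only once per pair, so expect the same ties warnings):
# sketch: the same pairwise KS tests via combn(), one ks.test call per pair
for (pair in combn(habitat_types, 2, simplify = FALSE)) {
  res <- ks.test(subset_DBH$DBH[subset_DBH$Type == pair[1]], subset_DBH$DBH[subset_DBH$Type == pair[2]])
  ks_test_results[pair[1], pair[2]] <- ks_test_results[pair[2], pair[1]] <- res$p.value
  ks_test_resultsSTATS[pair[1], pair[2]] <- ks_test_resultsSTATS[pair[2], pair[1]] <- res$statistic
}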
#### KS TESTS WITHIN HABITATS ####
## riverine
riverine_plot_ids <- unique(subset_DBH$Plot[subset_DBH$Type == "riverine"])
ks_within_plot_results <- matrix(NA, nrow = length(riverine_plot_ids), ncol = length(riverine_plot_ids), dimnames = list(riverine_plot_ids, riverine_plot_ids))
ks_within_plot_resultsSTATS <- matrix(NA, nrow = length(riverine_plot_ids), ncol = length(riverine_plot_ids), dimnames = list(riverine_plot_ids, riverine_plot_ids))
for (i in 1:(length(riverine_plot_ids) - 1)) {
for (j in (i + 1):length(riverine_plot_ids)) {
subset_i <- subset_DBH$DBH[subset_DBH$Type == "riverine" & subset_DBH$Plot == riverine_plot_ids[i]]
subset_j <- subset_DBH$DBH[subset_DBH$Type == "riverine" & subset_DBH$Plot == riverine_plot_ids[j]]
# data in both subsets?
if (length(subset_i) > 1 && length(subset_j) > 1) {
ks_within_plot_results[i, j] <- ks.test(subset_i, subset_j)$p.value
ks_within_plot_results[j, i] <- ks_within_plot_results[i, j]
ks_within_plot_resultsSTATS[i, j] <- ks.test(subset_i, subset_j)$statistic
ks_within_plot_resultsSTATS[j, i] <- ks_within_plot_resultsSTATS[i, j]
} else {
ks_within_plot_results[i, j] <- NA
ks_within_plot_results[j, i] <- NA
ks_within_plot_resultsSTATS[i, j] <- NA
ks_within_plot_resultsSTATS[j, i] <- NA # keep the statistics matrix symmetric too
}
}
}
# print results
print("KS test within plots for riverine:")
## [1] "KS test within plots for riverine:"
print(ks_within_plot_results)
## R5 R20 R4 R30
## R5 NA 0.14554533 0.0007334696 0.749510182
## R20 0.1455453330 NA 0.0740297605 0.098220073
## R4 0.0007334696 0.07402976 NA 0.008129542
## R30 0.7495101823 0.09822007 0.0081295418 NA
print(ks_within_plot_resultsSTATS)
## R5 R20 R4 R30
## R5 NA 0.2428571 0.3784083 0.1459627
## R20 0.2428571 NA 0.2610169 0.3000000
## R4 0.3784083 0.2610169 NA 0.3644068
## R30 0.1459627 0.3000000 0.3644068 NA
## marantaceae
marantaceae_plot_ids <- unique(subset_DBH$Plot[subset_DBH$Type == "marantaceae"])
ks_within_plot_results <- matrix(NA, nrow = length(marantaceae_plot_ids), ncol = length(marantaceae_plot_ids), dimnames = list(marantaceae_plot_ids, marantaceae_plot_ids))
ks_within_plot_resultsSTATS <- matrix(NA, nrow = length(marantaceae_plot_ids), ncol = length(marantaceae_plot_ids), dimnames = list(marantaceae_plot_ids, marantaceae_plot_ids))
for (i in 1:(length(marantaceae_plot_ids) - 1)) {
for (j in (i + 1):length(marantaceae_plot_ids)) {
subset_i <- subset_DBH$DBH[subset_DBH$Type == "marantaceae" & subset_DBH$Plot == marantaceae_plot_ids[i]]
subset_j <- subset_DBH$DBH[subset_DBH$Type == "marantaceae" & subset_DBH$Plot == marantaceae_plot_ids[j]]
ks_within_plot_results[i, j] <- ks.test(subset_i, subset_j)$p.value
ks_within_plot_results[j, i] <- ks_within_plot_results[i, j]
ks_within_plot_resultsSTATS[i, j] <- ks.test(subset_i, subset_j)$statistic
ks_within_plot_resultsSTATS[j, i] <- ks_within_plot_resultsSTATS[i, j]
}
}
# print results
print("KS test within plots for marantaceae:")
## [1] "KS test within plots for marantaceae:"
print(ks_within_plot_results)
## M2 M4 M1 M3 M5
## M2 NA 1.0000000 0.5714286 0.7575758 0.7500000
## M4 1.0000000 NA 0.3333333 0.1818182 0.5000000
## M1 0.5714286 0.3333333 NA 0.1658342 0.1666667
## M3 0.7575758 0.1818182 0.1658342 NA 0.5602118
## M5 0.7500000 0.5000000 0.1666667 0.5602118 NA
print(ks_within_plot_resultsSTATS)
## M2 M4 M1 M3 M5
## M2 NA 0.5000000 0.6 0.5000000 0.5000000
## M4 0.5 NA 1.0 1.0000000 0.8571429
## M1 0.6 1.0000000 NA 0.6000000 0.6000000
## M3 0.5 1.0000000 0.6 NA 0.3571429
## M5 0.5 0.8571429 0.6 0.3571429 NA
## established/dense
established_plot_ids <- unique(subset_DBH$Plot[subset_DBH$Type == "established"])
ks_within_plot_results <- matrix(NA, nrow = length(established_plot_ids), ncol = length(established_plot_ids), dimnames = list(established_plot_ids, established_plot_ids))
ks_within_plot_resultsSTATS <- matrix(NA, nrow = length(established_plot_ids), ncol = length(established_plot_ids), dimnames = list(established_plot_ids, established_plot_ids))
for (i in 1:(length(established_plot_ids) - 1)) {
for (j in (i + 1):length(established_plot_ids)) {
subset_i <- subset_DBH$DBH[subset_DBH$Type == "established" & subset_DBH$Plot == established_plot_ids[i]]
subset_j <- subset_DBH$DBH[subset_DBH$Type == "established" & subset_DBH$Plot == established_plot_ids[j]]
# enough data in both subsets?
if (length(subset_i) > 1 && length(subset_j) > 1) {
ks_within_plot_results[i, j] <- ks.test(subset_i, subset_j)$p.value
ks_within_plot_results[j, i] <- ks_within_plot_results[i, j]
# get test statistic
ks_within_plot_resultsSTATS[i, j] <- ks.test(subset_i, subset_j)$statistic
ks_within_plot_resultsSTATS[j, i] <- ks_within_plot_resultsSTATS[i, j]
} else {
ks_within_plot_results[i, j] <- NA
ks_within_plot_results[j, i] <- NA
ks_within_plot_resultsSTATS[i, j] <- NA
ks_within_plot_resultsSTATS[j, i] <- NA # keep the statistics matrix symmetric too
}
}
}
# results
print("KS test within plots for established:")
## [1] "KS test within plots for established:"
print(ks_within_plot_results)
## F110 F200 F300 F330 F100
## F110 NA 0.005039835 0.1404444 0.0104236 0.08354686
## F200 0.005039835 NA 0.1687311 0.9516712 0.68068988
## F300 0.140444431 0.168731140 NA 0.3601492 0.23567284
## F330 0.010423597 0.951671239 0.3601492 NA 0.72178240
## F100 0.083546859 0.680689882 0.2356728 0.7217824 NA
print(ks_within_plot_resultsSTATS)
## F110 F200 F300 F330 F100
## F110 NA 0.4028595 0.3596059 0.3876980 0.3040752
## F200 0.4028595 NA 0.3257840 0.1048121 0.1552106
## F300 0.3596059 0.3257840 NA 0.2722008 0.3095238
## F330 0.3876980 0.1048121 0.2722008 NA 0.1531532
## F100 0.3040752 0.1552106 0.3095238 0.1531532 NA
# Create a custom color palette for habitat types
habitat_colors <- c('plum3', 'darkseagreen2', 'chartreuse4')
#### 12-BOX BOXPLOT ####
# box plot for each plot by habitat
#ggplot(subset_DBH, aes(x = factor(Plot), y = DBH, fill = Type)) +
# geom_boxplot() +
#facet_wrap(~Type, scales = "free") +
#labs(title = "Box Plots of DBH by Plot and Habitat",
# x = "Plot",
# y = "DBH") +
# scale_y_continuous(limits = c(0, 120)) + # y-axis limits standard
# scale_fill_manual(values = habitat_colors) + # custom colors
#theme_minimal()
# Set the file path where you want to save the PDF
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/DBHBox.pdf"
# Open a PDF device
#pdf(pdf_path)
# 1 box plot per habitat type
#ggplot(subset_DBH, aes(x = factor(Type), y = DBH, fill = Type)) +
# geom_boxplot() +
#labs(title = "Box Plots of DBH by Habitat",
# x = "Habitat Type",
# y = "DBH") +
# scale_y_continuous(limits = c(0, 120)) + # limits
# scale_fill_manual(values = habitat_colors) + # custom colors
#theme_minimal()
#dev.off() #for saving
#### KRUSKAL WALLIS TEST ####
kruskal_test_result <- kruskal.test(DBH ~ Type, data = subset_DBH)
# print results
print("Kruskal-Wallis Test:")
## [1] "Kruskal-Wallis Test:"
print(kruskal_test_result)
##
## Kruskal-Wallis rank sum test
##
## data: DBH by Type
## Kruskal-Wallis chi-squared = 29.83, df = 2, p-value = 3.33e-07
# Post-hoc Dunn's test with Bonferroni correction
posthoc_dunn <- dunn.test(subset_DBH$DBH, subset_DBH$Type, method = "bonferroni")
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 29.83, df = 2, p-value = 0
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | establis marantac
## ---------+----------------------
## marantac | -3.381617
## | 0.0011*
## |
## riverine | 3.216846 5.075600
## | 0.0019* 0.0000*
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
# Print the post-hoc test result
print("Post-hoc Dunn's Test:")
## [1] "Post-hoc Dunn's Test:"
print(posthoc_dunn)
## $chi2
## [1] 29.83004
##
## $Z
## [1] -3.381618 3.216846 5.075600
##
## $P
## [1] 3.603017e-04 6.480404e-04 1.931374e-07
##
## $P.adjusted
## [1] 1.080905e-03 1.944121e-03 5.794121e-07
##
## $comparisons
## [1] "established - marantaceae" "established - riverine"
## [3] "marantaceae - riverine"
# autocorrelation check (Moran's I below): none significant for mean DBH
# Calculate mean values for each combination of Plot and Type
mean_values <- subset_DBH %>%
group_by(Plot, Type) %>%
summarize(mean_DBH = mean(DBH, na.rm = TRUE))
## `summarise()` has grouped output by 'Plot'. You can override using the
## `.groups` argument.
established <- mean_values[1:5,3]
established <- established$mean_DBH # to a vector
riverine <- mean_values[11:14,3]
riverine <- riverine$mean_DBH # to a vector
marantaceae <- mean_values[6:10,3]
marantaceae <- marantaceae$mean_DBH # to a vector
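A less fragile alternative to positional row indexing (a sketch): pull the same vectors by habitat name, so a reordering of mean_values cannot silently scramble the groups.
# sketch: extract the same vectors by habitat name instead of row position
by_type <- split(mean_values$mean_DBH, mean_values$Type)
established <- by_type$established
riverine <- by_type$riverine
marantaceae <- by_type$marantaceae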
Moran.I(riverine, RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.3518021
##
## $expected
## [1] -0.3333333
##
## $sd
## [1] 0.2301075
##
## $p.value
## [1] 0.9360292
Moran.I(marantaceae, MAR.dists.inv, scaled = TRUE)
## $observed
## [1] -0.06408652
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.1477366
##
## $p.value
## [1] 0.2082429
Moran.I(established, DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.1295925
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.2097542
##
## $p.value
## [1] 0.5659402
#### ANOVA ASSUMPTIONS ####
# established
shapiro_test_resultEST <- shapiro.test(established)
# display the result
print(shapiro_test_resultEST)
##
## Shapiro-Wilk normality test
##
## data: established
## W = 0.75547, p-value = 0.03342
# riv
shapiro_test_resultRIV <- shapiro.test(riverine)
# display the result
print(shapiro_test_resultRIV)
##
## Shapiro-Wilk normality test
##
## data: riverine
## W = 0.82671, p-value = 0.1594
# mar
shapiro_test_resultMAR <- shapiro.test(marantaceae)
# display the result
print(shapiro_test_resultMAR)
##
## Shapiro-Wilk normality test
##
## data: marantaceae
## W = 0.94563, p-value = 0.706
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(mean_DBH ~ Type, data = mean_values)
# display the result
print(bartlett_test_result)
##
## Bartlett test of homogeneity of variances
##
## data: mean_DBH by Type
## Bartlett's K-squared = 12.072, df = 2, p-value = 0.002391
Stem density
#### DBH-ABLE TREES STEM DENSITIES ####
# calculate stem density per plot by counting the number of rows for each plot in each habitat type
stem_density <- subset_DBH %>%
group_by(Plot, Type) %>%
summarise(stem_count = n())
## `summarise()` has grouped output by 'Plot'. You can override using the
## `.groups` argument.
# print dataframe result
print(stem_density)
## # A tibble: 14 × 3
## # Groups: Plot [14]
## Plot Type stem_count
## <chr> <chr> <int>
## 1 F100 established 33
## 2 F110 established 29
## 3 F200 established 41
## 4 F300 established 14
## 5 F330 established 37
## 6 M1 marantaceae 5
## 7 M2 marantaceae 2
## 8 M3 marantaceae 10
## 9 M4 marantaceae 1
## 10 M5 marantaceae 7
## 11 R20 riverine 35
## 12 R30 riverine 28
## 13 R4 riverine 59
## 14 R5 riverine 46
# plot stem densities -- calculate average stem density per plot by habitat
average_density <- stem_density %>%
group_by(Type) %>%
summarise(avg_density = mean(stem_count))
average_density # view
## # A tibble: 3 × 2
## Type avg_density
## <chr> <dbl>
## 1 established 30.8
## 2 marantaceae 5
## 3 riverine 42
# to save the PDF
pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/stemD.pdf"
pdf(pdf_path)
# plot average stem density per plot by habitat as a box plot
ggplot(stem_density, aes(x = Type, y = stem_count, fill = Type)) +
geom_boxplot() +
labs(title = "Average Stem Density per Plot by Habitat (Box Plot)",
x = "Habitat Type",
y = "Stem Density",
fill = "Habitat Type") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_manual(values = habitat_colors)
dev.off()
## quartz_off_screen
## 2
#### ANOVA ASSUMPTIONS ####
marantaceae <- stem_density$stem_count[stem_density$Type == "marantaceae"]
riverine <- stem_density$stem_count[stem_density$Type == "riverine"]
established <- stem_density$stem_count[stem_density$Type == "established"]
# mar
shapiro_test_resultMAR <- shapiro.test(marantaceae)
# display the result
print(shapiro_test_resultMAR)
##
## Shapiro-Wilk normality test
##
## data: marantaceae
## W = 0.95695, p-value = 0.7866
shapiro_test_resultRIV <- shapiro.test(riverine)
# display the result
print(shapiro_test_resultRIV)
##
## Shapiro-Wilk normality test
##
## data: riverine
## W = 0.97313, p-value = 0.8608
shapiro_test_resultEST <- shapiro.test(established)
# display the result
print(shapiro_test_resultEST)
##
## Shapiro-Wilk normality test
##
## data: established
## W = 0.9128, p-value = 0.4846
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(stem_count ~ Type, data = stem_density)
# display the result
print(bartlett_test_result)
##
## Bartlett test of homogeneity of variances
##
## data: stem_count by Type
## Bartlett's K-squared = 4.65, df = 2, p-value = 0.09778
#### ANOVA ####
# ANOVA
anova_result <- aov(stem_count ~ Type, data = stem_density)
# Print ANOVA result
print("ANOVA:")
## [1] "ANOVA:"
print(summary(anova_result))
## Df Sum Sq Mean Sq F value Pr(>F)
## Type 2 3324 1661.8 17.63 0.000371 ***
## Residuals 11 1037 94.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Post-hoc Tukey test
posthoc_tukey <- TukeyHSD(anova_result)
# Print post-hoc test result
print("Post-hoc Tukey Test:")
## [1] "Post-hoc Tukey Test:"
print(posthoc_tukey)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = stem_count ~ Type, data = stem_density)
##
## $Type
## diff lwr upr p adj
## marantaceae-established -25.8 -42.383748 -9.216252 0.0038769
## riverine-established 11.2 -6.389721 28.789721 0.2414447
## riverine-marantaceae 37.0 19.410279 54.589721 0.0003827
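Base R can also draw the Tukey family-wise confidence intervals directly, which makes the significant pairs easy to see:
# visualize the Tukey 95% family-wise confidence intervals
plot(posthoc_tukey, las = 1)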
# autocorrelation check (Moran's I below): marantaceae shows significant autocorrelation (p < 0.05); the others do not
established <- stem_density[1:5,3]
established <- established$stem_count # to a vector
riverine <- stem_density[11:14,3]
riverine <- riverine$stem_count # to a vector
marantaceae <- stem_density[6:10,3]
marantaceae <- marantaceae$stem_count # to a vector
Moran.I(riverine, RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.8200796
##
## $expected
## [1] -0.3333333
##
## $sd
## [1] 0.5291706
##
## $p.value
## [1] 0.3576623
Moran.I(marantaceae, MAR.dists.inv, scaled = TRUE)
## $observed
## [1] 0.1417477
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.1986743
##
## $p.value
## [1] 0.04863142
Moran.I(established, DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.5565877
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.3574661
##
## $p.value
## [1] 0.391075
APPLY WD VAR TO DBH !DO NOT RUN!
#SDBH <- subset_DBH
#SDBH$WD <- NA # initialize WD column
#SDBH$var <- NA # initialize variance column
#SDBH$WD_ceil <- NA # initialize WD ceiling column
#SDBH$WD_floor <- NA # initialize WD floor column
#for (i in 1:nrow(SDBH)) { # loop through small trees rows
# hab_class <- SDBH[i,2]
# RowCode <- SDBH[i,3] # get species code
# SPoi <- RowCode$Sp # get sp subset
#row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV same for SPDWD
#VVal <- SPDWDV$v[row_placement] # get the v value at the row number
#SDBH$var[i] <- VVal # add it in the PS dataframe
# awd <- SPDWD$AVG_WOOD_D[row_placement]
# SDBH$WD[i] <- awd
# SDBH$WD_ceil[i] <- awd + ((1-VVal)*awd)
#SDBH$WD_floor[i] <- awd - ((1-VVal)*awd)
#}
#write.csv(SDBH, file = "SDBH.csv", row.names = FALSE)
# resulting in output: SDBH
Carbon from plots
#### TO ASSIGN WOOD DENSITIES ####
# this code will occasionally stop and throw an error; when it runs cleanly the numbers are correct, so check the tables and rerun any sections that produce NAs.
#subset_DBH$WD <- NA
#for (i in 1:nrow(subset_DBH)) {
# DBHsp <- subset_DBH$Sp[i]
#matching_indices <- which(SPDWD$Code == DBHsp)
# Check if there are matching indices
#if (length(matching_indices) > 0) {
# w <- SPDWD$AVG_WOOD_D[matching_indices[1]] # Assuming unique matches
# Check if w is not NA before assignment
#if (!is.na(w)) {
# subset_DBH$WD[i] <- w
#}
#}
#}
#### SPLIT DATA ####
# separate by plot
plot_types <- unique(subD$Plot)
# list of data frames for each plot type
split_data <- lapply(plot_types, function(plot_type) {
subset_DBH_subset <- subset(subD, Plot == plot_type)
return(subset_DBH_subset)
})
#### RUN ALLOMETRY ####
# allometry per tree of AGB in kg/20mx20m area
C <- list() # initialize
mat <- matrix(NA, nrow = length(split_data), ncol = 2) # create mat to fill
for (i in 1:length(split_data)) { # loop through split data
plot <- split_data[[i]] # subset based on loop
plot$AGB <- NA # new column of C
#p x exp( -1.499 + 2.148 ln (D) + 0.207(ln(D))^2-0.0281(ln(D))^3) -- allometry
plot$AGB <- with(plot, WD * exp(-1.499 + 2.148 * log(DBH) + 0.207 * log(DBH)^2 - 0.0281 * log(DBH)^3)) # apply allometry
C[[i]] <- (plot) # get plot information
mat[i,1] <- (plot$Plot[1]) # name
mat[i,2] <- (sum(plot$AGB)) # sum of all individual trees
}
mat[, 2] <- as.numeric(mat[, 2]) # make sure numeric data
# split rows by habitat (R = riverine, F = established/dense, M = marantaceae)
R_mat <- mat[c(1, 2, 8, 9), ]
F_mat <- mat[c(3, 4, 5, 6, 7), ]
M_mat <- mat[c(10, 11, 12, 13, 14), ]
# Convert character vectors to numeric just in case
R_mat <- as.numeric(R_mat)
## Warning: NAs introduced by coercion
M_mat <- as.numeric(M_mat)
## Warning: NAs introduced by coercion
F_mat <- as.numeric(F_mat)
## Warning: NAs introduced by coercion
# Combine values into a list
data_list <- list(R_mat, M_mat, F_mat)
# to save the PDF
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/C.pdf"
#pdf(pdf_path)
#### PLOT ####
# box plots side by side
boxplot(data_list, col = c('plum3', 'darkseagreen2', 'chartreuse4'), names = c("R_mat", "M_mat", "F_mat"),
main = "Box Plots", ylab = "Values") # sum per plot
# Convert to Mg/ha
mat <- as.data.frame(mat)
mat[,2] <- as.numeric(as.character(mat[,2]))
mat$CMgHa <- mat[,2] * 0.001/ 0.04
mat$CMgHa
## [1] 303.71403 237.33662 172.32469 649.53696 762.72509 816.92236 485.75671
## [8] 527.86620 208.24346 111.66959 103.58240 65.07394 274.46884 239.75876
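As a sanity check on the allometry and the unit conversion (a worked sketch with assumed inputs, not thesis data): a single tree with wood density 0.6 g/cm^3 and DBH 30 cm yields roughly 724 kg of AGB, and plot sums in kg per 20 m x 20 m (0.04 ha) convert to Mg/ha via the factor 0.001/0.04 used above.
# worked sketch: one tree through the allometry (WD and DBH are assumed values)
WD <- 0.6 # wood density, g/cm^3 (assumed)
DBH <- 30 # diameter at breast height, cm (assumed)
agb_kg <- WD * exp(-1.499 + 2.148 * log(DBH) + 0.207 * log(DBH)^2 - 0.0281 * log(DBH)^3)
agb_kg # ~724 kg for this one tree
agb_kg * 0.001 / 0.04 # the same mass expressed as Mg/ha over a 0.04 ha plot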
#### ANOVA ####
# grouping variable, ordered to match the rows of mat
group <- c("riverine","riverine","established","established","established","established","established","riverine","riverine","marantaceae","marantaceae","marantaceae","marantaceae","marantaceae")
mat$GV <- group
# ANOVA
anova_result <- aov(CMgHa ~ GV, data = mat)
# Run Tukey post-hoc test
tukey_result <- TukeyHSD(anova_result)
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## GV 2 444774 222387 6.68 0.0126 *
## Residuals 11 366187 33290
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Display detailed results
summary(tukey_result)
## Length Class Mode
## GV 12 -none- numeric
tukey_result$`GV`
## diff lwr upr p adj
## marantaceae-established -418.5425 -730.2066 -106.87832 0.0102023
## riverine-established -258.1631 -588.7328 72.40665 0.1331967
## riverine-marantaceae 160.3794 -170.1904 490.94911 0.4187101
# marantaceae and established (dense) differ significantly
#### ANOVA ASSUMPTIONS ####
# shapiro
# established
shapiro_test_resultEST <- shapiro.test(mat[3:7,3])
# display the result
print(shapiro_test_resultEST)
##
## Shapiro-Wilk normality test
##
## data: mat[3:7, 3]
## W = 0.91051, p-value = 0.4707
# mar
shapiro_test_resultMAR <- shapiro.test(mat[10:14,3])
# display the result
print(shapiro_test_resultMAR)
##
## Shapiro-Wilk normality test
##
## data: mat[10:14, 3]
## W = 0.87082, p-value = 0.2697
# riv
shapiro_test_resultRIV <- shapiro.test(mat[c(1,2,8,9),3])
# display the result
print(shapiro_test_resultRIV)
##
## Shapiro-Wilk normality test
##
## data: mat[c(1, 2, 8, 9), 3]
## W = 0.84761, p-value = 0.2185
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(CMgHa ~ GV, data = mat)
# display the result
print(bartlett_test_result)
##
## Bartlett test of homogeneity of variances
##
## data: CMgHa by GV
## Bartlett's K-squared = 3.5911, df = 2, p-value = 0.166
mean(mat[10:14,2])
## [1] 6356.428
mean(mat[c(1,2,8,9),2])
## [1] 12771.6
mean(mat[3:7,2])
## [1] 23098.13
#### AUTOCORRELATION ####
Moran.I(mat[10:14,2], MAR.dists.inv, scaled = TRUE) # run Moran's I
## $observed
## [1] -0.4297229
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.2175661
##
## $p.value
## [1] 0.4087694
Moran.I(mat[c(1,2,8,9),2], RIV.dists.inv, scaled = TRUE) # run Moran's I
## $observed
## [1] -0.4470218
##
## $expected
## [1] -0.3333333
##
## $sd
## [1] 0.2952488
##
## $p.value
## [1] 0.700193
Moran.I(mat[3:7,2], DEN.dists.inv, scaled = TRUE) # run Moran's I
## $observed
## [1] -0.3562061
##
## $expected
## [1] -0.25
##
## $sd
## [1] 0.4111628
##
## $p.value
## [1] 0.7961702
Run on ceiling and floor based on wood density variance
#### SPLIT DATA ####
# separate by plot
plot_types <- unique(SDBH$Plot)
# list of data frames for each plot type
split_data <- lapply(plot_types, function(plot_type) {
subset_DBH_subset <- subset(SDBH, Plot == plot_type)
return(subset_DBH_subset)
})
#### RUN ALLOMETRY ####
# allometry per tree of AGB in kg/20mx20m area
D <- list() # initialize
matCiel <- matrix(NA, nrow = length(split_data), ncol = 2) # create mat to fill
for (i in 1:length(split_data)) { # loop through split data
plot <- split_data[[i]] # subset based on loop
plot$AGB <- NA # new column of C
#p x exp( -1.499 + 2.148 ln (D) + 0.207(ln(D))^2-0.0281(ln(D))^3) -- allometry
plot$AGB <- with(plot, WD_ceil * exp(-1.499 + 2.148 * log(DBH) + 0.207 * log(DBH)^2 - 0.0281 * log(DBH)^3)) # apply allometry
D[[i]] <- (plot) # get plot information
matCiel[i,1] <- (plot$Plot[1]) # name
matCiel[i,2] <- (sum(plot$AGB)) # sum of all individual trees
}
matCiel[, 2] <- as.numeric(matCiel[, 2]) # make sure numeric data
# split rows by habitat (R = riverine, F = established/dense, M = marantaceae)
R_mat <- matCiel[c(1, 2, 8, 9), ]
F_mat <- matCiel[c(3, 4, 5, 6, 7), ]
M_mat <- matCiel[c(10, 11, 12, 13, 14), ]
# Convert character vectors to numeric just in case
R_mat <- as.numeric(R_mat)
## Warning: NAs introduced by coercion
M_mat <- as.numeric(M_mat)
## Warning: NAs introduced by coercion
F_mat <- as.numeric(F_mat)
## Warning: NAs introduced by coercion
# Combine values into a list
data_list <- list(na.omit(R_mat), na.omit(M_mat), na.omit(F_mat))
# Convert to Mg/ha
matCiel <- as.data.frame(matCiel)
matCiel[,2] <- as.numeric(as.character(matCiel[,2]))
matCiel$CMgHa <- matCiel[,2] * 0.001/ 0.04
matCiel$CMgHa
## [1] 394.66337 375.92490 212.11268 723.09044 848.07714 971.90466 572.97147
## [8] 659.65005 231.25517 123.14901 130.51382 70.47897 314.29234 291.45400
#### SPLIT DATA ####
# separate by plot
plot_types <- unique(SDBH$Plot)
# list of data frames for each plot type
split_data <- lapply(plot_types, function(plot_type) {
subset_DBH_subset <- subset(SDBH, Plot == plot_type)
return(subset_DBH_subset)
})
#### RUN ALLOMETRY ####
# allometry per tree of AGB in kg/20mx20m area
D <- list() # initialize
matFloor <- matrix(NA, nrow = length(split_data), ncol = 2) # create mat to fill
for (i in 1:length(split_data)) { # loop through split data
plot <- split_data[[i]] # subset based on loop
plot$AGB <- NA # new column of C
#p x exp( -1.499 + 2.148 ln (D) + 0.207(ln(D))^2-0.0281(ln(D))^3) -- allometry
plot$AGB <- with(plot, WD_floor * exp(-1.499 + 2.148 * log(DBH) + 0.207 * log(DBH)^2 - 0.0281 * log(DBH)^3)) # apply allometry
D[[i]] <- (plot) # get plot information
matFloor[i,1] <- (plot$Plot[1]) # name
matFloor[i,2] <- (sum(plot$AGB)) # sum of all individual trees
}
matFloor[, 2] <- as.numeric(matFloor[, 2]) # make sure numeric data
# split rows by habitat (R = riverine, F = established/dense, M = marantaceae)
R_mat <- matFloor[c(1, 2, 8, 9), ]
F_mat <- matFloor[c(3, 4, 5, 6, 7), ]
M_mat <- matFloor[c(10, 11, 12, 13, 14), ]
# Convert character vectors to numeric just in case
R_mat <- as.numeric(R_mat)
## Warning: NAs introduced by coercion
M_mat <- as.numeric(M_mat)
## Warning: NAs introduced by coercion
F_mat <- as.numeric(F_mat)
## Warning: NAs introduced by coercion
# Combine values into a list
data_list <- list(na.omit(R_mat), na.omit(M_mat), na.omit(F_mat))
# Convert to Mg/ha
matFloor <- as.data.frame(matFloor)
matFloor[,2] <- as.numeric(as.character(matFloor[,2]))
matFloor$CMgHa <- matFloor[,2] * 0.001/ 0.04
matFloor$CMgHa
## [1] 212.76470 98.74834 132.53670 575.98348 677.37304 661.94006 398.54196
## [8] 396.08234 185.23175 100.19017 76.65098 59.66890 234.64534 188.06352
MAT <- cbind(mat$V1, matFloor$CMgHa, mat$CMgHa, matCiel$CMgHa)
—- REMOTE SENSING DATA ANALYSIS —-
Land classification accuracy and statistics calculated via ArcGIS Pro and Excel
Clip data
#### CHM ####
# Use mask function to clip CHM to the footprint
CHM_clipped <- mask(CHM, FOOTP)
plot(CHM_clipped) # view
#### LiDAR Land Classification ####
# mask
LidLC_clipped <- mask(LidLC, FOOTP) # clip land classification to footprint
color_palette <- c('darkseagreen2', 'plum3', 'tan', 'midnightblue','chartreuse4', 'chocolate4', 'gray9')
par(mfrow=c(1, 2)) # side-by-side plot clipped and unclipped
plot(LidLC, col = color_palette, breaks = c(0, 1, 9, 10, 90, 100, 1000, 10000), main = "LidLC Raster", legend = TRUE)
plot(LidLC_clipped, col = color_palette, breaks = c(0, 1, 9, 10, 90, 100, 1000, 10000), main = "LidLC Raster Clip", legend = TRUE)
par(mfrow=c(1, 1)) # back to normal plot
Classify sections of the LiDAR CHM from the LiDAR land classification. !DO NOT RUN! — running takes a long time; outputs are already loaded in.
#### CLASSIFY POLYGONS FROM LiDAR LAND CLASS WHERE THERE IS 95% CLASSIFICATION CONFIDENCE ####
#k = 1 # initialize
#Pltz$Value <- NA # initialize new column
#Pltz_sf <- st_as_sf(Pltz, coords = c("X", "Y")) # spatial
#for (i in 1:4639) { # loop through randomly created polygons
#extent_polygon <- st_bbox(Pltz_sf[i, "geometry"]) # get the poly extent
#raster_cropped <- crop(LidLC_clipped, extent_polygon) # crop classification raster to selected poly extent
#values_in_extent <- raster::extract(raster_cropped, Pltz[i, ], buffer = 0.1) # get values from cropped raster
#value_table <- table(unlist(values_in_extent)) # values to table
#most_common_value <- names(which.max(value_table)) # most common value
#proportion <- value_table[most_common_value] / sum(value_table) # how common?
#is_95_percent_or_more <- proportion >= 0.95 # if the proportion is 95% or more
#if (is_95_percent_or_more) { #if yes, is 95% or more
#Pltz$Value[i] <- most_common_value # then save the most common value
#} else { # otherwise
# Pltz$Value[i] <- NA # denote most common value as NA
#}
#}
#### SUBSET POLYGONS OF 95% CLASSIFICATION CONFIDENCE ####
#subset_Pltz <- Pltz[complete.cases(Pltz$Value), ] # where Pltz$Value has value
#subset_Pltz$Value <- as.numeric(as.character(subset_Pltz$Value)) # convert to numeric
#value_counts <- table(subset_Pltz$Value) # occurrences of each land class type
#print(value_counts) # print to evaluate
# classification values to care about: 1, 9, and 100 (the forest types). 849 is the minimum polygon count among these classes...
# So, randomly take 849 from each class (the counts were 882, 880, and 849).
#set.seed(123) # randomly select polygons: for replicability
# randomly select 849 rows where subset_Pltz$Value equals 1 (mar)
#MarSub <- subset_Pltz[subset_Pltz$Value == 1, ][sample(nrow(subset_Pltz[subset_Pltz$Value == 1, ]), 849), ]
#plot(MarSub) # plot polygons
# randomly select 849 rows where subset_Pltz$Value equals 9 (den)
#DenSub <- subset_Pltz[subset_Pltz$Value == 9, ][sample(nrow(subset_Pltz[subset_Pltz$Value == 9, ]), 849), ]
#plot(DenSub) # plot polygons
# randomly select 849 rows where subset_Pltz$Value equals 100 (riv)
#RivSub <- subset_Pltz[subset_Pltz$Value == 100, ][sample(nrow(subset_Pltz[subset_Pltz$Value == 100, ]), 849), ]
#plot(RivSub) # plot polygons
#### CLIP CHM TO SELECTED POLYGONS ####
# does not always work inside the chunk for some reason... if this happens, run it in the console
#temp_dir <- tempdir() # set temporary directory to reduce clutter and confusion
#options(wd = temp_dir) # set to wd
#CHM_clipped_mar_sub <- raster::mask(CHM_clipped, MarSub) # clip CHM_clipped to MarSub polygons
#plot(CHM_clipped_mar_sub, main = "CHM_clipped to MarSub Polygons") # plot
#CHM_clipped_riv_sub <- raster::mask(CHM_clipped, RivSub) # clip CHM_clipped to RivSub polygons
#plot(CHM_clipped_riv_sub, main = "CHM_clipped to RivSub Polygons") # plot
#CHM_clipped_den_sub <- raster::mask(CHM_clipped, DenSub) # clip CHM_clipped to DenSub polygons
#plot(CHM_clipped_den_sub, main = "CHM_clipped to DenSub Polygons") # plot
#### TURN CHM CLIPS TO NON-SPATIAL DATA ####
#result_list <- list() # initialize list
#for (i in 1:nrow(MarSub)) { # loop through MarSub polygons
#chm_values <- raster::extract(CHM_clipped, MarSub[i,], df = TRUE, cellnumbers = TRUE) # get chm values for the polygon selected
#coordinates <- raster::xyFromCell(CHM_clipped, chm_values$cell) # collect the coordinates
#result_df <- data.frame( # combine CHM value and coordinates and add to a data frame
# Longitude = coordinates[, 1],
# Latitude = coordinates[, 2],
#CHM_Value = chm_values$CHMs)
# result_list <- append(result_list, list(result_df)) # add data frame to a list of results
# print(i) to check the loop
#}
#Mresult_final_df <- do.call(rbind, result_list) # expand out the list to a dataframe of results
#result_list <- list() # initialize list
#for (i in 1:nrow(RivSub)) { # loop through RivSub polygons
#chm_values <- raster::extract(CHM_clipped, RivSub[i,], df = TRUE, cellnumbers = TRUE) # get chm values for the polygon selected
#coordinates <- raster::xyFromCell(CHM_clipped, chm_values$cell) # collect the coordinates
#result_df <- data.frame( # combine CHM value and coordinates and add to a data frame
# Longitude = coordinates[, 1],
# Latitude = coordinates[, 2],
# CHM_Value = chm_values$CHMs)
# result_list <- append(result_list, list(result_df)) # add data frame to a list of results
# print(i) # as a check
#}
#Rresult_final_df <- do.call(rbind, result_list) # expand out the list to a dataframe of results
#result_list <- list() # initialize list
#for (i in 1:nrow(DenSub)) { # loop through DenSub polygons
#chm_values <- raster::extract(CHM_clipped, DenSub[i,], df = TRUE, cellnumbers = TRUE) # get chm values for the polygon selected
#coordinates <- raster::xyFromCell(CHM_clipped, chm_values$cell) # collect the coordinates
# result_df <- data.frame( # combine CHM value and coordinates and add to a data frame
# Longitude = coordinates[, 1],
# Latitude = coordinates[, 2],
# CHM_Value = chm_values$CHMs)
#result_list <- append(result_list, list(result_df)) # add data frame to a list of results
# print(i) # a check
#}
#Dresult_final_df <- do.call(rbind, result_list) # expand out the list to a dataframe of results
#### OUTPUT NON-SPATIAL DATA TO .CSV ####
#write.csv(Mresult_final_df, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/MRes.csv", row.names = FALSE) # save
#write.csv(Rresult_final_df, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/RRes.csv", row.names = FALSE) # save
#write.csv(Dresult_final_df, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/DRes.csv", row.names = FALSE) # save
#### OUTPUT SELECTED POLYGONS TO .GEOJSON ####
#st_write(RivSub, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/RivSub.geojson", driver = "GeoJSON") # save
#st_write(DenSub, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/DenSub.geojson", driver = "GeoJSON") # save
#st_write(MarSub, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/MarSub.geojson", driver = "GeoJSON") # save
#### OUTPUT CHM CLIPS AS .TIF FILES ####
#output_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/CHM_clipped_den_sub.tif" # path
#raster::writeRaster(CHM_clipped_den_sub, filename = output_path, format = "GTiff", overwrite = TRUE) # save
#output_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/CHM_clipped_riv_sub.tif" # path
#raster::writeRaster(CHM_clipped_riv_sub, filename = output_path, format = "GTiff", overwrite = TRUE) # save
#output_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/CHM_clipped_mar_sub.tif" # path
#raster::writeRaster(CHM_clipped_mar_sub, filename = output_path, format = "GTiff", overwrite = TRUE) # save
Canopy height histograms
#### !DO NOT RUN! FROM SPATIAL DATA ####
# gives the same histograms as the non-spatial data below, but takes longer. !DO NOT RUN! this code
#hist_valuesM <- hist(values(CHM_MARs), main = "Histogram of CHM_MARs", xlab = "Pixel Values", col = "skyblue", border = "black") # Mar
#hist_valuesD <- hist(values(CHM_DENs), main = "Histogram of CHM_DENs", xlab = "Pixel Values", col = "skyblue", border = "black") # Den
#hist_valuesR <- hist(values(CHM_RIVs), main = "Histogram of CHM_RIVs", xlab = "Pixel Values", col = "skyblue", border = "black") # Riv
#### FROM NON-SPATIAL DATA ####
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/CHMMvisHist.pdf"
#pdf(pdf_path)
hist(MRes$CHM_Value) # Mar
#dev.off()
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/CHMRvisHist.pdf"
#pdf(pdf_path)
hist(RRes$CHM_Value) # Riv
#dev.off()
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/CHMDvisHist.pdf"
#pdf(pdf_path)
hist(DRes$CHM_Value) # Den
#dev.off()
#### K-S TEST ####
# M and R
ks_test_MRes_RRes <- ks.test(MRes$CHM_Value, RRes$CHM_Value)
## Warning in ks.test.default(MRes$CHM_Value, RRes$CHM_Value): p-value will be
## approximate in the presence of ties
print("KS Test between MRes and RRes:")
## [1] "KS Test between MRes and RRes:"
print(ks_test_MRes_RRes)
##
## Asymptotic two-sample Kolmogorov-Smirnov test
##
## data: MRes$CHM_Value and RRes$CHM_Value
## D = 0.24914, p-value < 2.2e-16
## alternative hypothesis: two-sided
# M and D
ks_test_MRes_DRes <- ks.test(MRes$CHM_Value, DRes$CHM_Value)
## Warning in ks.test.default(MRes$CHM_Value, DRes$CHM_Value): p-value will be
## approximate in the presence of ties
print("KS Test between MRes and DRes:")
## [1] "KS Test between MRes and DRes:"
print(ks_test_MRes_DRes)
##
## Asymptotic two-sample Kolmogorov-Smirnov test
##
## data: MRes$CHM_Value and DRes$CHM_Value
## D = 0.31202, p-value < 2.2e-16
## alternative hypothesis: two-sided
# R and D
ks_test_RRes_DRes <- ks.test(RRes$CHM_Value, DRes$CHM_Value)
## Warning in ks.test.default(RRes$CHM_Value, DRes$CHM_Value): p-value will be
## approximate in the presence of ties
print("KS Test between RRes and DRes:")
## [1] "KS Test between RRes and DRes:"
print(ks_test_RRes_DRes)
##
## Asymptotic two-sample Kolmogorov-Smirnov test
##
## data: RRes$CHM_Value and DRes$CHM_Value
## D = 0.37502, p-value < 2.2e-16
## alternative hypothesis: two-sided
# all significantly different, but the sample size is quite large, so check with the Benjamini-Hochberg false discovery rate correction
p_values <- c(ks_test_MRes_RRes$p.value, ks_test_MRes_DRes$p.value, ks_test_RRes_DRes$p.value)
#p-values in a vector
# Benjamini-Hochberg correction
adjusted_p_values <- p.adjust(p_values, method = "BH")
# adjusted p-values
print(adjusted_p_values)
## [1] 0 0 0
#### ANOVA ASSUMPTIONS ####
# have to randomly select from values because sample size is too large for shapiro test
set.seed(1) # Setting a seed for reproducibility
random_subsetM <- sample(MRes$CHM_Value, size = 5000, replace = FALSE)
shapiro_test_resultMAR <- shapiro.test(random_subsetM)
# display the result
print(shapiro_test_resultMAR)
##
## Shapiro-Wilk normality test
##
## data: random_subsetM
## W = 0.92461, p-value < 2.2e-16
# have to randomly select from values because sample size is too large for shapiro test
set.seed(1) # Setting a seed for reproducibility
random_subsetR <- sample(RRes$CHM_Value, size = 5000, replace = FALSE)
shapiro_test_resultRIV <- shapiro.test(random_subsetR)
# display the result
print(shapiro_test_resultRIV)
##
## Shapiro-Wilk normality test
##
## data: random_subsetR
## W = 0.99414, p-value = 2.404e-13
# have to randomly select from values because sample size is too large for shapiro test
set.seed(1) # Setting a seed for reproducibility
random_subsetD <- sample(DRes$CHM_Value, size = 5000, replace = FALSE)
shapiro_test_resultDEN <- shapiro.test(random_subsetD)
# display the result
print(shapiro_test_resultDEN)
##
## Shapiro-Wilk normality test
##
## data: random_subsetD
## W = 0.97962, p-value < 2.2e-16
random_subsetM <- data.frame(random_subsetM)
random_subsetM$Hab <- "Mar"
names(random_subsetM) <- c("RS","Hab")
random_subsetR <- data.frame(random_subsetR)
random_subsetR$Hab <- "Riv"
names(random_subsetR) <- c("RS","Hab")
random_subsetD <- data.frame(random_subsetD)
random_subsetD$Hab <- "Den"
names(random_subsetD) <- c("RS","Hab")
A_subs <- rbind(random_subsetD,random_subsetR,random_subsetM)
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(RS ~ Hab, data = A_subs)
# display the result
print(bartlett_test_result)
##
## Bartlett test of homogeneity of variances
##
## data: RS by Hab
## Bartlett's K-squared = 1343.6, df = 2, p-value < 2.2e-16
#### KW Test ####
kw_result <- kruskal.test(RS ~ Hab, data = A_subs)
# display test results
print(kw_result)
##
## Kruskal-Wallis rank sum test
##
## data: RS by Hab
## Kruskal-Wallis chi-squared = 1407.9, df = 2, p-value < 2.2e-16
# post hoc with Dunn's
dunn_result <- dunn.test(A_subs$RS, g = A_subs$Hab, method = "bonferroni")
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 1407.9435, df = 2, p-value = 0
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | Den Mar
## ---------+----------------------
## Mar | 34.32759
## | 0.0000*
## |
## Riv | 30.31946 -4.116823
## | 0.0000* 0.0001*
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
# results
print(dunn_result)
## $chi2
## [1] 1407.944
##
## $Z
## [1] 34.327591 30.319469 -4.116824
##
## $P
## [1] 1.521145e-258 3.174895e-202 1.920650e-05
##
## $P.adjusted
## [1] 4.563434e-258 9.524686e-202 5.761950e-05
##
## $comparisons
## [1] "Den - Mar" "Den - Riv" "Mar - Riv"
#### POLISHED HISTOGRAM ####
# Set up the layout for the three-panel plot
par(mfrow = c(3, 1))
# Define the x-axis range
x_range <- c(0, 50)
y_range <- c(0,.06)
# Plot histogram for MRes$CHM_Value (prob = TRUE plots density, so label the y-axis accordingly)
hist(MRes$CHM_Value, col = 'plum3', main = "Mar", xlab = "CHM_Value", ylab = "Density", prob = TRUE, xlim = x_range, ylim = y_range)
# Plot histogram for RRes$CHM_Value
hist(RRes$CHM_Value, col = 'chartreuse4', main = "Riv", xlab = "CHM_Value", ylab = "Density", prob = TRUE, xlim = x_range, ylim = y_range)
# Plot histogram for DRes$CHM_Value
hist(DRes$CHM_Value, col = 'darkseagreen2', main = "Den", xlab = "CHM_Value", ylab = "Density", prob = TRUE, xlim = x_range, ylim = y_range)
Canopy gap
#### !DO NOT RUN! OVERALL CANOPY GAP ####
# This section takes very long to run -- !DO NOT RUN!: values already calculated
## mar
CHM_MARs_reclassified <- raster::reclassify(CHM_MARs, matrix(c(-Inf, 6, 0, 6, Inf, 1), ncol = 3, byrow = TRUE)) # reclassify values so that 6 m and above are canopy
#average_valueM <- cellStats(CHM_MARs_reclassified, stat = "mean") # calculate mean
#average_valueM # mean of the 0/1 raster = proportion of canopy cells (height >= 6 m); the gap fraction is its complement
## riv
CHM_RIVs_reclassified <- raster::reclassify(CHM_RIVs, matrix(c(-Inf, 6, 0, 6, Inf, 1), ncol = 3, byrow = TRUE)) # reclassify values so that 6 m and above are canopy
#average_valueR <- cellStats(CHM_RIVs_reclassified, stat = "mean") # calculate mean
#average_valueR # mean of the 0/1 raster = proportion of canopy cells (height >= 6 m); the gap fraction is its complement
## den
CHM_DENs_reclassified <- raster::reclassify(CHM_DENs, matrix(c(-Inf, 6, 0, 6, Inf, 1), ncol = 3, byrow = TRUE)) # reclassify values so that 6 m and above are canopy
#average_valueD <- cellStats(CHM_DENs_reclassified, stat = "mean") # calculate mean
#average_valueD # mean of the 0/1 raster = proportion of canopy cells (height >= 6 m); the gap fraction is its complement
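# toy sketch (made-up heights, not thesis data): after the 6 m reclassification,
# the cellStats mean is the fraction of pixels classified as canopy (height above 6 m)
toy <- raster(matrix(c(2, 8, 15, 3, 30, 1, 7, 0, 22), nrow = 3)) # toy height raster
toy_bin <- raster::reclassify(toy, matrix(c(-Inf, 6, 0, 6, Inf, 1), ncol = 3, byrow = TRUE))
cellStats(toy_bin, stat = "mean") # 5 canopy cells / 9 = 0.556 canopy cover
1 - cellStats(toy_bin, stat = "mean") # the corresponding gap fraction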
#### 20 x 20m PLOT CANOPY GAP ####
n <- nrow(MarSub) + nrow(RivSub) + nrow(DenSub) # overall polygon numbers
gap_polygon_matrix <- matrix(nrow = n, ncol = 3) # initialize matrix
colnames(gap_polygon_matrix) <- c("gap", "polygon", "treatment") # new column names
for (i in 1:nrow(MarSub)) { # loop through mar polygons
CHM_MARs_clippedR <- crop(CHM_MARs_reclassified, extent(MarSub[i,])) # crop 1s and 0s chm to selected polygon
gap_polygon_matrix[i,1] <- cellStats(CHM_MARs_clippedR, stat = "mean") # calculate gap and add to data
gap_polygon_matrix[i,2] <- i # polygon number
gap_polygon_matrix[i,3] <- 1 # forest type classification number
}
for (i in 1:nrow(RivSub)) { # loop through riv polygons
CHM_RIVs_clippedR <- crop(CHM_RIVs_reclassified, extent(RivSub[i,])) # crop 1s and 0s chm to selected polygon
gap_polygon_matrix[(i + nrow(MarSub)),1] <- cellStats(CHM_RIVs_clippedR, stat = "mean") # calculate gap and add to data
gap_polygon_matrix[(i + nrow(MarSub)),2] <- i # polygon number
gap_polygon_matrix[(i + nrow(MarSub)),3] <- 100 # forest type classification number
}
for (i in 1:nrow(DenSub)) { # loop through den polygons
CHM_DENs_clippedR <- crop(CHM_DENs_reclassified, extent(DenSub[i,])) # crop 1s and 0s chm to selected polygon
gap_polygon_matrix[(i + nrow(MarSub) + nrow(RivSub)),1] <- cellStats(CHM_DENs_clippedR, stat = "mean") # calculate gap and add to data
gap_polygon_matrix[(i + nrow(MarSub) + nrow(RivSub)),2] <- i # polygon number
gap_polygon_matrix[(i + nrow(MarSub) + nrow(RivSub)),3] <- 9 # forest type classification number
}
#### BOX PLOT ####
gap_polygon_df <- as.data.frame(gap_polygon_matrix) # create a data frame from the matrix
gap_polygon_df$gap <- as.numeric(gap_polygon_df$gap) # ensure gap is numeric
#### KRUSKAL WALLIS ####
kruskal_result <- kruskal.test(gap ~ treatment, data = gap_polygon_df) # run kw test
print(kruskal_result) # print results
##
## Kruskal-Wallis rank sum test
##
## data: gap by treatment
## Kruskal-Wallis chi-squared = 953.33, df = 2, p-value < 2.2e-16
posthoc_dunn <- dunn.test(gap_polygon_df$gap, gap_polygon_df$treatment, method = "bonferroni") # Dunn post hoc with Bonferroni
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 953.3166, df = 2, p-value = 0
##
##
## Comparison of x by group
## (Bonferroni)
## Col Mean-|
## Row Mean | 1 100
## ---------+----------------------
## 100 | -30.38687
## | 0.0000*
## |
## 9 | -19.93325 10.45361
## | 0.0000* 0.0000*
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
print("Post-hoc Dunn's Test:")
## [1] "Post-hoc Dunn's Test:"
print(posthoc_dunn) # results
## $chi2
## [1] 953.3166
##
## $Z
## [1] -30.38687 -19.93326 10.45362
##
## $P
## [1] 4.094758e-203 1.047390e-88 7.052080e-26
##
## $P.adjusted
## [1] 1.228427e-202 3.142169e-88 2.115624e-25
##
## $comparisons
## [1] "1 - 100" "1 - 9" "100 - 9"
# all significant differences
# TODO: add significance letters to the box plot
# Define custom colors for each unique treatment
custom_colors <- c('darkseagreen2','plum3', 'chartreuse4')
# Create a box plot of gap by treatment with custom colors
boxplot(gap ~ treatment, data = gap_polygon_df,
main = "Box Plot of Gap by Treatment",
ylab = "Gap",
col = custom_colors[as.factor(unique(gap_polygon_df$treatment))],
border = "black")
#### ANOVA ASSUMPTIONS ####
shapiro_test_resultMAR <- shapiro.test(gap_polygon_df$gap[gap_polygon_df$treatment==1])
# display the result
print(shapiro_test_resultMAR)
##
## Shapiro-Wilk normality test
##
## data: gap_polygon_df$gap[gap_polygon_df$treatment == 1]
## W = 0.93913, p-value < 2.2e-16
shapiro_test_resultRIV <- shapiro.test(gap_polygon_df$gap[gap_polygon_df$treatment==9])
# display the result
print(shapiro_test_resultRIV)
##
## Shapiro-Wilk normality test
##
## data: gap_polygon_df$gap[gap_polygon_df$treatment == 9]
## W = 0.79899, p-value < 2.2e-16
shapiro_test_resultDEN <- shapiro.test(gap_polygon_df$gap[gap_polygon_df$treatment==100])
# display the result
print(shapiro_test_resultDEN)
##
## Shapiro-Wilk normality test
##
## data: gap_polygon_df$gap[gap_polygon_df$treatment == 100]
## W = 0.63623, p-value < 2.2e-16
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(gap ~ treatment, data = gap_polygon_df)
# display the result
print(bartlett_test_result)
##
## Bartlett test of homogeneity of variances
##
## data: gap by treatment
## Bartlett's K-squared = 1069.3, df = 2, p-value < 2.2e-16
Canopy height statistics: mean, median, max, min, and SD
#### RECLASSIFY ####
# reclassify for canopy height: set values at or below 6 m to NA so that only trees remain
chm_mars_reclassified <- CHM_MARs # new data to reclass
chm_mars_reclassified[chm_mars_reclassified <= 6] <- NA # reclassify
#### STATISTICS THAT ARE NOT SD ####
## mar
MReCl <- MRes[MRes$CHM_Value > 6, ]
summary(MReCl)
## Longitude Latitude CHM_Value
## Min. :518665 Min. : 66528 Min. : 6.001
## 1st Qu.:527770 1st Qu.: 68328 1st Qu.:17.000
## Median :528407 Median : 84647 Median :21.399
## Mean :529427 Mean : 80442 Mean :21.130
## 3rd Qu.:533234 3rd Qu.: 85211 3rd Qu.:25.439
## Max. :534650 Max. :100181 Max. :54.591
## NA's :9233 NA's :9233 NA's :9233
# median: 21.399, mean: 21.130, max: 54.591, min: (all mins will be around 6 because of canopy cutoff)
## riv
RReCl <- RRes[RRes$CHM_Value > 6, ]
summary(RReCl)
## Longitude Latitude CHM_Value
## Min. :518860 Min. : 66663 Min. : 6.001
## 1st Qu.:528064 1st Qu.: 67399 1st Qu.:13.018
## Median :533025 Median : 67710 Median :17.851
## Mean :530109 Mean : 76511 Mean :17.960
## 3rd Qu.:533775 3rd Qu.: 84615 3rd Qu.:22.062
## Max. :534650 Max. :101266 Max. :47.050
## NA's :3902 NA's :3902 NA's :3902
# median: 17.851 , mean: 17.960 , max: 47.050, min: (all mins will be around 6 because of canopy cutoff)
## den
DReCl <- DRes[DRes$CHM_Value > 6, ]
summary(DReCl)
## Longitude Latitude CHM_Value
## Min. :518522 Min. : 66494 Min. : 6.001
## 1st Qu.:519106 1st Qu.: 99751 1st Qu.:18.471
## Median :519651 Median :100288 Median :26.036
## Mean :521493 Mean : 95803 Mean :25.735
## 3rd Qu.:520183 3rd Qu.:100714 3rd Qu.:32.777
## Max. :534503 Max. :101250 Max. :58.586
## NA's :8601 NA's :8601 NA's :8601
# median: 26.036, mean: 25.735, max: 58.586, min: (all mins will be around 6 because of canopy cutoff)
#### SD ####
## mar
missing_values <- sum(is.na(MRes$CHM_Value)) # deal w missing values
if (missing_values > 0) { # if missing
MRes <- na.omit(MRes) # omit
}
sd(MRes$CHM_Value) # calculate SD
## [1] 9.949817
#9.949817
## riv
missing_values <- sum(is.na(RRes$CHM_Value)) # deal w missing values
if (missing_values > 0) { # if missing
RRes <- na.omit(RRes) # omit
}
sd(RRes$CHM_Value) # calculate SD
## [1] 6.796924
#6.796924
## den
missing_values <- sum(is.na(DRes$CHM_Value)) # deal w missing values
if (missing_values > 0) { # if missing
DRes <- na.omit(DRes) # omit
}
sd(DRes$CHM_Value) # calculate SD
## [1] 11.55631
#11.55631
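Equivalently (a convenience sketch), all three standard deviations can be computed in one call:
# sketch: the same three SDs in one pass
sapply(list(Mar = MRes, Riv = RRes, Den = DRes), function(d) sd(d$CHM_Value, na.rm = TRUE))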
—- LANDSCAPE PATTERNS RELATED TO AGB/AGC —- Organize plotwise C information !LONG RUNTIME!
#### CARBON AND TEST PLOTS W NDVI ####
# NDVI is not included in the thesis but is still interesting to look at, so the code stays. The merged_data product is also used in subsequent code. WARNING: this code takes very long to run.
ndv_mat <- matrix(NA, nrow = 14, ncol = 2) # initialize
for (i in 1:14) { # loop through obs polygons
bufpts_first <- bufpts[i, ] # Subset bufpts for polygon # i
bufpts_raster_first <- rasterize(bufpts_first, ndv) # convert subset to raster for masking
ndvi_masked_first <- mask(ndv, bufpts_raster_first) # Mask ndvi using buffered points subset
average_ndvi <- cellStats(ndvi_masked_first, stat = "mean", na.rm = TRUE) # calc avg ndvi
ndv_mat[i,1] <- average_ndvi # apply to recently initialized matrix
ndv_mat[i,2] <- bufpts$plot[i] # add bufpts information too
}
# order check so that all plots line up
mat <- mat[order(mat[, 1]), ]
ndv_mat <- ndv_mat[order(ndv_mat[, 2]), ]
# Also, floor and ceiling
MAT <- MAT[order(MAT[, 1]), ]
# merge together: uncommented includes floor and ceiling
#merged_data <- cbind(ndv_mat, mat[, -1])
merged_data <- cbind(ndv_mat, MAT[,2:4])
# Assign new row names
colnames(merged_data) <- c("NDVI","PLOT","CMgHaFloor","CMgHa","CMgHaCeil")
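Note that cbind() with the character plot names coerces merged_data to a character matrix; a convenience step (optional, since later chunks convert columns piecemeal) is to build a typed data frame up front:
# optional sketch: convert the character matrix to a data frame with numeric columns
merged_df <- as.data.frame(merged_data, stringsAsFactors = FALSE)
num_cols <- c("NDVI", "CMgHaFloor", "CMgHa", "CMgHaCeil")
merged_df[num_cols] <- lapply(merged_df[num_cols], as.numeric)
str(merged_df) # check the column types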
Carbon and canopy gap relationship
# this also looks at Landsat band 4; while not in the thesis, it is an interesting relationship. The product of this chunk is also used in the rest of the thesis code.
marpnts <- bufpts[6:10,] # subset mar points
CHM_clipped_Reclass <- raster::reclassify(CHM_clipped, matrix(c(-Inf, 6, 0, 6, Inf, 1), ncol = 3, byrow = TRUE)) # reclassify values so that 6 m and above are canopy
gm <- matrix(nrow = 14, ncol = 4) # initialize
# clip CHM_clipped to MarSub polygons
for (i in 1:nrow(marpnts)) { # loop through mar polygons
CHM_MARs_clippedR <- crop(CHM_clipped_Reclass, extent(marpnts[i,])) # crop 1s and 0s chm to selected polygon
B4_MARs_clippedR <- crop(B4, extent(marpnts[i,])) # crop Landsat band 4 to the selected polygon
gm[i,1] <- cellStats(CHM_MARs_clippedR, stat = "mean") # calculate gap and add to data
gm[i,2] <- i # polygon number
gm[i,3] <- 1 # forest type classification number
gm[i,4] <- cellStats(B4_MARs_clippedR, stat = "mean") # band 4 avg
}
MARmerged_data <- data.frame(merged_data[6:10,]) # subset from merged_data for mar info
MARmerged_data$GAP <- gm[1:5,1] # get gap info
MARmerged_data$B4 <- gm[1:5,4] # band 4
#colnames(MARmerged_data) <- c("NDVI", "PLOT", "CMgHaFloor", "CMgHa", "CMgHaCeil", "GAP", "B4") # change naming options
MARmerged_data$NDVI <- as.numeric(MARmerged_data$NDVI) # make sure numeric
#### GAP - C MODEL ####
# linear regression model: GAP explains carbon variation quite well (R^2 = 0.877)
modelMar <- lm(CMgHa ~ GAP, data = MARmerged_data)
# summary of the model
summary(modelMar)
##
## Call:
## lm(formula = CMgHa ~ GAP, data = MARmerged_data)
##
## Residuals:
## 1 2 3 4 5
## -11.997 38.557 14.284 -48.022 7.179
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -106.96 59.96 -1.784 0.1724
## GAP 368.07 79.71 4.618 0.0191 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.38 on 3 degrees of freedom
## Multiple R-squared: 0.8767, Adjusted R-squared: 0.8355
## F-statistic: 21.32 on 1 and 3 DF, p-value: 0.01911
#RMSE
# Residuals
residuals <- c(-11.997, 38.557, 14.284, -48.022, 7.179)
# Mean Squared Residuals
mse <- mean(residuals^2)
# RMSE
rmse <- sqrt(mse)
# Print RMSE
print(rmse)
## [1] 28.95601
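The same RMSE can be taken straight from the fitted model instead of retyping the residuals; the sketch below applies equally to the floor and ceiling models that follow.
# sketch: RMSE directly from the model object, no hand-copied residuals
sqrt(mean(residuals(modelMar)^2)) # ~28.956, matching the value above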
#### GAP - C MODEL ####
# linear regression model (floor wood density): GAP explains carbon variation well (R^2 = 0.812)
modelMarFLOOR <- lm(CMgHaFloor ~ GAP, data = MARmerged_data)
# summary of the model
summary(modelMarFLOOR)
##
## Call:
## lm(formula = CMgHaFloor ~ GAP, data = MARmerged_data)
##
## Residuals:
## 1 2 3 4 5
## -7.412 36.241 22.660 -49.411 -2.077
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -78.55 60.92 -1.290 0.2876
## GAP 291.27 80.99 3.596 0.0369 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.98 on 3 degrees of freedom
## Multiple R-squared: 0.8117, Adjusted R-squared: 0.749
## F-statistic: 12.93 on 1 and 3 DF, p-value: 0.03685
#RMSE
# Residuals
residuals <- c(-7.412, 36.241, 22.660, -49.411, -2.077)
# Mean Squared Residuals
mse <- mean(residuals^2)
# RMSE
rmse <- sqrt(mse)
# Print RMSE
print(rmse)
## [1] 29.41967
#### GAP - C MODEL ####
# linear regression model (ceiling wood density): GAP explains carbon variation well (R^2 = 0.908)
modelMarCEIL <- lm(CMgHaCeil ~ GAP, data = MARmerged_data)
# summary of the model
summary(modelMarCEIL)
##
## Call:
## lm(formula = CMgHaCeil ~ GAP, data = MARmerged_data)
##
## Residuals:
## 1 2 3 4 5
## -16.581 40.872 5.908 -46.633 16.435
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -135.38 61.60 -2.198 0.1154
## GAP 444.87 81.89 5.432 0.0122 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.41 on 3 degrees of freedom
## Multiple R-squared: 0.9077, Adjusted R-squared: 0.877
## F-statistic: 29.51 on 1 and 3 DF, p-value: 0.01224
#RMSE
# Residuals
residuals <- c(-16.581, 40.872, 5.908, -46.633, 16.435)
# Mean Squared Residuals
mse <- mean(residuals^2)
# RMSE
rmse <- sqrt(mse)
# Print RMSE
print(rmse)
## [1] 29.74929
Calculate C predictions on the cgap raster
all_values <- raster::getValues(TENmresGap)
#non_na_values <- na.omit(all_values)
#view(non_na_values) # for visualization without NA values
new_data <- data.frame(GAP = all_values) # dataframe
# Use the predict function to get predictions from the created model
predictions <- predict(modelMar, newdata = new_data)
predicted_raster <- raster(TENmresGap) # transform to a raster
values(predicted_raster) <- predictions # predictions to the raster
#writeRaster(predicted_raster, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/predicted_raster.tif", format = "GTiff", overwrite = TRUE) # write and save as .tif
#### floor ####
all_values <- raster::getValues(TENmresGap)
#non_na_values <- na.omit(all_values)
#view(non_na_values) # for visualization without NA values
new_dataFLOOR <- data.frame(GAP = all_values) # data frame named to match the model predictor
# use predict() to get floor carbon predictions from the fitted model
predictionsFLOOR <- predict(modelMarFLOOR, newdata = new_dataFLOOR)
predicted_rasterFLOOR <- raster(TENmresGap) # empty raster with the same geometry as TENmresGap
values(predicted_rasterFLOOR) <- predictionsFLOOR # write the predictions into the raster
#writeRaster(predicted_rasterFLOOR, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/predicted_rasterFLOOR.tif", format = "GTiff", overwrite = TRUE) # write and save as .tif
#### ceil ####
all_values <- raster::getValues(TENmresGap)
#non_na_values <- na.omit(all_values)
#view(non_na_values) # for visualization without NA values
new_dataCEIL <- data.frame(GAP = all_values) # data frame named to match the model predictor
# use predict() to get ceiling carbon predictions from the fitted model
predictionsCEIL <- predict(modelMarCEIL, newdata = new_dataCEIL)
predicted_rasterCEIL <- raster(TENmresGap) # empty raster with the same geometry as TENmresGap
values(predicted_rasterCEIL) <- predictionsCEIL # write the predictions into the raster
#writeRaster(predicted_rasterCEIL, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/predicted_rasterCEIL.tif", format = "GTiff", overwrite = TRUE) # write and save as .tif
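The three prediction blocks above differ only in the model used. A minimal sketch that folds them into one function, assuming modelMar, modelMarFLOOR, modelMarCEIL, and TENmresGap from above; predict_to_raster is a hypothetical helper, not a package function:
# hypothetical helper: predict a fitted GAP model across the whole gap raster
predict_to_raster <- function(model, gap_raster) {
  new_data <- data.frame(GAP = raster::getValues(gap_raster)) # predictor name must match the model formula
  preds <- predict(model, newdata = new_data) # NA gap cells yield NA predictions
  raster::setValues(raster::raster(gap_raster), preds) # empty copy of the geometry, filled with predictions
}
predicted_raster <- predict_to_raster(modelMar, TENmresGap)
predicted_rasterFLOOR <- predict_to_raster(modelMarFLOOR, TENmresGap)
predicted_rasterCEIL <- predict_to_raster(modelMarCEIL, TENmresGap)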
Corrected C for the Mar forest only
# In QGIS, I clipped the C predictions so that they cover only Mar pixels
reclassified_C_onlyMAR <- C_onlyMAR # new output to be reclassified
reclassified_C_onlyMAR[reclassified_C_onlyMAR < 0] <- 0 # set any negative predictions to 0
plot(reclassified_C_onlyMAR) # view
#writeRaster(reclassified_C_onlyMAR, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/reclassified_C_onlyMAR.tif", format = "GTiff", overwrite = TRUE) # write and save as .tif
all_values <- raster::getValues(reclassified_C_onlyMAR) # extract values
A <- na.omit(all_values) # get values without NAs
#### floor and ceil ####
reclassified_C_onlyMARF <- C_onlyMARF # new output to be reclassified
reclassified_C_onlyMARF[reclassified_C_onlyMARF < 0] <- 0 # set any negative predictions to 0
plot(reclassified_C_onlyMARF) # view
writeRaster(reclassified_C_onlyMARF, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/reclassified_C_onlyMARF.tif", format = "GTiff", overwrite = TRUE) # write and save as .tif
all_values <- raster::getValues(reclassified_C_onlyMARF) # extract values
FL <- na.omit(all_values) # get values without NAs
reclassified_C_onlyMARC <- C_onlyMARC # new output to be reclassified
reclassified_C_onlyMARC[reclassified_C_onlyMARC < 0] <- 0 # set any negative predictions to 0
plot(reclassified_C_onlyMARC) # view
writeRaster(reclassified_C_onlyMARC, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/reclassified_C_onlyMARC.tif", format = "GTiff", overwrite = TRUE) # write and save as .tif
all_values <- raster::getValues(reclassified_C_onlyMARC) # extract values
CL <- na.omit(all_values) # get values without NAs
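The same negative-to-zero correction appears three times above; raster::clamp performs it in one call. A minimal sketch assuming the uncorrected rasters from above (with useValues = TRUE, values below the lower bound are set to the bound rather than to NA):
# equivalent corrections using raster::clamp
reclassified_C_onlyMAR <- raster::clamp(C_onlyMAR, lower = 0, useValues = TRUE)
reclassified_C_onlyMARF <- raster::clamp(C_onlyMARF, lower = 0, useValues = TRUE)
reclassified_C_onlyMARC <- raster::clamp(C_onlyMARC, lower = 0, useValues = TRUE)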
AGB for the Mar forest in the CHM
AA <- data.frame(A)
# bounds use the residual standard error of modelMar (37.38, from its summary above)
AA$upper <- AA$A + 37.38
AA$lower <- AA$A - 37.38
# each 10 m x 10 m pixel is 100 m^2 = 0.01 ha, so summing per-ha values and multiplying by 0.01 gives total Mg
SumA <- sum(AA$A)*.01
SumAupper <- sum(AA$upper)*.01
SumAlower <- sum(AA$lower)*.01
CLL <- data.frame(CL)
# bounds use the residual standard error of modelMarCEIL (38.41)
CLL$upper <- CLL$CL + 38.41
CLL$lower <- CLL$CL - 38.41
SumCL <- sum(CLL$CL)*.01
SumCLupper <- sum(CLL$upper)*.01
SumCLlower <- sum(CLL$lower)*.01
FLL <- data.frame(FL)
# bounds use the residual standard error of modelMarFLOOR (37.98)
FLL$upper <- FLL$FL + 37.98
FLL$lower <- FLL$FL - 37.98
SumFL <- sum(FLL$FL)*.01
SumFLupper <- sum(FLL$upper)*.01
SumFLlower <- sum(FLL$lower)*.01
View C for the Mar forest in the park and extrapolate
# look at c data
hist(A)
hist(CL)
hist(FL)
# mean AGB (Mg/ha), converted to aboveground carbon (AGC) with a carbon fraction of 0.47
MMar <- mean(A) # 170.258 Mg/ha
MMarAGC <- MMar*0.47 # 80.02126 MgC/ha
# ceiling estimate
MMarCL <- mean(CL) # 207.8015 Mg/ha
MMarAGCCL <- MMarCL*0.47 # 97.6667 MgC/ha
# floor estimate
MMarFL <- mean(FL) # 145.6434 Mg/ha
MMarAGCFL <- MMarFL*0.47 # 68.45238 MgC/ha
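The same 0.47 carbon fraction is applied to each mean separately above; the conversion can also be done in one step. A minimal sketch assuming the A, CL, and FL vectors from above:
# mean AGB (Mg/ha) and AGC (MgC/ha) for the central, ceiling, and floor estimates
agb_means <- c(central = mean(A), ceil = mean(CL), floor = mean(FL))
agc_means <- agb_means*0.47 # carbon fraction of dry biomass
agc_means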
#GCPLC_Vals <- raster::getValues(GCPLC) # ground-point land cover, for examination
#frequency_counts <- table(GCPLC_Vals) # frequency counts per class
# GCPLC_Vals pixel counts (each pixel covers a 10 m x 10 m area):
# class 1 (Mar): 48505861, class 9 (dense): 51669484, class 100 (riverine): 20629705
# estimate of the forested area of Odzala:
forested <- 20629705 + 51669484 + 48505861 # total forested pixels (100 m^2 each)
marforested <- 48505861
rivforested <- 20629705
denforested <- 51669484
# convert pixel counts to ha (1 pixel = 100 m^2 = 0.01 ha)
HAforested <- forested*(.01) # ~1.21 million ha forested; Odzala totals around 1.35 million ha, most of the remainder savanna
HAmarforested <- marforested*(0.01)
HArivforested <- rivforested*(0.01)
HAdenforested <- denforested*(0.01)
# share of the forested park in each class
pmar <- HAmarforested/HAforested # around 40%
priv <- HArivforested/HAforested # around 17%
pden <- HAdenforested/HAforested # around 43%
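The class pixel counts above were read off manually from a frequency table; they can also be tabulated directly from the land-cover raster. A minimal sketch assuming GCPLC is loaded and holds only the three forest classes (filter out any other values first if present):
# tabulate pixel counts per land-cover class with raster::freq
lc_counts <- raster::freq(GCPLC, useNA = "no") # matrix with columns: value, count
lc_area <- data.frame(class = lc_counts[, "value"],
                      area_ha = lc_counts[, "count"]*0.01) # 10 m pixels -> ha
lc_area$share_of_forest <- lc_area$area_ha/sum(lc_area$area_ha)
lc_area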